Michael Vines 7 жил өмнө
parent
commit
f89f121d2b

+ 39 - 5
net/README.md

@@ -5,15 +5,30 @@ intended to be both dev and CD friendly.
 
 
 ### User Account Prerequisites
 ### User Account Prerequisites
 
 
-Log in to GCP with:
+GCP and AWS are supported.
+
+#### GCP
+First authenticate with
 ```bash
 ```bash
 $ gcloud auth login
 $ gcloud auth login
 ```
 ```
 
 
-Also ensure that `$(whoami)` is the name of an InfluxDB user account with enough
-access to create a new database.
+#### AWS
+Obtain your credentials from the AWS IAM Console and configure the AWS CLI with
+```bash
+$ aws configure
+```
+More information on AWS CLI configuration can be found [here](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-quick-configuration).
+
+### Metrics configuration
+Ensure that `$(whoami)` is the name of an InfluxDB user account with enough
+access to create a new InfluxDB database.  Ask mvines@ for help if needed.
 
 
 ## Quick Start
 ## Quick Start
+
+NOTE: This example uses GCP.  If you are using AWS, replace `./gce.sh` with
+`./ec2.sh` in the commands.
+
 ```bash
 ```bash
 $ cd net/
 $ cd net/
 $ ./gce.sh create -n 5 -c 1  #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
 $ ./gce.sh create -n 5 -c 1  #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
@@ -32,6 +47,10 @@ network over public IP addresses:
 ```bash
 ```bash
 $ ./gce.sh create -P ...
 $ ./gce.sh create -P ...
 ```
 ```
+or
+```bash
+$ ./ec2.sh create -P ...
+```
 
 
 ### Deploying a Snap-based network
 ### Deploying a Snap-based network
 To deploy the latest pre-built `edge` channel Snap (ie, latest from the `master`
 To deploy the latest pre-built `edge` channel Snap (ie, latest from the `master`
@@ -46,6 +65,10 @@ First ensure the network instances are created with GPU enabled:
 ```bash
 ```bash
 $ ./gce.sh create -g ...
 $ ./gce.sh create -g ...
 ```
 ```
+or
+```bash
+$ ./ec2.sh create -g ...
+```
 
 
 If deploying a Snap-based network nothing further is required, as GPU presence
 If deploying a Snap-based network nothing further is required, as GPU presence
 is detected at runtime and the CUDA build is auto selected.
 is detected at runtime and the CUDA build is auto selected.
@@ -58,9 +81,20 @@ $ ./net.sh start -f "cuda,erasure"
 
 
 ### How to interact with a CD testnet deployed by ci/testnet-deploy.sh
 ### How to interact with a CD testnet deployed by ci/testnet-deploy.sh
 
 
+**AWS-Specific Extra Setup**: Follow the steps in `scripts/add-solana-user-authorized_keys.sh`,
+then redeploy the testnet before continuing in this section.
+
 Taking **master-testnet-solana-com** as an example, configure your workspace for
 Taking **master-testnet-solana-com** as an example, configure your workspace for
 the testnet using:
 the testnet using:
-```
+```bash
 $ ./gce.sh config -p master-testnet-solana-com
 $ ./gce.sh config -p master-testnet-solana-com
-$ ./ssh.sh                                     # <-- Details on how to ssh into any testnet node
+```
+or
+```bash
+$ ./ec2.sh config -p master-testnet-solana-com
+```
+
+Then run the following for details on how to ssh into any testnet node:
+```bash
+$ ./ssh.sh
 ```
 ```

+ 1 - 0
net/ec2.sh

@@ -0,0 +1 @@
+gce.sh

+ 142 - 80
net/gce.sh

@@ -1,27 +1,44 @@
 #!/bin/bash -e
 #!/bin/bash -e
 
 
 here=$(dirname "$0")
 here=$(dirname "$0")
-# shellcheck source=net/scripts/gcloud.sh
-source "$here"/scripts/gcloud.sh
 # shellcheck source=net/common.sh
 # shellcheck source=net/common.sh
 source "$here"/common.sh
 source "$here"/common.sh
 
 
# Pick the cloud provider implementation from the name this script was invoked
# as (NOTE(review): net/ec2.sh appears to be a link to this script -- confirm),
# then set the provider-specific image and machine type defaults.
cloudProvider=$(basename "$0" .sh)
case $cloudProvider in
gce)
  # shellcheck source=net/scripts/gce-provider.sh
  source "$here"/scripts/gce-provider.sh

  imageName="ubuntu-16-04-cuda-9-2-new"
  leaderMachineType=n1-standard-16
  validatorMachineType=n1-standard-4
  clientMachineType=n1-standard-16
  ;;
ec2)
  # shellcheck source=net/scripts/ec2-provider.sh
  source "$here"/scripts/ec2-provider.sh

  imageName="ami-04169656fea786776"
  leaderMachineType=m4.4xlarge
  validatorMachineType=m4.xlarge
  clientMachineType=m4.4xlarge
  ;;
*)
  # No provider file was sourced, so none of the cloud_* functions or machine
  # defaults exist.  Stop here instead of failing later in a confusing way.
  echo "Error: Unknown cloud provider: $cloudProvider" >&2
  exit 1
  ;;
esac
+
+
 prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
 prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
 validatorNodeCount=5
 validatorNodeCount=5
 clientNodeCount=1
 clientNodeCount=1
-leaderBootDiskSize=1TB
-leaderMachineType=n1-standard-16
-leaderAccelerator=
-validatorMachineType=n1-standard-4
-validatorBootDiskSize=$leaderBootDiskSize
-validatorAccelerator=
-clientMachineType=n1-standard-16
-clientBootDiskSize=40GB
-clientAccelerator=
-
-imageName="ubuntu-16-04-cuda-9-2-new"
+leaderBootDiskSizeInGb=1000
+validatorBootDiskSizeInGb=$leaderBootDiskSizeInGb
+clientBootDiskSizeInGb=40
+
 publicNetwork=false
 publicNetwork=false
-zone="us-west1-b"
+enableGpu=false
 leaderAddress=
 leaderAddress=
 
 
 usage() {
 usage() {
@@ -33,7 +50,7 @@ usage() {
   cat <<EOF
   cat <<EOF
 usage: $0 [create|config|delete] [common options] [command-specific options]
 usage: $0 [create|config|delete] [common options] [command-specific options]
 
 
-Configure a GCE-based testnet
+Manage testnet instances
 
 
  create - create a new testnet (implies 'config')
  create - create a new testnet (implies 'config')
  config - configure the testnet and write a config file describing it
  config - configure the testnet and write a config file describing it
@@ -47,10 +64,13 @@ Configure a GCE-based testnet
    -n [number]      - Number of validator nodes (default: $validatorNodeCount)
    -n [number]      - Number of validator nodes (default: $validatorNodeCount)
    -c [number]      - Number of client nodes (default: $clientNodeCount)
    -c [number]      - Number of client nodes (default: $clientNodeCount)
    -P               - Use public network IP addresses (default: $publicNetwork)
    -P               - Use public network IP addresses (default: $publicNetwork)
-   -z [zone]        - GCP Zone for the nodes (default: $zone)
-   -i [imageName]   - Existing image on GCE (default: $imageName)
-   -g               - Enable GPU
-   -a [address]     - Set the leader node's external IP address to this GCE address
+   -z [zone]        - Zone for the nodes (default: $zone)
+   -g               - Enable GPU (default: $enableGpu)
+   -a [address]     - Set the leader node's external IP address to this value.
+                      For GCE, [address] is the "name" of the desired External
+                      IP Address.
+                      For EC2, [address] is the "allocation ID" of the desired
+                      Elastic IP.
 
 
  config-specific options:
  config-specific options:
    none
    none
@@ -68,7 +88,7 @@ command=$1
 shift
 shift
 [[ $command = create || $command = config || $command = delete ]] || usage "Invalid command: $command"
 [[ $command = create || $command = config || $command = delete ]] || usage "Invalid command: $command"
 
 
-while getopts "h?p:Pi:n:c:z:ga:" opt; do
+while getopts "h?p:Pn:c:z:ga:" opt; do
   case $opt in
   case $opt in
   h | \?)
   h | \?)
     usage
     usage
@@ -80,9 +100,6 @@ while getopts "h?p:Pi:n:c:z:ga:" opt; do
   P)
   P)
     publicNetwork=true
     publicNetwork=true
     ;;
     ;;
-  i)
-    imageName=$OPTARG
-    ;;
   n)
   n)
     validatorNodeCount=$OPTARG
     validatorNodeCount=$OPTARG
     ;;
     ;;
@@ -90,10 +107,10 @@ while getopts "h?p:Pi:n:c:z:ga:" opt; do
     clientNodeCount=$OPTARG
     clientNodeCount=$OPTARG
     ;;
     ;;
   z)
   z)
-    zone=$OPTARG
+    cloud_SetZone "$OPTARG"
     ;;
     ;;
   g)
   g)
-    leaderAccelerator="count=4,type=nvidia-tesla-k80"
+    enableGpu=true
     ;;
     ;;
   a)
   a)
     leaderAddress=$OPTARG
     leaderAddress=$OPTARG
@@ -108,6 +125,37 @@ shift $((OPTIND - 1))
 [[ -z $1 ]] || usage "Unexpected argument: $1"
 [[ -z $1 ]] || usage "Unexpected argument: $1"
 sshPrivateKey="$netConfigDir/id_$prefix"
 sshPrivateKey="$netConfigDir/id_$prefix"
 
 
+
# cloud_ForEachInstance [cmd] [extra args to cmd]
#
# Execute a command for each element in the `instances` array
#
#   cmd   - The command to execute on each instance
#           The command will receive the following arguments, followed by
#           any additional arguments supplied to cloud_ForEachInstance:
#               name      - name of the instance
#               publicIp  - The public IP address of this instance
#               privateIp - The private IP address of this instance
#               count     - Monotonically increasing count for each
#                           invocation of cmd, starting at 1
#               ...       - Extra args to cmd..
#
# Exits the script with status 1 if cmd is not specified.
#
cloud_ForEachInstance() {
  # Check before shifting: `shift` with no arguments fails, which would abort
  # a `set -e` script before the error message is printed.
  [[ -n $1 ]] || { echo cloud_ForEachInstance: cmd not specified; exit 1; }
  declare cmd="$1"
  shift

  declare info name publicIp privateIp
  declare count=1
  for info in "${instances[@]}"; do
    # Each entry is "name:publicIp:privateIp"
    IFS=: read -r name publicIp privateIp <<<"$info"

    # $cmd is still evaluated as shell text, but the arguments are expanded
    # inside the eval.  An empty or whitespace-containing value is therefore
    # passed as exactly one argument instead of vanishing and shifting the
    # remaining arguments.
    eval "$cmd"' "$name" "$publicIp" "$privateIp" "$count" "$@"'
    count=$((count + 1))
  done
}
+
 prepareInstancesAndWriteConfigFile() {
 prepareInstancesAndWriteConfigFile() {
   $metricsWriteDatapoint "testnet-deploy net-config-begin=1"
   $metricsWriteDatapoint "testnet-deploy net-config-begin=1"
 
 
@@ -122,10 +170,10 @@ EOF
 
 
   recordInstanceIp() {
   recordInstanceIp() {
     declare name="$1"
     declare name="$1"
-    declare publicIp="$3"
-    declare privateIp="$4"
+    declare publicIp="$2"
+    declare privateIp="$3"
 
 
-    declare arrayName="$6"
+    declare arrayName="$5"
 
 
     echo "$arrayName+=($publicIp)  # $name" >> "$configFile"
     echo "$arrayName+=($publicIp)  # $name" >> "$configFile"
     if [[ $arrayName = "leaderIp" ]]; then
     if [[ $arrayName = "leaderIp" ]]; then
@@ -139,121 +187,133 @@ EOF
 
 
   waitForStartupComplete() {
   waitForStartupComplete() {
     declare name="$1"
     declare name="$1"
-    declare publicIp="$3"
+    declare publicIp="$2"
 
 
     echo "Waiting for $name to finish booting..."
     echo "Waiting for $name to finish booting..."
     (
     (
       for i in $(seq 1 30); do
       for i in $(seq 1 30); do
-        if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then
+        if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.instance-startup-complete"); then
           break
           break
         fi
         fi
         sleep 2
         sleep 2
         echo "Retry $i..."
         echo "Retry $i..."
       done
       done
     )
     )
+    echo "$name has booted."
   }
   }
 
 
   echo "Looking for leader instance..."
   echo "Looking for leader instance..."
-  gcloud_FindInstances "name=$prefix-leader" show
+  cloud_FindInstance "$prefix-leader"
   [[ ${#instances[@]} -eq 1 ]] || {
   [[ ${#instances[@]} -eq 1 ]] || {
     echo "Unable to find leader"
     echo "Unable to find leader"
     exit 1
     exit 1
   }
   }
 
 
-  echo "Fetching $sshPrivateKey from $leaderName"
   (
   (
-    rm -rf "$sshPrivateKey"{,pub}
-
     declare leaderName
     declare leaderName
-    declare leaderZone
     declare leaderIp
     declare leaderIp
-    IFS=: read -r leaderName leaderZone leaderIp _ < <(echo "${instances[0]}")
+    IFS=: read -r leaderName leaderIp _ < <(echo "${instances[0]}")
 
 
-    set -x
+    # Try to ping the machine first.
+    timeout 60s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"
 
 
-    # Try to ping the machine first.  There can be a delay between when the
-    # instance is reported as RUNNING and when it's reachable over the network
-    timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"
+    if [[ ! -r $sshPrivateKey ]]; then
+      echo "Fetching $sshPrivateKey from $leaderName"
 
 
-    # Try to scp in a couple times, sshd may not yet be up even though the
-    # machine can be pinged...
-    set -o pipefail
-    for i in $(seq 1 10); do
-      if gcloud compute scp --zone "$leaderZone" \
-          "$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then
-        break
-      fi
-      sleep 1
-      echo "Retry $i..."
-    done
+      # Try to scp in a couple times, sshd may not yet be up even though the
+      # machine can be pinged...
+      set -x -o pipefail
+      for i in $(seq 1 30); do
+        if cloud_FetchFile "$leaderName" "$leaderIp" /solana-id_ecdsa "$sshPrivateKey"; then
+          break
+        fi
+
+        sleep 1
+        echo "Retry $i..."
+      done
 
 
-    chmod 400 "$sshPrivateKey"
+      chmod 400 "$sshPrivateKey"
+      ls -l "$sshPrivateKey"
+    fi
   )
   )
 
 
   echo "leaderIp=()" >> "$configFile"
   echo "leaderIp=()" >> "$configFile"
-  gcloud_ForEachInstance recordInstanceIp leaderIp
-  gcloud_ForEachInstance waitForStartupComplete
+  cloud_ForEachInstance recordInstanceIp leaderIp
+  cloud_ForEachInstance waitForStartupComplete
 
 
   echo "Looking for validator instances..."
   echo "Looking for validator instances..."
-  gcloud_FindInstances "name~^$prefix-validator" show
+  cloud_FindInstances "$prefix-validator"
   [[ ${#instances[@]} -gt 0 ]] || {
   [[ ${#instances[@]} -gt 0 ]] || {
     echo "Unable to find validators"
     echo "Unable to find validators"
     exit 1
     exit 1
   }
   }
   echo "validatorIpList=()" >> "$configFile"
   echo "validatorIpList=()" >> "$configFile"
-  gcloud_ForEachInstance recordInstanceIp validatorIpList
-  gcloud_ForEachInstance waitForStartupComplete
+  cloud_ForEachInstance recordInstanceIp validatorIpList
+  cloud_ForEachInstance waitForStartupComplete
 
 
   echo "clientIpList=()" >> "$configFile"
   echo "clientIpList=()" >> "$configFile"
   echo "Looking for client instances..."
   echo "Looking for client instances..."
-  gcloud_FindInstances "name~^$prefix-client" show
+  cloud_FindInstances "$prefix-client"
   [[ ${#instances[@]} -eq 0 ]] || {
   [[ ${#instances[@]} -eq 0 ]] || {
-    gcloud_ForEachInstance recordInstanceIp clientIpList
-    gcloud_ForEachInstance waitForStartupComplete
+    cloud_ForEachInstance recordInstanceIp clientIpList
+    cloud_ForEachInstance waitForStartupComplete
   }
   }
 
 
   echo "Wrote $configFile"
   echo "Wrote $configFile"
   $metricsWriteDatapoint "testnet-deploy net-config-complete=1"
   $metricsWriteDatapoint "testnet-deploy net-config-complete=1"
 }
 }
 
 
-case $command in
-delete)
+delete() {
   $metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
   $metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
 
 
   # Delete the leader node first to prevent unusual metrics on the dashboard
   # Delete the leader node first to prevent unusual metrics on the dashboard
   # during shutdown.
   # during shutdown.
   # TODO: It would be better to fully cut-off metrics reporting before any
   # TODO: It would be better to fully cut-off metrics reporting before any
   # instances are deleted.
   # instances are deleted.
-  for filter in "^$prefix-leader" "^$prefix-"; do
-    gcloud_FindInstances "name~$filter"
+  for filter in "$prefix-leader" "$prefix-"; do
+    echo "Searching for instances: $filter"
+    cloud_FindInstances "$filter"
 
 
     if [[ ${#instances[@]} -eq 0 ]]; then
     if [[ ${#instances[@]} -eq 0 ]]; then
       echo "No instances found matching '$filter'"
       echo "No instances found matching '$filter'"
     else
     else
-      gcloud_DeleteInstances true
+      cloud_DeleteInstances true
     fi
     fi
   done
   done
   rm -f "$configFile"
   rm -f "$configFile"
 
 
   $metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
   $metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
+
+}
+
+case $command in
+delete)
+  delete
   ;;
   ;;
 
 
 create)
 create)
   [[ -n $validatorNodeCount ]] || usage "Need number of nodes"
   [[ -n $validatorNodeCount ]] || usage "Need number of nodes"
+  if [[ $validatorNodeCount -le 0 ]]; then
+    usage "One or more validator nodes is required"
+  fi
+
+  delete
 
 
   $metricsWriteDatapoint "testnet-deploy net-create-begin=1"
   $metricsWriteDatapoint "testnet-deploy net-create-begin=1"
 
 
   rm -rf "$sshPrivateKey"{,.pub}
   rm -rf "$sshPrivateKey"{,.pub}
-  ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
+
+  # Note: using rsa because |aws ec2 import-key-pair| seems to fail for ecdsa
+  ssh-keygen -t rsa -N '' -f "$sshPrivateKey"
 
 
   printNetworkInfo() {
   printNetworkInfo() {
     cat <<EOF
     cat <<EOF
 ========================================================================================
 ========================================================================================
 
 
 Network composition:
 Network composition:
-  Leader = $leaderMachineType (GPU=${leaderAccelerator:-none})
-  Validators = $validatorNodeCount x $validatorMachineType (GPU=${validatorAccelerator:-none})
-  Client(s) = $clientNodeCount x $clientMachineType (GPU=${clientAccelerator:-none})
+  Leader = $leaderMachineType (GPU=$enableGpu)
+  Validators = $validatorNodeCount x $validatorMachineType
+  Client(s) = $clientNodeCount x $clientMachineType
 
 
 ========================================================================================
 ========================================================================================
 
 
@@ -261,7 +321,7 @@ EOF
   }
   }
   printNetworkInfo
   printNetworkInfo
 
 
-  declare startupScript="$netConfigDir"/gce-startup-script.sh
+  declare startupScript="$netConfigDir"/instance-startup-script.sh
   cat > "$startupScript" <<EOF
   cat > "$startupScript" <<EOF
 #!/bin/bash -ex
 #!/bin/bash -ex
 # autogenerated at $(date)
 # autogenerated at $(date)
@@ -270,11 +330,12 @@ cat > /etc/motd <<EOM
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 
   This instance has not been fully configured.
   This instance has not been fully configured.
-  See "startup-script" log messages in /var/log/syslog for status:
-    $ sudo cat /var/log/syslog | grep startup-script
+
+  See startup script log messages in /var/log/syslog for status:
+    $ sudo cat /var/log/syslog | egrep \\(startup-script\\|cloud-init\)
 
 
   To block until setup is complete, run:
   To block until setup is complete, run:
-    $ until [[ -f /.gce-startup-complete ]]; do sleep 1; done
+    $ until [[ -f /.instance-startup-complete ]]; do sleep 1; done
 
 
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 EOM
 EOM
@@ -296,6 +357,7 @@ $(
   cat \
   cat \
     disable-background-upgrades.sh \
     disable-background-upgrades.sh \
     create-solana-user.sh \
     create-solana-user.sh \
+    add-solana-user-authorized_keys.sh \
     install-earlyoom.sh \
     install-earlyoom.sh \
     install-libssl-compatability.sh \
     install-libssl-compatability.sh \
     install-rsync.sh \
     install-rsync.sh \
@@ -305,21 +367,21 @@ cat > /etc/motd <<EOM
 $(printNetworkInfo)
 $(printNetworkInfo)
 EOM
 EOM
 
 
-touch /.gce-startup-complete
+touch /.instance-startup-complete
 
 
 EOF
 EOF
 
 
-  gcloud_CreateInstances "$prefix-leader" 1 "$zone" \
-    "$imageName" "$leaderMachineType" "$leaderBootDiskSize" "$leaderAccelerator" \
+  cloud_CreateInstances "$prefix" "$prefix-leader" 1 \
+    "$imageName" "$leaderMachineType" "$leaderBootDiskSizeInGb" "$enableGpu" \
     "$startupScript" "$leaderAddress"
     "$startupScript" "$leaderAddress"
 
 
-  gcloud_CreateInstances "$prefix-validator" "$validatorNodeCount" "$zone" \
-    "$imageName" "$validatorMachineType" "$validatorBootDiskSize" "$validatorAccelerator" \
+  cloud_CreateInstances "$prefix" "$prefix-validator" "$validatorNodeCount" \
+    "$imageName" "$validatorMachineType" "$validatorBootDiskSizeInGb" false \
     "$startupScript" ""
     "$startupScript" ""
 
 
   if [[ $clientNodeCount -gt 0 ]]; then
   if [[ $clientNodeCount -gt 0 ]]; then
-    gcloud_CreateInstances "$prefix-client" "$clientNodeCount" "$zone" \
-      "$imageName" "$clientMachineType" "$clientBootDiskSize" "$clientAccelerator" \
+    cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
+      "$imageName" "$clientMachineType" "$clientBootDiskSizeInGb" false \
       "$startupScript" ""
       "$startupScript" ""
   fi
   fi
 
 

+ 20 - 0
net/scripts/add-solana-user-authorized_keys.sh

@@ -0,0 +1,20 @@
#!/bin/bash -ex
#
# Appends the standing list of testnet public keys to the solana user's
# authorized_keys.  Only runs as root on Linux, and only once
# /home/solana/.ssh exists; otherwise it exits with status 1.

if [[ $(uname) != Linux ]]; then
  exit 1
fi
if [[ $USER != root ]]; then
  exit 1
fi
if [[ ! -d /home/solana/.ssh ]]; then
  exit 1
fi

# /solana-authorized_keys holds the public keys of users that are
# automatically granted access to ALL testnets.
#
# Adding an entry:
# 1. Run: ssh-keygen -t ecdsa -N '' -f ~/.ssh/id-solana-testnet
# 2. Inline ~/.ssh/id-solana-testnet.pub below
cat > /solana-authorized_keys <<EOF
ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFBNwLw0i+rI312gWshojFlNw9NV7WfaKeeUsYADqOvM2o4yrO2pPw+sgW8W+/rPpVyH7zU9WVRgTME8NgFV1Vc=
EOF

# The append runs as the solana user.
sudo -u solana bash -c "
  cat /solana-authorized_keys >> /home/solana/.ssh/authorized_keys
"

+ 242 - 0
net/scripts/ec2-provider.sh

@@ -0,0 +1,242 @@
+# |source| this file
+#
+# Utilities for working with EC2 instances
+#
+
+zone=
+region=
+
cloud_SetZone() {
  zone="$1"
  # AWS region is zone with the last character removed
  # (us-east-1b -> us-east-1).  ${zone%?} leaves an empty zone empty, where a
  # computed substring length of -1 would be an expansion error.
  region="${zone%?}"
}
+
+# Set the default zone
+cloud_SetZone "us-east-1b"
+
+# sshPrivateKey should be globally defined whenever this function is called.
+#
+# TODO: Remove usage of the sshPrivateKey global
__cloud_SshPrivateKeyCheck() {
  # Exits the script with status 1 unless the sshPrivateKey global is set
  # and names a readable file.
  # shellcheck disable=SC2154
  [[ -n $sshPrivateKey ]] || {
    echo Error: sshPrivateKey not defined
    exit 1
  }
  [[ -r $sshPrivateKey ]] || {
    echo "Error: file is not readable: $sshPrivateKey"
    exit 1
  }
}
+
+#
+# __cloud_FindInstances
+#
+# Find instances with name matching the specified pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# filter   - The instances to filter on
+#
+# examples:
+#   $ __cloud_FindInstances "exact-machine-name"
+#   $ __cloud_FindInstances "all-machines-with-a-common-machine-prefix*"
+#
__cloud_FindInstances() {
  declare filter="$1"

  # Match on the "name" tag, restricted to pending or running instances.
  # Three columns are requested per instance: instance ID, public IP and
  # private IP.  The instance ID fills the "name" slot of each entry.
  declare -a describeArgs=(
    --region "$region"
    --filters
    "Name=tag:name,Values=$filter"
    "Name=instance-state-name,Values=pending,running"
    --query "Reservations[].Instances[].[InstanceId,PublicIpAddress,PrivateIpAddress]"
    --output text
  )
  declare id public private

  instances=()
  while read -r id public private; do
    printf "%-30s | publicIp=%-16s privateIp=%s\n" "$id" "$public" "$private"
    instances+=("$id:$public:$private")
  done < <(aws ec2 describe-instances "${describeArgs[@]}")
}
+
+#
+# cloud_FindInstances [namePrefix]
+#
+# Find instances with names matching the specified prefix
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# namePrefix - The instance name prefix to look for
+#
+# examples:
+#   $ cloud_FindInstances all-machines-with-a-common-machine-prefix
+#
cloud_FindInstances() {
  # A trailing "*" turns the prefix into a wildcard pattern for the tag
  # filter used by __cloud_FindInstances.
  __cloud_FindInstances "${1}*"
}
+
+#
+# cloud_FindInstance [name]
+#
+# Find an instance with a name matching the exact pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# name - The instance name to look for
+#
+# examples:
+#   $ cloud_FindInstance exact-machine-name
+#
cloud_FindInstance() {
  # The name is passed through unchanged, with no wildcard added.
  __cloud_FindInstances "$1"
}
+
+
#
# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [imageName]
#                       [machineType] [bootDiskSize] [enableGpu]
#                       [startupScript] [address]
#
# Creates one or more identical instances.
#
# networkName   - unique name of this testnet; also the name of the EC2 key
#                 pair that is re-imported from "$sshPrivateKey.pub"
# namePrefix    - value of the "name" tag applied to every created instance
# numNodes      - number of instances to create
# imageName     - AMI ID for the instances
# machineType   - EC2 instance type
# bootDiskSize  - Optional size of the boot disk in GB
# enableGpu     - Optionally enable GPU, use the value "true" to enable.
#                 Not implemented for EC2 yet: "true" aborts the script
# startupScript - Optional startup script, passed to the instances as user data
# address       - Optional allocation ID of the Elastic IP to associate with
#                 the instance.  Requires that |numNodes| = 1
#
# Reads the `region`, `zone` and `sshPrivateKey` globals.  Any validation
# failure exits the script with status 1.
#
# Tip: use cloud_FindInstances to locate the instances once this function
#      returns
cloud_CreateInstances() {
  declare networkName="$1"
  declare namePrefix="$2"
  declare numNodes="$3"
  declare imageName="$4"
  declare machineType="$5"
  declare optionalBootDiskSize="$6"
  declare optionalGpu="$7"
  declare optionalStartupScript="$8"
  declare optionalAddress="$9"

  # Reject unsupported requests before any AWS state (key pair, instances) is
  # modified.
  if [[ $optionalGpu = true ]]; then
    echo TODO: GPU support not implemented yet
    exit 1
  fi
  if [[ -n $optionalAddress && $numNodes != 1 ]]; then
    echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
    exit 1
  fi

  __cloud_SshPrivateKeyCheck
  (
    set -x
    # Replace any key pair left over under this testnet's name with the
    # current public key.
    aws ec2 delete-key-pair --region "$region" --key-name "$networkName"
    aws ec2 import-key-pair --region "$region" --key-name "$networkName" \
      --public-key-material file://"${sshPrivateKey}".pub
  )

  declare -a args
  args=(
    --key-name "$networkName"
    --count "$numNodes"
    --region "$region"
    --placement "AvailabilityZone=$zone"
    --security-groups testnet
    --image-id "$imageName"
    --instance-type "$machineType"
    --tag-specifications "ResourceType=instance,Tags=[{Key=name,Value=$namePrefix}]"
  )
  if [[ -n $optionalBootDiskSize ]]; then
    args+=(
      --block-device-mapping "[{\"DeviceName\": \"/dev/sda1\", \"Ebs\": { \"VolumeSize\": $optionalBootDiskSize }}]"
    )
  fi
  if [[ -n $optionalStartupScript ]]; then
    args+=(
      --user-data "file://$optionalStartupScript"
    )
  fi

  (
    set -x
    aws ec2 run-instances "${args[@]}"
  )

  if [[ -n $optionalAddress ]]; then
    cloud_FindInstance "$namePrefix"
    if [[ ${#instances[@]} -ne 1 ]]; then
      echo "Failed to find newly created instance: $namePrefix"
      # Without exactly one match there is no instance ID to attach the
      # address to.
      exit 1
    fi

    # Entries are "instanceId:publicIp:privateIp"; keep the leading ID.
    declare instanceId="${instances[0]%%:*}"
    aws ec2 associate-address \
      --instance-id "$instanceId" \
      --region "$region" \
      --allocation-id "$optionalAddress"
  fi
}
+
+#
+# cloud_DeleteInstances
+#
+# Deletes all the instances listed in the `instances` array
+#
cloud_DeleteInstances() {
  # Test the number of entries.  ${#instances[0]} would be the string length
  # of the first entry only.
  if [[ ${#instances[@]} -eq 0 ]]; then
    echo No instances to delete
    return
  fi

  # Entries are "instanceId:publicIp:privateIp"; terminate-instances takes
  # just the instance IDs.
  declare -a ids=("${instances[@]%%:*}")
  (
    set -x
    aws ec2 terminate-instances --region "$region" --instance-ids "${ids[@]}"
  )
}
+
+
+#
+# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile]
+#
+# Fetch a file from the given instance.  This function uses a cloud-specific
+# mechanism to fetch the file
+#
cloud_FetchFile() {
  # shellcheck disable=SC2034 # instanceName is unused
  declare instanceName="$1"
  declare publicIp="$2"
  declare remoteFile="$3"
  declare localFile="$4"

  __cloud_SshPrivateKeyCheck

  # Plain scp as the solana user, authenticating with the testnet key.  Host
  # key checking is off and no ssh config file is read (-F /dev/null).
  declare -a scpArgs=(
    -o "StrictHostKeyChecking=no"
    -o "UserKnownHostsFile=/dev/null"
    -o "User=solana"
    -o "IdentityFile=$sshPrivateKey"
    -o "LogLevel=ERROR"
    -F /dev/null
    "solana@$publicIp:$remoteFile" "$localFile"
  )
  (
    set -x
    scp "${scpArgs[@]}"
  )
}

+ 201 - 0
net/scripts/gce-provider.sh

@@ -0,0 +1,201 @@
+# |source| this file
+#
+# Utilities for working with GCE instances
+#
+
+# Default zone
+zone="us-west1-b"
cloud_SetZone() {
  # Records the zone in the `zone` global; nothing else is derived from it.
  zone=$1
}
+
+
+#
+# __cloud_FindInstances
+#
+# Find instances matching the specified pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# filter   - The instances to filter on
+#
+# examples:
+#   $ __cloud_FindInstances "name=exact-machine-name"
+#   $ __cloud_FindInstances "name~^all-machines-with-a-common-machine-prefix"
+#
__cloud_FindInstances() {
  declare filter="$1"
  # Columns requested from gcloud, in the order `read` consumes them below.
  declare format='value(name,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)'
  # `zone` is deliberately not declared here.  It is not one of the columns,
  # and a local of that name would hide the `zone` global for the duration of
  # the call.
  declare name publicIp privateIp status

  # Entries are stored as "name:publicIp:privateIp"
  instances=()
  while read -r name publicIp privateIp status; do
    if [[ $status != RUNNING ]]; then
      echo "Warning: $name is not RUNNING, ignoring it."
      continue
    fi
    printf "%-30s | publicIp=%-16s privateIp=%s\n" "$name" "$publicIp" "$privateIp"

    instances+=("$name:$publicIp:$privateIp")
  done < <(gcloud compute instances list \
             --filter="$filter" \
             --format "$format")
}
+#
+# cloud_FindInstances [namePrefix]
+#
+# Find instances with names matching the specified prefix
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# namePrefix - The instance name prefix to look for
+#
+# examples:
+#   $ cloud_FindInstances all-machines-with-a-common-machine-prefix
+#
cloud_FindInstances() {
  # "name~^<prefix>" is a filter that anchors the match at the start of the
  # instance name.
  __cloud_FindInstances "name~^${1}"
}
+
+#
+# cloud_FindInstance [name]
+#
+# Find an instance with a name matching the exact pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP"
+#
+# name - The instance name to look for
+#
+# examples:
+#   $ cloud_FindInstance exact-machine-name
+#
cloud_FindInstance() {
  # "name=<name>" filters on the instance name itself rather than a prefix.
  __cloud_FindInstances "name=${1}"
}
+
#
# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [imageName]
#                       [machineType] [bootDiskSize] [enableGpu]
#                       [startupScript] [address]
#
# Creates one or more identical instances.
#
# networkName   - unique name of this testnet, recorded in the instance
#                 metadata as "testnet=<networkName>"
# namePrefix    - unique string to prefix all the instance names with.  A
#                 single instance is named exactly |namePrefix|; multiple
#                 instances get a zero-padded counter appended
# numNodes      - number of instances to create
# imageName     - Disk image for the instances
# machineType   - GCE machine type
# bootDiskSize  - Optional size of the boot disk in GB
# enableGpu     - Optionally enable GPU, use the value "true" to enable.
#                 Requests "count=4,type=nvidia-tesla-k80"
# startupScript - Optional startup script to execute when the instance boots
# address       - Optional name of the GCE static IP address to attach to the
#                 instance.  Requires that |numNodes| = 1 and that addressName
#                 has been provisioned in the GCE region that is hosting `$zone`
#
# Reads the `zone` global.
#
# Tip: use cloud_FindInstances to locate the instances once this function
#      returns
cloud_CreateInstances() {
  declare networkName="$1"
  declare namePrefix="$2"
  declare numNodes="$3"
  declare imageName="$4"
  declare machineType="$5"
  declare optionalBootDiskSize="$6"
  declare optionalGpu="$7"
  declare optionalStartupScript="$8"
  declare optionalAddress="$9"

  if [[ -n $optionalAddress && $numNodes != 1 ]]; then
    echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
    exit 1
  fi

  declare -a nodes=()
  if [[ $numNodes = 1 ]]; then
    nodes=("$namePrefix")
  else
    # Append a counter zero-padded to the width of numNodes, e.g.
    # prefix01 .. prefix10.  The names are built one at a time because reading
    # multi-line `seq` output with `read <<<$(...)` yields only the first name
    # when the here-string is not word-split.
    declare i node
    for ((i = 1; i <= numNodes; i++)); do
      printf -v node '%s%0*d' "$namePrefix" "${#numNodes}" "$i"
      nodes+=("$node")
    done
  fi

  declare -a args
  args=(
    "--zone=$zone"
    "--tags=testnet"
    "--metadata=testnet=$networkName"
    "--image=$imageName"
    "--machine-type=$machineType"
  )
  if [[ -n $optionalBootDiskSize ]]; then
    args+=(
      "--boot-disk-size=${optionalBootDiskSize}GB"
    )
  fi
  if [[ $optionalGpu = true ]]; then
    args+=(
      "--accelerator=count=4,type=nvidia-tesla-k80"
      --maintenance-policy TERMINATE
      --restart-on-failure
    )
  fi
  if [[ -n $optionalStartupScript ]]; then
    args+=(
      --metadata-from-file "startup-script=$optionalStartupScript"
    )
  fi
  if [[ -n $optionalAddress ]]; then
    args+=(
      "--address=$optionalAddress"
    )
  fi

  (
    set -x
    gcloud beta compute instances create "${nodes[@]}" "${args[@]}"
  )
}
+
+#
+# cloud_DeleteInstances
+#
+# Deletes all the instances listed in the `instances` array
+#
cloud_DeleteInstances() {
  # Test the number of entries.  ${#instances[0]} would be the string length
  # of the first entry only.
  if [[ ${#instances[@]} -eq 0 ]]; then
    echo No instances to delete
    return
  fi

  # Entries are "name:publicIp:privateIp"; gcloud takes just the names.
  declare -a names=("${instances[@]%%:*}")

  (
    set -x
    gcloud beta compute instances delete --zone "$zone" --quiet "${names[@]}"
  )
}
+
+
+#
+# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile]
+#
+# Fetch a file from the given instance.  This function uses a cloud-specific
+# mechanism to fetch the file
+#
cloud_FetchFile() {
  declare instanceName="$1"
  # shellcheck disable=SC2034 # publicIp is unused
  declare publicIp="$2"
  declare remoteFile="$3"
  declare localFile="$4"

  # gcloud addresses the instance by name within `$zone`, so the public IP is
  # not needed here.
  declare -a scpArgs=(--zone "$zone" "$instanceName:$remoteFile" "$localFile")
  (
    set -x
    gcloud compute scp "${scpArgs[@]}"
  )
}

+ 0 - 187
net/scripts/gcloud.sh

@@ -1,187 +0,0 @@
-# |source| this file
-#
-# Utilities for working with gcloud
-#
-
-
-#
-# gcloud_FindInstances [filter] [options]
-#
-# Find instances matching the specified pattern.
-#
-# The `instances` array is reset on every call.  For each matching instance
-# that is RUNNING, an entry in the `instances` array will be added with the
-# following information about the instance:
-#   "name:zone:public IP:private IP"
-#
-# Instances whose status is not RUNNING are skipped, and a warning naming the
-# instance is echoed to stdout.
-#
-# filter   - The instances to filter on
-# options  - If set to the string "show", the list of instances will be echoed
-#            to stdout
-#
-# examples:
-#   $ gcloud_FindInstances "name=exact-machine-name"
-#   $ gcloud_FindInstances "name~^all-machines-with-a-common-machine-prefix"
-#
-gcloud_FindInstances() {
-  declare filter="$1"
-  declare options="$2"
-  # Not declared local: the result array is meant to be read by the caller
-  instances=()
-
-  declare name zone publicIp privateIp status
-  # One instance per line of gcloud output, split on whitespace into five fields.
-  # NOTE(review): presumably relies on gcloud emitting a non-empty value for
-  # every column (eg, an instance without a public IP) -- confirm.
-  while read -r name zone publicIp privateIp status; do
-    if [[ $status != RUNNING ]]; then
-      echo "Warning: $name is not RUNNING, ignoring it."
-      continue
-    fi
-    if [[ $options = show ]]; then
-      printf "%-30s | %-16s publicIp=%-16s privateIp=%s\n" "$name" "$zone" "$publicIp" "$privateIp"
-    fi
-
-    instances+=("$name:$zone:$publicIp:$privateIp")
-  done < <(gcloud compute instances list \
-             --filter="$filter" \
-             --format 'value(name,zone,networkInterfaces[0].accessConfigs[0].natIP,networkInterfaces[0].networkIP,status)')
-}
-
-#
-# gcloud_ForEachInstance [cmd] [extra args to cmd]
-#
-# Execute a command for each element in the `instances` array
-#
-#   cmd   - The command to execute on each instance
-#           The command will receive the following arguments, followed by
-#           any additional arguments supplied to gcloud_ForEachInstance:
-#               name     - name of the instance
-#               zone     - zone the instance is located in
-#               publicIp - The public IP address of this instance
-#               privateIp - The private IP address of this instance
-#               count    - Monotonically increasing count for each
-#                          invocation of cmd, starting at 1
-#               ...      - Extra args to cmd..
-#
-# Exits the shell with status 1 if cmd is empty.
-#
-gcloud_ForEachInstance() {
-  declare cmd="$1"
-  shift
-  [[ -n $cmd ]] || { echo gcloud_ForEachInstance: cmd not specified; exit 1; }
-
-  declare count=1
-  for info in "${instances[@]}"; do
-    declare name zone publicIp privateIp
-    # Split the "name:zone:publicIp:privateIp" entry on ':'
-    IFS=: read -r name zone publicIp privateIp < <(echo "$info")
-
-    # eval joins its arguments with spaces and re-parses the result, so cmd may
-    # itself contain several words; arguments holding whitespace or shell
-    # metacharacters are re-split as well.
-    eval "$cmd" "$name" "$zone" "$publicIp" "$privateIp" "$count" "$@"
-    count=$((count + 1))
-  done
-}
-
-#
-# gcloud_CreateInstances [namePrefix] [numNodes] [zone] [imageName]
-#                        [machineType] [bootDiskSize] [accelerator]
-#                        [startupScript] [address]
-#
-# Creates one or more identical instances.
-#
-# namePrefix    - unique string to prefix all the instance names with
-# numNodes      - number of instances to create
-# zone          - zone to create the instances in
-# imageName     - Disk image for the instances
-# machineType   - GCE machine type
-# bootDiskSize  - Optional size of the boot disk
-# accelerator   - Optional accelerator to attach to the instance(s).
-#                 eg, request 4 K80 GPUs with "count=4,type=nvidia-tesla-k80"
-# startupScript - Optional startup script to execute when the instance boots
-# address       - Optional name of the GCE static IP address to attach to the
-#                 instance.  Requires that |numNodes| = 1 and that the address
-#                 has been provisioned in the GCE region that is hosting |zone|
-#
-# Exits the shell with status 1 if |address| is supplied and |numNodes| is
-# not 1.
-#
-# Tip: use gcloud_FindInstances to locate the instances once this function
-#      returns
-gcloud_CreateInstances() {
-  declare namePrefix="$1"
-  declare numNodes="$2"
-  declare zone="$3"
-  declare imageName="$4"
-  declare machineType="$5"
-  declare optionalBootDiskSize="$6"
-  declare optionalAccelerator="$7"
-  declare optionalStartupScript="$8"
-  declare optionalAddress="$9"
-
-  # A single node is named exactly |namePrefix|.  Multiple nodes get a numeric
-  # suffix zero-padded to the number of digits in |numNodes| (eg, 01..10).
-  declare nodes
-  if [[ $numNodes = 1 ]]; then
-    nodes=("$namePrefix")
-  else
-    # NOTE(review): `read` consumes a single line, so this depends on the
-    # unquoted here-string collapsing seq's newline-separated output into one
-    # line -- confirm this still holds on bash >= 4.4.
-    read -ra nodes <<<$(seq -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes")
-  fi
-
-  # Arguments common to every invocation; optional ones are appended below
-  declare -a args
-  args=(
-    "--zone=$zone"
-    "--tags=testnet"
-    "--image=$imageName"
-    "--machine-type=$machineType"
-  )
-  if [[ -n $optionalBootDiskSize ]]; then
-    args+=(
-      "--boot-disk-size=$optionalBootDiskSize"
-    )
-  fi
-  if [[ -n $optionalAccelerator ]]; then
-    # The maintenance/restart flags are only passed together with an accelerator
-    args+=(
-      "--accelerator=$optionalAccelerator"
-      --maintenance-policy TERMINATE
-      --restart-on-failure
-    )
-  fi
-  if [[ -n $optionalStartupScript ]]; then
-    args+=(
-      --metadata-from-file "startup-script=$optionalStartupScript"
-    )
-  fi
-
-  if [[ -n $optionalAddress ]]; then
-    [[ $numNodes = 1 ]] || {
-      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress"
-      exit 1
-    }
-    args+=(
-      "--address=$optionalAddress"
-    )
-  fi
-
-  # Subshell so that `set -x` tracing does not leak into the caller
-  (
-    set -x
-    gcloud beta compute instances create "${nodes[@]}" "${args[@]}"
-  )
-}
-
-#
-# gcloud_DeleteInstances [yes]
-#
-# Deletes all the instances listed in the `instances` array
-#
-# If yes = "true", skip the delete confirmation
-#
-# Prints "No instances to delete" and returns without calling gcloud when the
-# first entry of `instances` is empty or missing.
-#
-gcloud_DeleteInstances() {
-  declare maybeQuiet=
-  if [[ $1 = true ]]; then
-    maybeQuiet=--quiet
-  fi
-
-  # NOTE(review): ${#instances[0]} is the string length of the first element,
-  # not the number of elements (that would be ${#instances[@]}).
-  if [[ ${#instances[0]} -eq 0 ]]; then
-    echo No instances to delete
-    return
-  fi
-  # Keep only the text before the first ':' of every entry, ie the name
-  declare names=("${instances[@]/:*/}")
-
-  # Assume all instances are in the same zone
-  # TODO: One day this assumption will be invalid
-  declare zone
-  # The zone is the second ':'-separated field of the first entry
-  IFS=: read -r _ zone _ < <(echo "${instances[0]}")
-
-  (
-    set -x
-    # $maybeQuiet is left unquoted so that an empty value expands to no argument
-    gcloud beta compute instances delete --zone "$zone" $maybeQuiet "${names[@]}"
-  )
-}
-