Browse Source

Add script for managing colo resources à la gce.sh (#5854)

automerge
Trent Nelson 6 năm trước cách đây
mục cha
commit
2636a9c9f1

+ 1 - 0
net/colo.sh

@@ -0,0 +1 @@
+gce.sh

+ 25 - 12
net/gce.sh

@@ -49,6 +49,18 @@ azure)
   blockstreamerMachineType=Standard_D16s_v3
   blockstreamerMachineType=Standard_D16s_v3
   replicatorMachineType=Standard_D4s_v3
   replicatorMachineType=Standard_D4s_v3
   ;;
   ;;
+colo)
+  # shellcheck source=net/scripts/colo-provider.sh
+  source "$here"/scripts/colo-provider.sh
+
+  cpuBootstrapLeaderMachineType=0
+  gpuBootstrapLeaderMachineType=1
+  bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
+  fullNodeMachineType=$cpuBootstrapLeaderMachineType
+  clientMachineType=0
+  blockstreamerMachineType=0
+  replicatorMachineType=0
+  ;;
 *)
 *)
   echo "Error: Unknown cloud provider: $cloudProvider"
   echo "Error: Unknown cloud provider: $cloudProvider"
   ;;
   ;;
@@ -95,6 +107,7 @@ Manage testnet instances
  config - configure the testnet and write a config file describing it
  config - configure the testnet and write a config file describing it
  delete - delete the testnet
  delete - delete the testnet
  info   - display information about the currently configured testnet
  info   - display information about the currently configured testnet
+ status - display status information of all resources
 
 
  common options:
  common options:
    -p [prefix]      - Optional common prefix for instance names to avoid
    -p [prefix]      - Optional common prefix for instance names to avoid
@@ -147,7 +160,7 @@ EOF
 command=$1
 command=$1
 [[ -n $command ]] || usage
 [[ -n $command ]] || usage
 shift
 shift
-[[ $command = create || $command = config || $command = info || $command = delete ]] ||
+[[ $command = create || $command = config || $command = info || $command = delete || $command = status ]] ||
   usage "Invalid command: $command"
   usage "Invalid command: $command"
 
 
 shortArgs=()
 shortArgs=()
@@ -243,12 +256,7 @@ fi
 case $cloudProvider in
 case $cloudProvider in
 gce)
 gce)
   ;;
   ;;
-ec2)
-  if [[ -n $fullNodeAdditionalDiskSizeInGb ]] ; then
-    usage "Error: --fullnode-additional-disk-size-gb currently only supported with cloud provider: gce"
-  fi
-  ;;
-azure)
+ec2|azure|colo)
   if [[ -n $fullNodeAdditionalDiskSizeInGb ]] ; then
   if [[ -n $fullNodeAdditionalDiskSizeInGb ]] ; then
     usage "Error: --fullnode-additional-disk-size-gb currently only supported with cloud provider: gce"
     usage "Error: --fullnode-additional-disk-size-gb currently only supported with cloud provider: gce"
   fi
   fi
@@ -682,7 +690,8 @@ EOF
   else
   else
     cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
     cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
       "$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
       "$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
-      "$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb"
+      "$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
+      "$sshPrivateKey"
   fi
   fi
 
 
   if [[ $additionalFullNodeCount -gt 0 ]]; then
   if [[ $additionalFullNodeCount -gt 0 ]]; then
@@ -702,7 +711,8 @@ EOF
       fi
       fi
       cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
       cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
         "$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
         "$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
-        "$startupScript" "" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" &
+        "$startupScript" "" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
+        "$sshPrivateKey" &
     done
     done
 
 
     wait
     wait
@@ -711,19 +721,19 @@ EOF
   if [[ $clientNodeCount -gt 0 ]]; then
   if [[ $clientNodeCount -gt 0 ]]; then
     cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
     cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
       "$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
       "$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
-      "$startupScript" "" "$bootDiskType" ""
+      "$startupScript" "" "$bootDiskType" "" "$sshPrivateKey"
   fi
   fi
 
 
   if $blockstreamer; then
   if $blockstreamer; then
     cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
     cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
       "$enableGpu" "$blockstreamerMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
       "$enableGpu" "$blockstreamerMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
-      "$startupScript" "$blockstreamerAddress" "$bootDiskType" ""
+      "$startupScript" "$blockstreamerAddress" "$bootDiskType" "" "$sshPrivateKey"
   fi
   fi
 
 
   if [[ $replicatorNodeCount -gt 0 ]]; then
   if [[ $replicatorNodeCount -gt 0 ]]; then
     cloud_CreateInstances "$prefix" "$prefix-replicator" "$replicatorNodeCount" \
     cloud_CreateInstances "$prefix" "$prefix-replicator" "$replicatorNodeCount" \
       false "$replicatorMachineType" "${zones[0]}" "$replicatorBootDiskSizeInGb" \
       false "$replicatorMachineType" "${zones[0]}" "$replicatorBootDiskSizeInGb" \
-      "$startupScript" "" "" ""
+      "$startupScript" "" "" "" "$sshPrivateKey"
   fi
   fi
 
 
   $metricsWriteDatapoint "testnet-deploy net-create-complete=1"
   $metricsWriteDatapoint "testnet-deploy net-create-complete=1"
@@ -776,6 +786,9 @@ info)
     printNode replicator "$ipAddress" "$ipAddressPrivate" "$zone"
     printNode replicator "$ipAddress" "$ipAddressPrivate" "$zone"
   done
   done
   ;;
   ;;
+status)
+  cloud_StatusAll
+  ;;
 *)
 *)
   usage "Unknown command: $command"
   usage "Unknown command: $command"
 esac
 esac

+ 1 - 7
net/scripts/add-datacenter-solana-user-authorized_keys.sh

@@ -1,11 +1,6 @@
 #!/usr/bin/env bash
 #!/usr/bin/env bash
 set -ex
 set -ex
 
 
-[[ $(uname) = Linux ]] || exit 1
-[[ $USER = root ]] || exit 1
-
-[[ -d /home/solana/.ssh ]] || mkdir -p /home/solana/.ssh
-
 cd "$(dirname "$0")"
 cd "$(dirname "$0")"
 
 
 # shellcheck source=net/scripts/solana-user-authorized_keys.sh
 # shellcheck source=net/scripts/solana-user-authorized_keys.sh
@@ -14,7 +9,6 @@ source solana-user-authorized_keys.sh
 # solana-user-authorized_keys.sh defines the public keys for users that should
 # solana-user-authorized_keys.sh defines the public keys for users that should
 # automatically be granted access to ALL datacenter nodes.
 # automatically be granted access to ALL datacenter nodes.
 for i in "${!SOLANA_USERS[@]}"; do
 for i in "${!SOLANA_USERS[@]}"; do
-  echo "environment=\"SOLANA_USER=${SOLANA_USERS[i]}\" ${SOLANA_PUBKEYS[i]}" >> /solana-authorized_keys
+  echo "environment=\"SOLANA_USER=${SOLANA_USERS[i]}\" ${SOLANA_PUBKEYS[i]}"
 done
 done
 
 
-sudo -u solana mv /solana-authorized_keys /home/solana/.ssh/authorized_keys

+ 9 - 1
net/scripts/azure-provider.sh

@@ -319,4 +319,12 @@ cloud_FetchFile() {
 cloud_CreateAndAttachPersistentDisk() {
 cloud_CreateAndAttachPersistentDisk() {
   echo "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for azure"
   echo "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for azure"
   exit 1
   exit 1
-}
+}
+
+#
+# cloud_StatusAll
+#
+# Not yet implemented for this cloud provider
+cloud_StatusAll() {
+  echo "ERROR: cloud_StatusAll is not yet implemented for azure"
+}

+ 276 - 0
net/scripts/colo-provider.sh

@@ -0,0 +1,276 @@
+#!/usr/bin/env bash
+
+# |source| this file
+#
+# Utilities for working with Colo instances
+#
+
+declare COLO_TODO_PARALLELIZE=false
+
+__cloud_colo_here="$(dirname "${BASH_SOURCE[0]}")"
+# shellcheck source=net/scripts/colo-utils.sh
+source "${__cloud_colo_here}/colo-utils.sh"
+
+# Default zone
+cloud_DefaultZone() {
+  echo "Denver"
+}
+
+#
+# __cloud_FindInstances
+#
+# Find instances matching the specified pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP:zone"
+#
+# filter   - The instances to filter on
+#
+# examples:
+#   $ __cloud_FindInstances "name=exact-machine-name"
+#   $ __cloud_FindInstances "name~^all-machines-with-a-common-machine-prefix"
+#
+__cloud_FindInstances() {
+  declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME INSTANCES_TEXT
+  declare filter=$1
+  instances=()
+
+  if ! $COLO_TODO_PARALLELIZE; then
+    colo_load_resources
+    colo_load_availability false
+  fi
+  INSTANCES_TEXT="$(
+    for AVAIL in "${COLO_RES_AVAILABILITY[@]}"; do
+      IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$AVAIL"
+      if [[ $INSTNAME =~ $filter ]]; then
+        IP=$PRIV_IP  # Colo public IPs are firewalled to only allow UDP(8000-10000).  Reuse private IP as public and require VPN
+        printf "%-40s | publicIp=%-16s privateIp=%s zone=%s\n" "$INSTNAME" "$IP" "$PRIV_IP" "$ZONE" 1>&2
+        echo -e "${INSTNAME}:${IP}:${PRIV_IP}:$ZONE"
+      fi
+    done | sort -t $'\v' -k1
+  )"
+  if [[ -n "$INSTANCES_TEXT" ]]; then
+    while read -r LINE; do
+      instances+=( "$LINE" )
+    done <<<"$INSTANCES_TEXT"
+  fi
+}
+
+#
+# cloud_FindInstances [namePrefix]
+#
+# Find instances with names matching the specified prefix
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP:zone"
+#
+# namePrefix - The instance name prefix to look for
+#
+# examples:
+#   $ cloud_FindInstances all-machines-with-a-common-machine-prefix
+#
+cloud_FindInstances() {
+  declare filter="^${1}.*"
+  __cloud_FindInstances "$filter"
+}
+
+#
+# cloud_FindInstance [name]
+#
+# Find an instance with a name matching the exact pattern.
+#
+# For each matching instance, an entry in the `instances` array will be added with the
+# following information about the instance:
+#   "name:public IP:private IP:zone"
+#
+# name - The instance name to look for
+#
+# examples:
+#   $ cloud_FindInstance exact-machine-name
+#
+cloud_FindInstance() {
+  declare name="^${1}$"
+  __cloud_FindInstances "$name"
+}
+
+#
+# cloud_Initialize [networkName]
+#
+# Perform one-time initialization that may be required for the given testnet.
+#
+# networkName   - unique name of this testnet
+#
+# This function will be called before |cloud_CreateInstances|
+cloud_Initialize() {
+  # networkName=$1 # unused
+  # zone=$2 #unused
+  colo_load_resources
+  if $COLO_TODO_PARALLELIZE; then
+    colo_load_availability
+  fi
+}
+
+#
+# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [enableGpu]
+#                       [machineType] [zone] [bootDiskSize] [startupScript]
+#                       [address] [bootDiskType] [additionalDiskSize]
+#                       [sshPrivateKey]
+#
+# Requisitions one or more colo machines.
+#
+# networkName   - unique name of this testnet (unused for colo)
+# namePrefix    - unique string to prefix all the instance names with
+# numNodes      - number of instances to create
+# enableGpu     - unused for colo
+# machineType   - minimum machine type required; colo machine types are
+#                 currently just the GPU count of the machine
+# zone          - unused for colo
+# bootDiskSize  - unused for colo
+# startupScript - unused for colo
+# address       - unused for colo
+# bootDiskType  - unused for colo
+# additionalDiskSize - unused for colo
+# sshPrivateKey - path to the SSH private key to install on each requisitioned
+#                 machine; the matching "<sshPrivateKey>.pub" file is read too
+#
+# Tip: use cloud_FindInstances to locate the instances once this function
+#      returns
+cloud_CreateInstances() {
+  # Arguments are positional; colo only consumes the four bound below, the
+  # remaining slots are kept (commented out) to document the calling convention.
+  #declare networkName="$1" # unused
+  declare namePrefix="$2"
+  declare numNodes="$3"
+  #declare enableGpu="$4" # unused
+  declare machineType="$5"
+  # declare zone="$6" # unused
+  #declare optionalBootDiskSize="$7" # unused
+  #declare optionalStartupScript="$8" # unused
+  #declare optionalAddress="$9" # unused
+  #declare optionalBootDiskType="${10}" # unused
+  #declare optionalAdditionalDiskSize="${11}" # unused
+  declare sshPrivateKey="${12}"
+
+  # A single node is named exactly namePrefix; otherwise every node gets a
+  # zero-padded sequence number (as wide as numNodes) appended to the prefix.
+  declare node
+  declare -a nodes
+  if [[ $numNodes = 1 ]]; then
+    nodes=("$namePrefix")
+  else
+    for node in $(seq -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes"); do
+      nodes+=("$node")
+    done
+  fi
+
+  if $COLO_TODO_PARALLELIZE; then
+    declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME INDEX RES LINE
+    declare -a AVAILABLE
+    declare AVAILABLE_TEXT
+    # Collect "machineType\vprivateIp" for every FREE, compatible machine that
+    # has not been requisitioned yet, ordered by ascending machine type.
+    AVAILABLE_TEXT="$(
+      for RES in "${COLO_RES_AVAILABILITY[@]}"; do
+        IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$RES"
+        if [[ "FREE" = "$STATUS" ]]; then
+          # colo_res_index_from_ip and colo_node_requisition both key on the
+          # private IP, so the private IP is what must be carried forward.
+          INDEX=$(colo_res_index_from_ip "$PRIV_IP")
+          RES_MACH="${COLO_RES_MACHINE[$INDEX]}"
+          if colo_machine_types_compatible "$RES_MACH" "$machineType"; then
+            if ! colo_node_is_requisitioned "$INDEX" "${COLO_RES_REQUISITIONED[*]}"; then
+              echo -e "$RES_MACH\v$PRIV_IP"
+            fi
+          fi
+        fi
+      done | sort -nt $'\v' -k1,1
+    )"
+
+    if [[ -n "$AVAILABLE_TEXT" ]]; then
+      while read -r LINE; do
+        AVAILABLE+=("$LINE")
+      done <<<"$AVAILABLE_TEXT"
+    fi
+
+    if [[ ${#AVAILABLE[@]} -lt $numNodes ]]; then
+      echo "Insufficient resources available to allocate $numNodes $namePrefix" 1>&2
+      exit 1
+    fi
+
+    declare AI=0
+    for node in "${nodes[@]}"; do
+      IFS=$'\v' read -r _ IP <<<"${AVAILABLE[$AI]}"
+      # The key must be forwarded here as well, otherwise the remote setup
+      # script has no key material to install.
+      colo_node_requisition "$IP" "$node" "$sshPrivateKey" >/dev/null
+      AI=$((AI+1))
+    done
+  else
+    # Walk the resource table in order and try to lock each compatible machine
+    # until enough nodes are held or the table is exhausted.  A machine whose
+    # requisition fails is simply skipped.
+    declare RES_MACH IP
+    declare RI=0
+    declare NI=0
+    while [[ $NI -lt $numNodes && $RI -lt $COLO_RES_N ]]; do
+      node="${nodes[$NI]}"
+      RES_MACH="${COLO_RES_MACHINE[$RI]}"
+      IP="${COLO_RES_IP_PRIV[$RI]}"
+      if colo_machine_types_compatible "$RES_MACH" "$machineType"; then
+        if colo_node_requisition "$IP" "$node" "$sshPrivateKey" >/dev/null; then
+          NI=$((NI+1))
+        fi
+      fi
+      RI=$((RI+1))
+    done
+
+    # Report a short allocation instead of returning success with fewer
+    # machines than requested.
+    if [[ $NI -lt $numNodes ]]; then
+      echo "Insufficient resources available to allocate $numNodes $namePrefix" 1>&2
+      return 1
+    fi
+  fi
+}
+
+#
+# cloud_DeleteInstances
+#
+# Deletes all the instances listed in the `instances` array
+#
+cloud_DeleteInstances() {
+  declare _ IP _ _
+  for instance in "${instances[@]}"; do
+    IFS=':' read -r _ IP _ _ <<< "$instance"
+    colo_node_free "$IP" >/dev/null
+  done
+}
+
+#
+# cloud_WaitForInstanceReady [instanceName] [instanceIp] [instanceZone] [timeout]
+#
+# Return once the newly created VM instance is responding.  This function is cloud-provider specific.
+#
+cloud_WaitForInstanceReady() {
+  #declare instanceName="$1" # unused
+  declare instanceIp="$2"
+  declare timeout="$4"
+
+  timeout "${timeout}"s bash -c "set -o pipefail; until ping -c 3 $instanceIp | tr - _; do echo .; done"
+}
+
+#
+# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile]
+#
+# Fetch a file from the given instance.  This function uses a cloud-specific
+# mechanism to fetch the file
+#
+cloud_FetchFile() {
+  #declare instanceName="$1" # unused
+  declare publicIp="$2"
+  declare remoteFile="$3"
+  declare localFile="$4"
+  #declare zone="$5" # unused
+  scp \
+    -o "StrictHostKeyChecking=no" \
+    -o "UserKnownHostsFile=/dev/null" \
+    -o "User=solana" \
+    -o "LogLevel=ERROR" \
+    -F /dev/null \
+    "solana@$publicIp:$remoteFile" "$localFile"
+}
+
+cloud_StatusAll() {
+  declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME
+  if ! $COLO_TODO_PARALLELIZE; then
+    colo_load_resources
+    colo_load_availability false
+  fi
+  for AVAIL in "${COLO_RES_AVAILABILITY[@]}"; do
+    IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$AVAIL"
+    printf "%-30s | publicIp=%-16s privateIp=%s status=%s who=%s zone=%s inst=%s\n" "$HOST_NAME" "$IP" "$PRIV_IP" "$STATUS" "$LOCK_USER" "$ZONE" "$INSTNAME"
+  done
+}

+ 277 - 0
net/scripts/colo-utils.sh

@@ -0,0 +1,277 @@
+#!/usr/bin/env bash
+
+declare -r SOLANA_LOCK_FILE="/home/solana/.solana.lock"
+
+__colo_here="$(dirname "${BASH_SOURCE[0]}")"
+# Load colo resource specs
+export COLO_RES_N=0
+export COLO_RES_HOSTNAME=()
+export COLO_RES_IP=()
+export COLO_RES_IP_PRIV=()
+export COLO_RES_CPU_CORES=()
+export COLO_RES_RAM_GB=()
+export COLO_RES_STORAGE_TYPE=()
+export COLO_RES_STORAGE_CAP_GB=()
+export COLO_RES_ADD_STORAGE_TYPE=()
+export COLO_RES_ADD_STORAGE_CAP_GB=()
+export COLO_RES_MACHINE=()
+
+export COLO_RESOURCES_LOADED=false
+colo_load_resources() {
+  # Loads the '|'-separated resource table "colo_nodes" (located next to this
+  # script) into the COLO_RES_* arrays, one array element per machine, and sets
+  # COLO_RES_N to the number of rows.  The table is only parsed once; later
+  # calls are no-ops while COLO_RESOURCES_LOADED is true.
+  #
+  # Row and field variables are local so that parsing does not leak
+  # one-letter globals into the sourcing script.
+  declare LINE H I PI C M ST SC AST ASC G Z
+  declare VT=$'\v'
+  if ! ${COLO_RESOURCES_LOADED}; then
+    # Rows are consumed in ascending numeric order of field 10, the value
+    # stored in COLO_RES_MACHINE.
+    while read -r LINE; do
+      IFS='|' read -r H I PI C M ST SC AST ASC G Z <<<"$LINE"
+      COLO_RES_HOSTNAME+=( "$H" )
+      COLO_RES_IP+=( "$I" )
+      COLO_RES_IP_PRIV+=( "$PI" )
+      COLO_RES_CPU_CORES+=( "$C" )
+      COLO_RES_RAM_GB+=( "$M" )
+      COLO_RES_STORAGE_TYPE+=( "$ST" )
+      COLO_RES_STORAGE_CAP_GB+=( "$SC" )
+      # Additional-storage lists are comma separated in the file and stored
+      # vertical-tab separated.
+      COLO_RES_ADD_STORAGE_TYPE+=( "${AST//,/$VT}" )
+      COLO_RES_ADD_STORAGE_CAP_GB+=( "${ASC//,/$VT}" )
+      COLO_RES_MACHINE+=( "$G" )
+      COLO_RES_ZONE+=( "$Z" )
+      COLO_RES_N=$((COLO_RES_N+1))
+    done < <(sort -nt'|' -k10,10 "$__colo_here"/colo_nodes)
+    COLO_RESOURCES_LOADED=true
+  fi
+}
+
+declare COLO_RES_AVAILABILITY_CACHED=false
+declare -ax COLO_RES_AVAILABILITY
+colo_load_availability() {
+  declare USE_CACHE=${1:-${COLO_RES_AVAILABILITY_CACHED}}
+  declare LINE PRIV_IP STATUS LOCK_USER I IP HOST_NAME ZONE INSTNAME
+  if ! $USE_CACHE; then
+    COLO_RES_AVAILABILITY=()
+    COLO_RES_REQUISITIONED=()
+    while read -r LINE; do
+      IFS=$'\v' read -r PRIV_IP STATUS LOCK_USER INSTNAME <<< "$LINE"
+      I=$(colo_res_index_from_ip "$PRIV_IP")
+      IP="${COLO_RES_IP[$I]}"
+      HOST_NAME="${COLO_RES_HOSTNAME[$I]}"
+      ZONE="${COLO_RES_ZONE[$I]}"
+      COLO_RES_AVAILABILITY+=( "$(echo -e "$HOST_NAME\v$IP\v$PRIV_IP\v$STATUS\v$ZONE\v$LOCK_USER\v$INSTNAME")" )
+    done < <(colo_node_status_all | sort -t $'\v' -k1)
+    COLO_RES_AVAILABILITY_CACHED=true
+  fi
+}
+
+colo_res_index_from_ip() {
+  declare IP="$1"
+  for i in "${!COLO_RES_IP_PRIV[@]}"; do
+    if [ "$IP" = "${COLO_RES_IP_PRIV[$i]}" ]; then
+      echo "$i"
+      return 0
+    fi
+  done
+  return 1
+}
+
+colo_instance_run() {
+  # Runs CMD on the machine at IP over ssh as user "solana" (3 second connect
+  # timeout, stdin detached) and prints every line of the combined
+  # stdout/stderr as "IP\vexitStatus\vline".  Returns the ssh exit status.
+  declare IP=$1
+  declare CMD="$2"
+  declare OUT LINE
+  declare RC=0
+  # `|| RC=$?` records a failing ssh without tripping errexit and without
+  # changing the caller's `set -e` state.
+  OUT=$(ssh -l solana -o "ConnectTimeout=3" -n "$IP" "$CMD" 2>&1) || RC=$?
+  while read -r LINE; do
+    # printf keeps backslashes in the remote output literal.
+    printf '%s\v%s\v%s\n' "$IP" "$RC" "$LINE"
+  done <<< "$OUT"
+  return "$RC"
+}
+
+colo_instance_run_foreach() {
+  # Usage: colo_instance_run_foreach [ip...] cmd
+  #
+  # Runs cmd via colo_instance_run on every listed IP concurrently and waits
+  # for all of them to finish.  When only cmd is given, the first COLO_RES_N
+  # entries of COLO_RES_IP_PRIV are used as the targets.
+  declare CMD
+  if [[ $# -eq 1 ]]; then
+    set -- "${COLO_RES_IP_PRIV[@]:0:COLO_RES_N}" "$1"
+  fi
+  CMD="${*: -1}"
+
+  # Every argument but the last is a target.  Counting down $# rather than
+  # iterating `seq 0 $(($#-2))` stays correct for an empty target list on
+  # platforms whose seq counts downward when last < first.
+  while [[ $# -gt 1 ]]; do
+    colo_instance_run "$1" "$CMD" &
+    shift
+  done
+
+  wait
+}
+
+colo_whoami() {
+  declare ME LINE SOL_USER
+  while read -r LINE; do
+    declare IP RC
+    IFS=$'\v' read -r IP RC SOL_USER <<< "$LINE"
+    if [ "$RC" -eq 0 ]; then
+      if [ -z "$ME" ] || [ "$ME" = "$SOL_USER" ]; then
+        ME="$SOL_USER"
+      else
+        echo "Found conflicting username \"$SOL_USER\" on $IP, expected \"$ME\"" 1>&2
+      fi
+    fi
+  done < <(colo_instance_run_foreach "[ -n \"\$SOLANA_USER\" ] && echo \"\$SOLANA_USER\"")
+  echo "$ME"
+}
+
+COLO_SOLANA_USER=""
+colo_get_solana_user() {
+  if [ -z "$COLO_SOLANA_USER" ]; then
+    COLO_SOLANA_USER=$(colo_whoami)
+  fi
+  echo "$COLO_SOLANA_USER"
+}
+
+__colo_node_status_script() {
+  cat <<EOF
+  exec 3>&2
+  exec 2>/dev/null  # Suppress stderr as the next call to exec fails most of
+                    # the time due to $SOLANA_LOCK_FILE not existing and is running from a
+                    # subshell where normal redirection doesn't work
+  exec 9<"$SOLANA_LOCK_FILE" && flock -s 9 && . "$SOLANA_LOCK_FILE" && exec 9>&-
+  echo -e "\$SOLANA_LOCK_USER\\v\$SOLANA_LOCK_INSTANCENAME"
+  exec 2>&3 # Restore stderr
+EOF
+}
+
+__colo_node_status_result_normalize() {
+  # Converts one "IP\vexitStatus\vlockUser\vinstanceName" record into
+  # "IP\vSTATUS\vlockUser\vinstanceName".  STATUS is DOWN when the exit status
+  # is non-zero, HELD when a lock user is present, and FREE otherwise.
+  declare IP RC US BY INSTNAME
+  declare ST="DOWN"
+  IFS=$'\v' read -r IP RC US INSTNAME <<< "$1"
+  if [ "$RC" -eq 0 ]; then
+    if [ -n "$US" ]; then
+      BY="$US"
+      ST="HELD"
+    else
+      ST="FREE"
+    fi
+  fi
+  # Plain quoting: $"..." would request a locale translation of the string.
+  # printf also keeps any backslashes in the fields literal.
+  printf '%s\v%s\v%s\v%s\n' "$IP" "$ST" "$BY" "$INSTNAME"
+}
+
+colo_node_status() {
+  declare IP="$1"
+  __colo_node_status_result_normalize "$(colo_instance_run "$IP" "$(__colo_node_status_script)")"
+}
+
+colo_node_status_all() {
+  declare LINE
+  while read -r LINE; do
+    __colo_node_status_result_normalize "$LINE"
+  done < <(colo_instance_run_foreach "$(__colo_node_status_script)")
+}
+
+# TODO: As part of COLO_TODO_PARALLELIZE this list will need to be maintained
+# in a lockfile to work around `cloud_CreateInstances` being called in the
+# background for fullnodes
+export COLO_RES_REQUISITIONED=()
+colo_node_requisition() {
+  declare IP=$1
+  declare INSTANCE_NAME=$2
+  declare SSH_PRIVATE_KEY="$3"
+
+  declare INDEX
+  INDEX=$(colo_res_index_from_ip "$IP")
+  declare RC=false
+
+  colo_instance_run "$IP" "$(
+cat <<EOF
+  if [ ! -f "$SOLANA_LOCK_FILE" ]; then
+    exec 9>>"$SOLANA_LOCK_FILE"
+    flock -x -n 9 || exit 1
+    [ -n "\$SOLANA_USER" ] && {
+      echo "export SOLANA_LOCK_USER=\$SOLANA_USER"
+      echo "export SOLANA_LOCK_INSTANCENAME=$INSTANCE_NAME"
+      echo "[ -v SSH_TTY -a -f \"\${HOME}/.solana-motd\" ] && cat \"\${HOME}/.solana-motd\" 1>&2"
+    } >&9 || ( rm "$SOLANA_LOCK_FILE" && false )
+    9>&-
+    cat > /solana-scratch/id_ecdsa <<EOK
+$(cat "$SSH_PRIVATE_KEY")
+EOK
+    cat > /solana-scratch/id_ecdsa.pub <<EOK
+$(cat "${SSH_PRIVATE_KEY}.pub")
+EOK
+    chmod 0600 /solana-scratch/id_ecdsa
+    cat > /solana-scratch/authorized_keys <<EOAK
+$("$__colo_here"/add-datacenter-solana-user-authorized_keys.sh 2> /dev/null)
+$(cat "${SSH_PRIVATE_KEY}.pub")
+EOAK
+    cp /solana-scratch/id_ecdsa "\${HOME}/.ssh/id_ecdsa"
+    cp /solana-scratch/id_ecdsa.pub "\${HOME}/.ssh/id_ecdsa.pub"
+    cp /solana-scratch/authorized_keys "\${HOME}/.ssh/authorized_keys"
+    cat > "\${HOME}/.solana-motd" <<EOM
+
+
+$(printNetworkInfo)
+$(creationInfo)
+EOM
+
+    # XXX: Stamp creation MUST be last!
+    touch /solana-scratch/.instance-startup-complete
+  else
+    false
+  fi
+EOF
+  )"
+  # shellcheck disable=SC2181
+  if [[ 0 -eq $? ]]; then
+    COLO_RES_REQUISITIONED+=("$INDEX")
+    RC=true
+  fi
+  $RC
+}
+
+colo_node_is_requisitioned() {
+  declare INDEX="$1"
+  declare REQ
+  declare RC=false
+  for REQ in "${COLO_RES_REQUISITIONED[@]}"; do
+    if [[ $REQ -eq $INDEX ]]; then
+      RC=true
+      break
+    fi
+  done
+  $RC
+}
+
+colo_machine_types_compatible() {
+  declare MAYBE_MACH="$1"
+  declare WANT_MACH="$2"
+  declare COMPATIBLE=false
+  # XXX: Colo machine types are just GPU count ATM...
+  if [[ "$MAYBE_MACH" -ge "$WANT_MACH" ]]; then
+    COMPATIBLE=true
+  fi
+  $COMPATIBLE
+}
+
+colo_node_free() {
+  declare IP=$1
+  colo_instance_run "$IP" "$(
+cat <<EOF
+  RC=false
+  if [ -f "$SOLANA_LOCK_FILE" ]; then
+    exec 9<>"$SOLANA_LOCK_FILE"
+    flock -x -n 9 || exit 1
+    . "$SOLANA_LOCK_FILE"
+    if [ "\$SOLANA_LOCK_USER" = "\$SOLANA_USER" ]; then
+      git clean -qxdff
+      rm -f /solana-scratch/* /solana-scratch/.[^.]*
+      cat > "\${HOME}/.ssh/authorized_keys" <<EOAK
+$("$__colo_here"/add-datacenter-solana-user-authorized_keys.sh 2> /dev/null)
+EOAK
+      RC=true
+    fi
+    9>&-
+  fi
+  \$RC
+EOF
+  )"
+}
+
+

+ 7 - 0
net/scripts/colo_nodes

@@ -0,0 +1,7 @@
+astroids|216.24.140.155|10.1.1.26|16|64|NVME|2000|||0|Denver
+pacman|216.24.140.154|10.1.1.25|16|64|NVME|2000|||0|Denver
+dumoulin|216.24.140.149|10.1.1.20|16|64|SATA|2000|NVME,NVME,NVME|1000,1000,1000|4|Denver
+foosball|216.24.140.150|10.1.1.21|16|64|SATA|2000|NVME|1000|2|Denver
+pingpong|216.24.140.151|10.1.1.22|16|64|SATA|2000|NVME|1000|2|Denver
+airhockey|216.24.140.152|10.1.1.23|16|64|SATA|2000|NVME|1000|2|Denver
+billiards|216.24.140.153|10.1.1.24|16|64|SATA|2000|NVME|1000|2|Denver

+ 9 - 1
net/scripts/ec2-provider.sh

@@ -391,4 +391,12 @@ cloud_FetchFile() {
 cloud_CreateAndAttachPersistentDisk() {
 cloud_CreateAndAttachPersistentDisk() {
   echo "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for ec2"
   echo "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for ec2"
   exit 1
   exit 1
-}
+}
+
+#
+# cloud_StatusAll
+#
+# Not yet implemented for this cloud provider
+cloud_StatusAll() {
+  echo "ERROR: cloud_StatusAll is not yet implemented for ec2"
+}

+ 8 - 0
net/scripts/gce-provider.sh

@@ -321,3 +321,11 @@ cloud_CreateAndAttachPersistentDisk() {
     --zone "$zone" \
     --zone "$zone" \
     --auto-delete
     --auto-delete
 }
 }
+
+#
+# cloud_StatusAll
+#
+# Not yet implemented for this cloud provider
+cloud_StatusAll() {
+  echo "ERROR: cloud_StatusAll is not yet implemented for GCE"
+}