gce.sh 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993
  1. #!/usr/bin/env bash
  2. set -e
  3. here=$(dirname "$0")
  4. # shellcheck source=net/common.sh
  5. source "$here"/common.sh
  6. cloudProvider=$(basename "$0" .sh)
  7. bootDiskType=""
  8. case $cloudProvider in
  9. gce)
  10. # shellcheck source=net/scripts/gce-provider.sh
  11. source "$here"/scripts/gce-provider.sh
  12. # use n1 instead of n2 so we don't need to spin up >= 4 local SSD's
  13. cpuBootstrapLeaderMachineType="--custom-cpu 24 --min-cpu-platform Intel%20Skylake --custom-vm-type n1"
  14. clientMachineType="--custom-cpu 16 --custom-memory 20GB"
  15. blockstreamerMachineType="--machine-type n1-standard-8"
  16. selfDestructHours=8
  17. ;;
  18. ec2)
  19. # shellcheck source=net/scripts/ec2-provider.sh
  20. source "$here"/scripts/ec2-provider.sh
  21. cpuBootstrapLeaderMachineType=m5.4xlarge
  22. clientMachineType=c5.2xlarge
  23. blockstreamerMachineType=m5.4xlarge
  24. selfDestructHours=0
  25. ;;
  26. azure)
  27. # shellcheck source=net/scripts/azure-provider.sh
  28. source "$here"/scripts/azure-provider.sh
  29. cpuBootstrapLeaderMachineType=Standard_D16s_v3
  30. clientMachineType=Standard_D16s_v3
  31. blockstreamerMachineType=Standard_D16s_v3
  32. selfDestructHours=0
  33. ;;
  34. colo)
  35. # shellcheck source=net/scripts/colo-provider.sh
  36. source "$here"/scripts/colo-provider.sh
  37. cpuBootstrapLeaderMachineType=0
  38. clientMachineType=0
  39. blockstreamerMachineType=0
  40. selfDestructHours=0
  41. ;;
  42. *)
  43. echo "Error: Unknown cloud provider: $cloudProvider"
  44. ;;
  45. esac
  46. prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
  47. additionalValidatorCount=2
  48. clientNodeCount=0
  49. blockstreamer=false
  50. validatorBootDiskSizeInGb=100
  51. clientBootDiskSizeInGb=75
  52. validatorAdditionalDiskSizeInGb=
  53. externalNodes=false
  54. failOnValidatorBootupFailure=true
  55. preemptible=true
  56. evalInfo=false
  57. tmpfsAccounts=false
  58. defaultCustomMemoryGB="$(cloud_DefaultCustomMemoryGB)"
  59. customMemoryGB="$defaultCustomMemoryGB"
  60. publicNetwork=false
  61. letsEncryptDomainName=
  62. customMachineType=
  63. customAddress=
  64. zones=()
  65. containsZone() {
  66. local e match="$1"
  67. shift
  68. for e; do [[ "$e" == "$match" ]] && return 0; done
  69. return 1
  70. }
  71. usage() {
  72. exitcode=0
  73. if [[ -n "$1" ]]; then
  74. exitcode=1
  75. echo "Error: $*"
  76. fi
  77. cat <<EOF
  78. usage: $0 [create|config|delete] [common options] [command-specific options]
  79. Manage testnet instances
  80. create - create a new testnet (implies 'config')
  81. config - configure the testnet and write a config file describing it
  82. delete - delete the testnet
  83. info - display information about the currently configured testnet
  84. status - display status information of all resources
  85. common options:
  86. -p [prefix] - Optional common prefix for instance names to avoid
  87. collisions (default: $prefix)
  88. -z [zone] - Zone(s) for the nodes (default: $(cloud_DefaultZone))
  89. If specified multiple times, the validators will be evenly
  90. distributed over all specified zones and
  91. client/blockstreamer nodes will be created in the first
  92. zone
  93. -x - append to the existing configuration instead of creating a
  94. new configuration
  95. --allow-boot-failures
  96. - Discard from config validator nodes that didn't bootup
  97. successfully
  98. create-specific options:
  99. -n [number] - Number of additional validators (default: $additionalValidatorCount)
  100. -c [number] - Number of client nodes (default: $clientNodeCount)
  101. -u - Include a Blockstreamer (default: $blockstreamer)
  102. -P - Use public network IP addresses (default: $publicNetwork)
  103. -a [address] - Address to be be assigned to the Blockstreamer if present,
  104. otherwise the bootstrap validator.
  105. * For GCE, [address] is the "name" of the desired External
  106. IP Address.
  107. * For EC2, [address] is the "allocation ID" of the desired
  108. Elastic IP.
  109. -d [disk-type] - Specify a boot disk type (default None) Use pd-ssd to get ssd on GCE.
  110. --letsencrypt [dns name]
  111. - Attempt to generate a TLS certificate using this
  112. DNS name (useful only when the -a and -P options
  113. are also provided)
  114. --custom-machine-type [type]
  115. - Set a custom machine type.
  116. $(
  117. if [[ -n "$defaultCustomMemoryGB" ]]; then
  118. echo " --custom-memory-gb"
  119. echo " - Set memory size for custom machine type in GB (default: $defaultCustomMemoryGB)"
  120. fi
  121. )
  122. --validator-additional-disk-size-gb [number]
  123. - Add an additional [number] GB SSD to all validators to store the config directory.
  124. If not set, config will be written to the boot disk by default.
  125. Only supported on GCE.
  126. --dedicated - Use dedicated instances for additional validators
  127. (by default preemptible instances are used to reduce
  128. cost). Note that the bootstrap validator,
  129. blockstreamer and client nodes are always dedicated.
  130. Set this flag on colo to prevent your testnet from being pre-empted by nightly test automation.
  131. --self-destruct-hours [number]
  132. - Specify lifetime of the allocated instances in hours. 0 to
  133. disable. Only supported on GCE. (default: $selfDestructHours)
  134. --validator-boot-disk-size-gb [number]
  135. - Specify validator boot disk size in gb.
  136. --client-machine-type [type]
  137. - custom client machine type
  138. --tmpfs-accounts - Put accounts directory on a swap-backed tmpfs volume
  139. config-specific options:
  140. -P - Use public network IP addresses (default: $publicNetwork)
  141. delete-specific options:
  142. --reclaim-preemptible-reservations
  143. - If set, reclaims all reservations on colo nodes that were not created with --dedicated.
  144. This behavior does not filter by testnet name or owner. Only implemented on colo.
  145. --reclaim-all-reservations
  146. - If set, reclaims all reservations on all colo nodes, regardless of owner, pre-emptibility, or creator.
  147. info-specific options:
  148. --eval - Output in a form that can be eval-ed by a shell: eval \$(gce.sh info --eval)
  149. none
  150. EOF
  151. exit $exitcode
  152. }
  153. command=$1
  154. [[ -n $command ]] || usage
  155. shift
  156. [[ $command = create || $command = config || $command = info || $command = delete || $command = status ]] ||
  157. usage "Invalid command: $command"
  158. shortArgs=()
  159. while [[ -n $1 ]]; do
  160. if [[ ${1:0:2} = -- ]]; then
  161. if [[ $1 = --letsencrypt ]]; then
  162. letsEncryptDomainName="$2"
  163. shift 2
  164. elif [[ $1 = --validator-additional-disk-size-gb ]]; then
  165. validatorAdditionalDiskSizeInGb="$2"
  166. shift 2
  167. elif [[ $1 == --machine-type* || $1 == --custom-cpu* ]]; then # Bypass quoted long args
  168. shortArgs+=("$1")
  169. shift
  170. elif [[ $1 == --allow-boot-failures ]]; then
  171. failOnValidatorBootupFailure=false
  172. shift
  173. elif [[ $1 == --dedicated ]]; then
  174. preemptible=false
  175. shift
  176. elif [[ $1 == --eval ]]; then
  177. evalInfo=true
  178. shift
  179. elif [[ $1 == --enable-gpu ]]; then
  180. echo "GPU support has been dropped, --enable-gpu is a noop"
  181. shift
  182. elif [[ $1 = --custom-machine-type ]]; then
  183. customMachineType="$2"
  184. shift 2
  185. elif [[ $1 = --client-machine-type ]]; then
  186. clientMachineType="$2"
  187. shift 2
  188. elif [[ $1 = --validator-boot-disk-size-gb ]]; then
  189. validatorBootDiskSizeInGb="$2"
  190. shift 2
  191. elif [[ $1 == --self-destruct-hours ]]; then
  192. maybeTimeout=$2
  193. if [[ $maybeTimeout =~ ^[0-9]+$ ]]; then
  194. selfDestructHours=$maybeTimeout
  195. else
  196. echo " Invalid parameter ($maybeTimeout) to $1"
  197. usage 1
  198. fi
  199. shift 2
  200. elif [[ $1 == --reclaim-preemptible-reservations ]]; then
  201. reclaimOnlyPreemptibleReservations=true
  202. shift
  203. elif [[ $1 == --reclaim-all-reservations ]]; then
  204. reclaimAllReservations=true
  205. shift
  206. elif [[ $1 == --tmpfs-accounts ]]; then
  207. tmpfsAccounts=true
  208. shift
  209. elif [[ $1 == --custom-memory-gb ]]; then
  210. customMemoryGB=$2
  211. shift 2
  212. else
  213. usage "Unknown long option: $1"
  214. fi
  215. else
  216. shortArgs+=("$1")
  217. shift
  218. fi
  219. done
  220. while getopts "h?p:Pn:c:r:z:gG:a:d:uxf" opt "${shortArgs[@]}"; do
  221. case $opt in
  222. h | \?)
  223. usage
  224. ;;
  225. p)
  226. [[ ${OPTARG//[^A-Za-z0-9-]/} == "$OPTARG" ]] || usage "Invalid prefix: \"$OPTARG\", alphanumeric only"
  227. prefix=$OPTARG
  228. ;;
  229. P)
  230. publicNetwork=true
  231. ;;
  232. n)
  233. additionalValidatorCount=$OPTARG
  234. ;;
  235. c)
  236. clientNodeCount=$OPTARG
  237. ;;
  238. z)
  239. containsZone "$OPTARG" "${zones[@]}" || zones+=("$OPTARG")
  240. ;;
  241. g)
  242. echo "GPU support has been dropped, -g argument is a noop"
  243. ;;
  244. G)
  245. echo "GPU support has been dropped, -G argument is a noop"
  246. ;;
  247. a)
  248. customAddress=$OPTARG
  249. ;;
  250. d)
  251. bootDiskType=$OPTARG
  252. ;;
  253. u)
  254. blockstreamer=true
  255. ;;
  256. x)
  257. externalNodes=true
  258. ;;
  259. *)
  260. usage "unhandled option: $opt"
  261. ;;
  262. esac
  263. done
  264. [[ ${#zones[@]} -gt 0 ]] || zones+=("$(cloud_DefaultZone)")
  265. [[ -z $1 ]] || usage "Unexpected argument: $1"
  266. if [[ $cloudProvider = ec2 ]]; then
  267. # EC2 keys can't be retrieved from running instances like GCE keys can so save
  268. # EC2 keys in the user's home directory so |./ec2.sh config| can at least be
  269. # used on the same host that ran |./ec2.sh create| .
  270. sshPrivateKey="$HOME/.ssh/solana-net-id_$prefix"
  271. else
  272. sshPrivateKey="$netConfigDir/id_$prefix"
  273. fi
  274. case $cloudProvider in
  275. gce)
  276. if [[ "$tmpfsAccounts" = "true" ]]; then
  277. cpuBootstrapLeaderMachineType+=" --local-ssd interface=nvme"
  278. if [[ $customMemoryGB -lt 100 ]]; then
  279. # shellcheck disable=SC2016 # We don't want expression expansion on these backticks
  280. echo -e '\nWarning: At least 100GB of system RAM is recommending with `--tmpfs-accounts` (see `--custom-memory-gb`)\n'
  281. fi
  282. fi
  283. cpuBootstrapLeaderMachineType+=" --custom-memory ${customMemoryGB}GB"
  284. ;;
  285. ec2|azure|colo)
  286. if [[ -n $validatorAdditionalDiskSizeInGb ]] ; then
  287. usage "--validator-additional-disk-size-gb currently only supported with cloud provider: gce"
  288. fi
  289. if [[ "$tmpfsAccounts" = "true" ]]; then
  290. usage "--tmpfs-accounts only supported on cloud provider: gce"
  291. fi
  292. if [[ "$customMemoryGB" != "$defaultCustomMemoryGB" ]]; then
  293. usage "--custom-memory-gb only supported on cloud provider: gce"
  294. fi
  295. ;;
  296. *)
  297. echo "Error: Unknown cloud provider: $cloudProvider"
  298. ;;
  299. esac
  300. case $cloudProvider in
  301. gce | ec2 | azure)
  302. maybePreemptible="never preemptible"
  303. ;;
  304. colo)
  305. maybePreemptible=$preemptible
  306. ;;
  307. *)
  308. echo "Error: Unknown cloud provider: $cloudProvider"
  309. ;;
  310. esac
  311. if [[ $reclaimOnlyPreemptibleReservations == "true" && $reclaimAllReservations == "true" ]]; then
  312. usage "Cannot set both --reclaim-preemptible-reservations and --reclaim-all-reservations. Set one or none"
  313. fi
  314. if [[ -n $reclaimAllReservations || -n $reclaimOnlyPreemptibleReservations ]]; then
  315. forceDelete="true"
  316. fi
  317. if [[ -n "$customMachineType" ]] ; then
  318. bootstrapLeaderMachineType="$customMachineType"
  319. else
  320. bootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType"
  321. fi
  322. validatorMachineType=$bootstrapLeaderMachineType
  323. blockstreamerMachineType=$bootstrapLeaderMachineType
  324. # cloud_ForEachInstance [cmd] [extra args to cmd]
  325. #
  326. # Execute a command for each element in the `instances` array
  327. #
  328. # cmd - The command to execute on each instance
  329. # The command will receive arguments followed by any
  330. # additional arguments supplied to cloud_ForEachInstance:
  331. # name - name of the instance
  332. # publicIp - The public IP address of this instance
  333. # privateIp - The private IP address of this instance
  334. # zone - Zone of this instance
  335. # count - Monotonically increasing count for each
  336. # invocation of cmd, starting at 1
  337. # ... - Extra args to cmd..
  338. #
  339. #
  340. cloud_ForEachInstance() {
  341. declare cmd="$1"
  342. shift
  343. [[ -n $cmd ]] || { echo cloud_ForEachInstance: cmd not specified; exit 1; }
  344. declare count=1
  345. for info in "${instances[@]}"; do
  346. declare name publicIp privateIp
  347. IFS=: read -r name publicIp privateIp zone < <(echo "$info")
  348. # shellcheck disable=SC2294
  349. eval "$cmd" "$name" "$publicIp" "$privateIp" "$zone" "$count" "$@"
  350. count=$((count + 1))
  351. done
  352. }
  353. # Given a cloud provider zone, return an approximate lat,long location for the
  354. # data center. Normal geoip lookups for cloud provider IP addresses are
  355. # sometimes widely inaccurate.
  356. zoneLocation() {
  357. declare zone="$1"
  358. case "$zone" in
  359. us-west1*)
  360. echo "[45.5946, -121.1787]"
  361. ;;
  362. us-central1*)
  363. echo "[41.2619, -95.8608]"
  364. ;;
  365. us-east1*)
  366. echo "[33.1960, -80.0131]"
  367. ;;
  368. asia-east2*)
  369. echo "[22.3193, 114.1694]"
  370. ;;
  371. asia-northeast1*)
  372. echo "[35.6762, 139.6503]"
  373. ;;
  374. asia-northeast2*)
  375. echo "[34.6937, 135.5023]"
  376. ;;
  377. asia-south1*)
  378. echo "[19.0760, 72.8777]"
  379. ;;
  380. asia-southeast1*)
  381. echo "[1.3404, 103.7090]"
  382. ;;
  383. australia-southeast1*)
  384. echo "[-33.8688, 151.2093]"
  385. ;;
  386. europe-north1*)
  387. echo "[60.5693, 27.1878]"
  388. ;;
  389. europe-west2*)
  390. echo "[51.5074, -0.1278]"
  391. ;;
  392. europe-west3*)
  393. echo "[50.1109, 8.6821]"
  394. ;;
  395. europe-west4*)
  396. echo "[53.4386, 6.8355]"
  397. ;;
  398. europe-west6*)
  399. echo "[47.3769, 8.5417]"
  400. ;;
  401. northamerica-northeast1*)
  402. echo "[45.5017, -73.5673]"
  403. ;;
  404. southamerica-east1*)
  405. echo "[-23.5505, -46.6333]"
  406. ;;
  407. *)
  408. ;;
  409. esac
  410. }
  411. prepareInstancesAndWriteConfigFile() {
  412. $metricsWriteDatapoint "testnet-deploy net-config-begin=1"
  413. if $externalNodes; then
  414. echo "Appending to existing config file"
  415. echo "externalNodeSshKey=$sshPrivateKey" >> "$configFile"
  416. else
  417. rm -f "$geoipConfigFile"
  418. cat >> "$configFile" <<EOF
  419. # autogenerated at $(date)
  420. netBasename=$prefix
  421. publicNetwork=$publicNetwork
  422. sshPrivateKey=$sshPrivateKey
  423. letsEncryptDomainName=$letsEncryptDomainName
  424. export TMPFS_ACCOUNTS=$tmpfsAccounts
  425. EOF
  426. fi
  427. touch "$geoipConfigFile"
  428. buildSshOptions
  429. cloud_RestartPreemptedInstances "$prefix"
  430. fetchPrivateKey() {
  431. declare nodeName
  432. declare nodeIp
  433. declare nodeZone
  434. IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}")
  435. # Make sure the machine is alive or pingable
  436. timeout_sec=90
  437. cloud_WaitForInstanceReady "$nodeName" "$nodeIp" "$nodeZone" "$timeout_sec"
  438. if [[ ! -r $sshPrivateKey ]]; then
  439. echo "Fetching $sshPrivateKey from $nodeName"
  440. # Try to scp in a couple times, sshd may not yet be up even though the
  441. # machine can be pinged...
  442. (
  443. set -o pipefail
  444. for i in $(seq 1 60); do
  445. set -x
  446. cloud_FetchFile "$nodeName" "$nodeIp" /solana-scratch/id_ecdsa "$sshPrivateKey" "$nodeZone" &&
  447. cloud_FetchFile "$nodeName" "$nodeIp" /solana-scratch/id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone" &&
  448. break
  449. set +x
  450. sleep 1
  451. echo "Retry $i..."
  452. done
  453. )
  454. chmod 400 "$sshPrivateKey"
  455. ls -l "$sshPrivateKey"
  456. fi
  457. }
  458. recordInstanceIp() {
  459. declare name="$1"
  460. declare publicIp="$2"
  461. declare privateIp="$3"
  462. declare zone="$4"
  463. #declare index="$5"
  464. declare failOnFailure="$6"
  465. declare arrayName="$7"
  466. if [ "$publicIp" = "TERMINATED" ] || [ "$privateIp" = "TERMINATED" ]; then
  467. if $failOnFailure; then
  468. exit 1
  469. else
  470. return 0
  471. fi
  472. fi
  473. ok=true
  474. echo "Waiting for $name to finish booting..."
  475. (
  476. set +e
  477. fetchPrivateKey || exit 1
  478. for i in $(seq 1 60); do
  479. (
  480. set -x
  481. timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /solana-scratch/.instance-startup-complete"
  482. )
  483. ret=$?
  484. if [[ $ret -eq 0 ]]; then
  485. echo "$name has booted."
  486. exit 0
  487. fi
  488. sleep 5
  489. echo "Retry $i..."
  490. done
  491. echo "$name failed to boot."
  492. exit 1
  493. ) || ok=false
  494. if ! $ok; then
  495. if $failOnFailure; then
  496. exit 1
  497. fi
  498. else
  499. {
  500. echo "$arrayName+=($publicIp) # $name"
  501. echo "${arrayName}Private+=($privateIp) # $name"
  502. echo "${arrayName}Zone+=($zone) # $name"
  503. } >> "$configFile"
  504. declare latlng=
  505. latlng=$(zoneLocation "$zone")
  506. if [[ -n $latlng ]]; then
  507. echo "$publicIp: $latlng" >> "$geoipConfigFile"
  508. fi
  509. fi
  510. }
  511. if $externalNodes; then
  512. echo "Bootstrap validator is already configured"
  513. else
  514. echo "Looking for bootstrap validator instance..."
  515. cloud_FindInstance "$prefix-bootstrap-validator"
  516. [[ ${#instances[@]} -eq 1 ]] || {
  517. echo "Unable to find bootstrap validator"
  518. exit 1
  519. }
  520. echo "validatorIpList=()" >> "$configFile"
  521. echo "validatorIpListPrivate=()" >> "$configFile"
  522. cloud_ForEachInstance recordInstanceIp true validatorIpList
  523. fi
  524. if [[ $additionalValidatorCount -gt 0 ]]; then
  525. numZones=${#zones[@]}
  526. if [[ $additionalValidatorCount -gt $numZones ]]; then
  527. numNodesPerZone=$((additionalValidatorCount / numZones))
  528. numLeftOverNodes=$((additionalValidatorCount % numZones))
  529. else
  530. numNodesPerZone=1
  531. numLeftOverNodes=0
  532. fi
  533. for ((i=((numZones - 1)); i >= 0; i--)); do
  534. zone=${zones[i]}
  535. if [[ $i -eq 0 ]]; then
  536. numNodesPerZone=$((numNodesPerZone + numLeftOverNodes))
  537. fi
  538. echo "Looking for additional validator instances in $zone ..."
  539. cloud_FindInstances "$prefix-$zone-validator"
  540. declare numInstances=${#instances[@]}
  541. if [[ $numInstances -ge $numNodesPerZone || ( ! $failOnValidatorBootupFailure && $numInstances -gt 0 ) ]]; then
  542. cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" validatorIpList
  543. else
  544. echo "Unable to find additional validators"
  545. if $failOnValidatorBootupFailure; then
  546. exit 1
  547. fi
  548. fi
  549. done
  550. fi
  551. if ! $externalNodes; then
  552. echo "clientIpList=()" >> "$configFile"
  553. echo "clientIpListPrivate=()" >> "$configFile"
  554. fi
  555. echo "Looking for client bencher instances..."
  556. cloud_FindInstances "$prefix-client"
  557. [[ ${#instances[@]} -eq 0 ]] || {
  558. cloud_ForEachInstance recordInstanceIp true clientIpList
  559. }
  560. if ! $externalNodes; then
  561. echo "blockstreamerIpList=()" >> "$configFile"
  562. echo "blockstreamerIpListPrivate=()" >> "$configFile"
  563. fi
  564. echo "Looking for blockstreamer instances..."
  565. cloud_FindInstances "$prefix-blockstreamer"
  566. [[ ${#instances[@]} -eq 0 ]] || {
  567. cloud_ForEachInstance recordInstanceIp true blockstreamerIpList
  568. }
  569. echo "Wrote $configFile"
  570. $metricsWriteDatapoint "testnet-deploy net-config-complete=1"
  571. }
  572. delete() {
  573. $metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
  574. case $cloudProvider in
  575. gce | ec2 | azure)
  576. # Filter for all nodes
  577. filter="$prefix-"
  578. ;;
  579. colo)
  580. if [[ -n $forceDelete ]]; then
  581. filter=".*-"
  582. else
  583. filter="$prefix-"
  584. fi
  585. ;;
  586. *)
  587. echo "Error: Unknown cloud provider: $cloudProvider"
  588. ;;
  589. esac
  590. echo "Searching for instances: $filter"
  591. cloud_FindInstances "$filter" "$reclaimOnlyPreemptibleReservations"
  592. if [[ ${#instances[@]} -eq 0 ]]; then
  593. echo "No instances found matching '$filter'"
  594. else
  595. cloud_DeleteInstances $forceDelete
  596. fi
  597. wait
  598. if $externalNodes; then
  599. echo "Let's not delete the current configuration file"
  600. else
  601. rm -f "$configFile"
  602. fi
  603. $metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
  604. }
  605. create_error_cleanup() {
  606. declare RC=$?
  607. if [[ "$RC" -ne 0 ]]; then
  608. delete
  609. fi
  610. exit $RC
  611. }
  612. case $command in
  613. delete)
  614. delete
  615. ;;
  616. create)
  617. [[ -n $additionalValidatorCount ]] || usage "Need number of nodes"
  618. delete
  619. $metricsWriteDatapoint "testnet-deploy net-create-begin=1"
  620. if $failOnValidatorBootupFailure; then
  621. trap create_error_cleanup EXIT
  622. fi
  623. rm -rf "$sshPrivateKey"{,.pub}
  624. # Note: using rsa because |aws ec2 import-key-pair| seems to fail for ecdsa
  625. ssh-keygen -t rsa -N '' -f "$sshPrivateKey"
  626. printNetworkInfo() {
  627. cat <<EOF
  628. ==[ Network composition ]===============================================================
  629. Bootstrap validator = $bootstrapLeaderMachineType
  630. Additional validators = $additionalValidatorCount x $validatorMachineType
  631. Client(s) = $clientNodeCount x $clientMachineType
  632. Blockstreamer = $blockstreamer
  633. ========================================================================================
  634. EOF
  635. }
  636. printNetworkInfo
  637. creationDate=$(date)
  638. creationInfo() {
  639. cat <<EOF
  640. Instance running since: $creationDate
  641. ========================================================================================
  642. EOF
  643. }
  644. declare startupScript="$netConfigDir"/instance-startup-script.sh
  645. cat > "$startupScript" <<EOF
  646. #!/usr/bin/env bash
  647. # autogenerated at $(date)
  648. set -ex
  649. if [[ -f /solana-scratch/.instance-startup-complete ]]; then
  650. echo reboot
  651. $(
  652. cd "$here"/scripts/
  653. if [[ -n $validatorAdditionalDiskSizeInGb ]]; then
  654. cat mount-additional-disk.sh
  655. fi
  656. cat ../../scripts/ulimit-n.sh
  657. )
  658. if [[ -x ~solana/solana/on-reboot ]]; then
  659. sudo -u solana ~solana/solana/on-reboot
  660. fi
  661. # Skip most setup on instance reboot
  662. exit 0
  663. fi
  664. cat > /etc/motd <<EOM
  665. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  666. This instance has not been fully configured.
  667. See startup script log messages in /var/log/syslog for status:
  668. $ sudo cat /var/log/syslog | egrep \\(startup-script\\|cloud-init\)
  669. To block until setup is complete, run:
  670. $ until [[ -f /solana-scratch/.instance-startup-complete ]]; do sleep 1; done
  671. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  672. $(creationInfo)
  673. EOM
  674. # Place the generated private key at /solana-scratch/id_ecdsa so it's retrievable by anybody
  675. # who is able to log into this machine
  676. mkdir -m 0777 /solana-scratch
  677. cat > /solana-scratch/id_ecdsa <<EOK
  678. $(cat "$sshPrivateKey")
  679. EOK
  680. cat > /solana-scratch/id_ecdsa.pub <<EOK
  681. $(cat "$sshPrivateKey.pub")
  682. EOK
  683. chmod 444 /solana-scratch/id_ecdsa
  684. USER=\$(id -un)
  685. export DEBIAN_FRONTEND=noninteractive
  686. $(
  687. cd "$here"/scripts/
  688. cat \
  689. disable-background-upgrades.sh \
  690. create-solana-user.sh \
  691. install-ag.sh \
  692. install-at.sh \
  693. install-certbot.sh \
  694. install-earlyoom.sh \
  695. install-iftop.sh \
  696. install-jq.sh \
  697. install-libssl.sh \
  698. install-rsync.sh \
  699. install-perf.sh \
  700. localtime.sh \
  701. network-config.sh \
  702. remove-docker-interface.sh \
  703. if [[ -n $validatorAdditionalDiskSizeInGb ]]; then
  704. cat mount-additional-disk.sh
  705. fi
  706. if [[ $selfDestructHours -gt 0 ]]; then
  707. cat <<EOSD
  708. # Setup GCE self-destruct
  709. cat >/solana-scratch/gce-self-destruct.sh <<'EOS'
  710. $(cat gce-self-destruct.sh)
  711. EOS
  712. EOSD
  713. cat <<'EOSD'
  714. # Populate terminal prompt update script
  715. cat >/solana-scratch/gce-self-destruct-ps1.sh <<'EOS'
  716. #!/usr/bin/env bash
  717. source "$(dirname "$0")/gce-self-destruct.sh"
  718. gce_self_destruct_ps1
  719. EOS
  720. chmod +x /solana-scratch/gce-self-destruct-ps1.sh
  721. # Append MOTD and PS1 replacement to .profile
  722. cat >>~solana/.profile <<'EOS'
  723. # Print self-destruct countdown on login
  724. source "/solana-scratch/gce-self-destruct.sh"
  725. gce_self_destruct_motd
  726. # Add self-destruct countdown to terminal prompt
  727. export PS1='\[\e]0;\u@\h: \w\a\]${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]$(/solana-scratch/gce-self-destruct-ps1.sh):\[\033[01;34m\]\w\[\033[00m\]\$ '
  728. EOS
  729. EOSD
  730. cat <<EOSD
  731. source /solana-scratch/gce-self-destruct.sh
  732. gce_self_destruct_setup $selfDestructHours
  733. EOSD
  734. fi
  735. )
  736. cat > /etc/motd <<EOM
  737. See startup script log messages in /var/log/syslog for status:
  738. $ sudo cat /var/log/syslog | egrep \\(startup-script\\|cloud-init\)
  739. $(printNetworkInfo)
  740. $(creationInfo)
  741. EOM
  742. $(
  743. if [[ "$tmpfsAccounts" = "true" ]]; then
  744. cat <<'EOSWAP'
  745. # Setup swap/tmpfs for accounts
  746. tmpfsMountPoint=/mnt/solana-accounts
  747. swapDevice="/dev/nvme0n1"
  748. swapUUID="43076c54-7840-4e59-a368-2d164f8984fb"
  749. mkswap --uuid "$swapUUID" "$swapDevice"
  750. echo "UUID=$swapUUID swap swap defaults 0 0" >> /etc/fstab
  751. swapon "UUID=$swapUUID"
  752. mkdir -p -m 0777 "$tmpfsMountPoint"
  753. echo "tmpfs $tmpfsMountPoint tmpfs defaults,size=300G 0 0" >> /etc/fstab
  754. mount "$tmpfsMountPoint"
  755. EOSWAP
  756. fi
  757. )
  758. touch /solana-scratch/.instance-startup-complete
  759. EOF
  760. if $blockstreamer; then
  761. blockstreamerAddress=$customAddress
  762. else
  763. bootstrapLeaderAddress=$customAddress
  764. fi
  765. for zone in "${zones[@]}"; do
  766. cloud_Initialize "$prefix" "$zone"
  767. done
  768. if $externalNodes; then
  769. echo "Bootstrap validator is already configured"
  770. else
  771. cloud_CreateInstances "$prefix" "$prefix-bootstrap-validator" 1 \
  772. "$bootstrapLeaderMachineType" "${zones[0]}" "$validatorBootDiskSizeInGb" \
  773. "$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$validatorAdditionalDiskSizeInGb" \
  774. "$maybePreemptible" "$sshPrivateKey"
  775. fi
  776. if [[ $additionalValidatorCount -gt 0 ]]; then
  777. num_zones=${#zones[@]}
  778. if [[ $additionalValidatorCount -gt $num_zones ]]; then
  779. numNodesPerZone=$((additionalValidatorCount / num_zones))
  780. numLeftOverNodes=$((additionalValidatorCount % num_zones))
  781. else
  782. numNodesPerZone=1
  783. numLeftOverNodes=0
  784. fi
  785. for ((i=((num_zones - 1)); i >= 0; i--)); do
  786. zone=${zones[i]}
  787. if [[ $i -eq 0 ]]; then
  788. numNodesPerZone=$((numNodesPerZone + numLeftOverNodes))
  789. fi
  790. cloud_CreateInstances "$prefix" "$prefix-$zone-validator" "$numNodesPerZone" \
  791. "$validatorMachineType" "$zone" "$validatorBootDiskSizeInGb" \
  792. "$startupScript" "" "$bootDiskType" "$validatorAdditionalDiskSizeInGb" \
  793. "$preemptible" "$sshPrivateKey" &
  794. done
  795. wait
  796. fi
  797. if [[ $clientNodeCount -gt 0 ]]; then
  798. cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
  799. "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
  800. "$startupScript" "" "$bootDiskType" "" "$maybePreemptible" "$sshPrivateKey"
  801. fi
  802. if $blockstreamer; then
  803. cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
  804. "$blockstreamerMachineType" "${zones[0]}" "$validatorBootDiskSizeInGb" \
  805. "$startupScript" "$blockstreamerAddress" "$bootDiskType" "" "$maybePreemptible" "$sshPrivateKey"
  806. fi
  807. $metricsWriteDatapoint "testnet-deploy net-create-complete=1"
  808. prepareInstancesAndWriteConfigFile
  809. ;;
  810. config)
  811. failOnValidatorBootupFailure=false
  812. prepareInstancesAndWriteConfigFile
  813. ;;
  814. info)
  815. loadConfigFile
  816. printNode() {
  817. declare nodeType=$1
  818. declare ip=$2
  819. declare ipPrivate=$3
  820. declare zone=$4
  821. printf " %-16s | %-15s | %-15s | %s\n" "$nodeType" "$ip" "$ipPrivate" "$zone"
  822. }
  823. if $evalInfo; then
  824. echo "NET_NUM_VALIDATORS=${#validatorIpList[@]}"
  825. echo "NET_NUM_CLIENTS=${#clientIpList[@]}"
  826. echo "NET_NUM_BLOCKSTREAMERS=${#blockstreamerIpList[@]}"
  827. else
  828. printNode "Node Type" "Public IP" "Private IP" "Zone"
  829. echo "-------------------+-----------------+-----------------+--------------"
  830. fi
  831. nodeType=bootstrap-validator
  832. if [[ ${#validatorIpList[@]} -gt 0 ]]; then
  833. for i in $(seq 0 $(( ${#validatorIpList[@]} - 1)) ); do
  834. ipAddress=${validatorIpList[$i]}
  835. ipAddressPrivate=${validatorIpListPrivate[$i]}
  836. zone=${validatorIpListZone[$i]}
  837. if $evalInfo; then
  838. echo "NET_VALIDATOR${i}_IP=$ipAddress"
  839. else
  840. printNode $nodeType "$ipAddress" "$ipAddressPrivate" "$zone"
  841. fi
  842. nodeType=validator
  843. done
  844. fi
  845. if [[ ${#clientIpList[@]} -gt 0 ]]; then
  846. for i in $(seq 0 $(( ${#clientIpList[@]} - 1)) ); do
  847. ipAddress=${clientIpList[$i]}
  848. ipAddressPrivate=${clientIpListPrivate[$i]}
  849. zone=${clientIpListZone[$i]}
  850. if $evalInfo; then
  851. echo "NET_CLIENT${i}_IP=$ipAddress"
  852. else
  853. printNode client "$ipAddress" "$ipAddressPrivate" "$zone"
  854. fi
  855. done
  856. fi
  857. if [[ ${#blockstreamerIpList[@]} -gt 0 ]]; then
  858. for i in $(seq 0 $(( ${#blockstreamerIpList[@]} - 1)) ); do
  859. ipAddress=${blockstreamerIpList[$i]}
  860. ipAddressPrivate=${blockstreamerIpListPrivate[$i]}
  861. zone=${blockstreamerIpListZone[$i]}
  862. if $evalInfo; then
  863. echo "NET_BLOCKSTREAMER${i}_IP=$ipAddress"
  864. else
  865. printNode blockstreamer "$ipAddress" "$ipAddressPrivate" "$zone"
  866. fi
  867. done
  868. fi
  869. ;;
  870. status)
  871. cloud_StatusAll
  872. ;;
  873. *)
  874. usage "Unknown command: $command"
  875. esac