net.sh 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249
  1. #!/usr/bin/env bash
  2. set -e
  3. here=$(dirname "$0")
  4. SOLANA_ROOT="$(cd "$here"/..; pwd)"
  5. # shellcheck source=net/common.sh
  6. source "$here"/common.sh
  7. usage() {
  8. exitcode=0
  9. if [[ -n "$1" ]]; then
  10. exitcode=1
  11. echo "Error: $*"
  12. fi
  13. CLIENT_OPTIONS=$(cat << EOM
  14. -c clientType=numClients=extraArgs - Number of clientTypes to start. This options can be specified
  15. more than once. Defaults to bench-tps for all clients if not
  16. specified.
  17. Valid client types are:
  18. idle
  19. bench-tps
  20. User can optionally provide extraArgs that are transparently
  21. supplied to the client program as command line parameters.
  22. For example,
  23. -c bench-tps=2="--tx_count 25000"
  24. This will start 2 bench-tps clients, and supply "--tx_count 25000"
  25. to the bench-tps client.
  26. --use-unstaked-connection - Use unstaked connection. By default, staked connection with
  27. bootstrap node credendials is used.
  28. EOM
  29. )
  30. cat <<EOF
  31. usage: $0 [start|stop|restart|sanity] [command-specific options]
  32. Operate a configured testnet
  33. start - Start the network
  34. sanity - Sanity check the network
  35. stop - Stop the network
  36. restart - Shortcut for stop then start
  37. logs - Fetch remote logs from each network node
  38. startnode - Start an individual node (previously stopped with stopNode)
  39. stopnode - Stop an individual node
  40. startclients - Start client nodes only
  41. prepare - Prepare software deployment. (Build/download the software release)
  42. update - Deploy a new software update to the cluster
  43. upgrade - Upgrade software on bootstrap validator. (Restart bootstrap validator manually to run it)
  44. start-specific options:
  45. -T [tarFilename] - Deploy the specified release tarball
  46. -t edge|beta|stable|vX.Y.Z - Deploy the latest tarball release for the
  47. specified release channel (edge|beta|stable) or release tag
  48. (vX.Y.Z)
  49. -r / --skip-setup - Reuse existing node/ledger configuration from a
  50. previous |start| (ie, don't run ./multinode-demo/setup.sh).
  51. -d / --debug - Build/deploy the testnet with debug binaries
  52. $CLIENT_OPTIONS
  53. --client-delay-start
  54. - Number of seconds to wait after validators have finished starting before starting client programs
  55. (default: $clientDelayStart)
  56. -n NUM_VALIDATORS - Number of validators to apply command to.
  57. --gpu-mode GPU_MODE - Specify GPU mode to launch validators with (default: $gpuMode).
  58. MODE must be one of
  59. on - GPU *required*, any vendor *
  60. off - No GPU, CPU-only
  61. auto - Use GPU if available, any vendor *
  62. cuda - GPU *required*, Nvidia CUDA only
  63. * Currently, Nvidia CUDA is the only supported GPU vendor
  64. --hashes-per-tick NUM_HASHES|sleep|auto
  65. - Override the default --hashes-per-tick for the cluster
  66. --no-airdrop
  67. - If set, disables the faucet keypair. Nodes must be funded in genesis config
  68. --faucet-lamports NUM_LAMPORTS_TO_MINT
  69. - Override the default 500000000000000000 lamports minted in genesis
  70. --extra-primordial-stakes NUM_EXTRA_PRIMORDIAL_STAKES
  71. - Number of nodes to be initially staked in genesis.
  72. Gives extra stake in genesis to NUM_EXTRA_PRIMORDIAL_STAKES many nodes.
  73. Implies --wait-for-supermajority 1 --async-node-init and the supermajority
  74. wait slot may be overridden with the corresponding flag
  75. --internal-nodes-stake-lamports NUM_LAMPORTS_PER_NODE
  76. - Amount to stake internal nodes.
  77. --internal-nodes-lamports NUM_LAMPORTS_PER_NODE
  78. - Amount to fund internal nodes in genesis config.
  79. --external-accounts-file FILE_PATH
  80. - A YML file with a list of account pubkeys and corresponding lamport balances
  81. in genesis config for external nodes
  82. --no-snapshot-fetch
  83. - If set, disables booting validators from a snapshot
  84. --skip-poh-verify
  85. - If set, validators will skip verifying
  86. the ledger they already have saved to disk at
  87. boot (results in a much faster boot)
  88. --no-deploy
  89. - Don't deploy new software, use the
  90. existing deployment
  91. --no-build
  92. - Don't build new software, deploy the
  93. existing binaries
  94. --deploy-if-newer - Only deploy if newer software is
  95. available (requires -t or -T)
  96. --cluster-type development|devnet|testnet|mainnet-beta
  97. - Specify whether or not to launch the cluster in "development" mode with all features enabled at epoch 0,
  98. or various other live clusters' feature set (default: development)
  99. --slots-per-epoch SLOTS
  100. - Override the number of slots in an epoch
  101. --warp-slot WARP_SLOT
  102. - Boot from a snapshot that has warped ahead to WARP_SLOT rather than a slot 0 genesis.
  103. --full-rpc
  104. - Support full RPC services on all nodes
  105. --tpu-disable-quic
  106. - Disable quic for tpu packet forwarding
  107. --tpu-enable-udp
  108. - Enable UDP for tpu transactions
  109. --client-type
  110. - Specify backend client type for bench-tps. Valid options are (rpc-client|tpu-client), tpu-client is default
  111. sanity/start-specific options:
  112. -F - Discard validator nodes that didn't bootup successfully
  113. -o noInstallCheck - Skip agave-install sanity
  114. -o rejectExtraNodes - Require the exact number of nodes
  115. stop-specific options:
  116. none
  117. logs-specific options:
  118. none
  119. netem-specific options:
  120. --config - Netem configuration (as a double quoted string)
  121. --parition - Percentage of network that should be configured with netem
  122. --config-file - Configuration file for partition and netem configuration
  123. --netem-cmd - Optional command argument to netem. Default is "add". Use "cleanup" to remove rules.
  124. update-specific options:
  125. --platform linux|osx|windows - Deploy the tarball using 'agave-install deploy ...' for the
  126. given platform (multiple platforms may be specified)
  127. (-t option must be supplied as well)
  128. startnode/stopnode-specific options:
  129. -i [ip address] - IP Address of the node to start or stop
  130. startnode specific option:
  131. --wen-restart [coordinator_pubkey] - Use given coordinator pubkey and apply wen_restat
  132. startclients-specific options:
  133. $CLIENT_OPTIONS
  134. Note: if RUST_LOG is set in the environment it will be propagated into the
  135. network nodes.
  136. EOF
  137. exit $exitcode
  138. }
  139. initLogDir() { # Initializes the netLogDir global variable. Idempotent
  140. [[ -z $netLogDir ]] || return 0
  141. netLogDir="$netDir"/log
  142. declare netLogDateDir
  143. netLogDateDir="$netDir"/log-$(date +"%Y-%m-%d_%H_%M_%S")
  144. if [[ -d $netLogDir && ! -L $netLogDir ]]; then
  145. echo "Warning: moving $netLogDir to make way for symlink."
  146. mv "$netLogDir" "$netDir"/log.old
  147. elif [[ -L $netLogDir ]]; then
  148. rm "$netLogDir"
  149. fi
  150. mkdir -p "$netConfigDir" "$netLogDateDir"
  151. ln -sf "$netLogDateDir" "$netLogDir"
  152. echo "Log directory: $netLogDateDir"
  153. }
  154. annotate() {
  155. [[ -z $BUILDKITE ]] || {
  156. buildkite-agent annotate "$@"
  157. }
  158. }
  159. annotateBlockexplorerUrl() {
  160. declare blockstreamer=${blockstreamerIpList[0]}
  161. if [[ -n $blockstreamer ]]; then
  162. annotate --style info --context blockexplorer-url "Block explorer: http://$blockstreamer/"
  163. fi
  164. }
  165. build() {
  166. supported=("22.04")
  167. declare MAYBE_DOCKER=
  168. if [[ $(uname) != Linux || ! " ${supported[*]} " =~ $(lsb_release -sr) ]]; then
  169. # shellcheck source=ci/docker/env.sh
  170. source "$SOLANA_ROOT"/ci/docker/env.sh
  171. MAYBE_DOCKER="ci/docker-run.sh ${CI_DOCKER_IMAGE:?}"
  172. fi
  173. SECONDS=0
  174. (
  175. cd "$SOLANA_ROOT"
  176. echo "--- Build started at $(date)"
  177. set -x
  178. rm -rf farf
  179. buildVariant=
  180. if $debugBuild; then
  181. buildVariant=--debug
  182. fi
  183. if $profileBuild; then
  184. profilerFlags="RUSTFLAGS='-C force-frame-pointers=y -g ${RUSTFLAGS}'"
  185. fi
  186. $MAYBE_DOCKER bash -c "
  187. set -ex
  188. $profilerFlags scripts/cargo-install-all.sh farf $buildVariant --validator-only
  189. "
  190. )
  191. (
  192. set +e
  193. COMMIT="$(git rev-parse HEAD)"
  194. BRANCH="$(git rev-parse --abbrev-ref HEAD)"
  195. TAG="$(git describe --exact-match --tags HEAD 2>/dev/null)"
  196. if [[ $TAG =~ ^v[0-9]+\.[0-9]+\.[0-9]+ ]]; then
  197. NOTE=$TAG
  198. else
  199. NOTE=$BRANCH
  200. fi
  201. (
  202. echo "channel: devbuild $NOTE"
  203. echo "commit: $COMMIT"
  204. ) > "$SOLANA_ROOT"/farf/version.yml
  205. )
  206. echo "Build took $SECONDS seconds"
  207. }
  208. remoteHomeDir() {
  209. declare ipAddress=$1
  210. declare remoteHome
  211. remoteHome="$(ssh "${sshOptions[@]}" "$ipAddress" "echo \$HOME")"
  212. echo "$remoteHome"
  213. }
  214. startCommon() {
  215. declare ipAddress=$1
  216. declare remoteHome
  217. remoteHome=$(remoteHomeDir "$ipAddress")
  218. local remoteSolanaHome="${remoteHome}/solana"
  219. local remoteCargoBin="${remoteHome}/.cargo/bin"
  220. test -d "$SOLANA_ROOT"
  221. if $skipSetup; then
  222. # shellcheck disable=SC2029
  223. ssh "${sshOptions[@]}" "$ipAddress" "
  224. set -x;
  225. mkdir -p $remoteSolanaHome/config;
  226. rm -rf ~/config;
  227. mv $remoteSolanaHome/config ~;
  228. rm -rf $remoteSolanaHome;
  229. mkdir -p $remoteSolanaHome $remoteCargoBin;
  230. mv ~/config $remoteSolanaHome/
  231. "
  232. else
  233. # shellcheck disable=SC2029
  234. ssh "${sshOptions[@]}" "$ipAddress" "
  235. set -x;
  236. rm -rf $remoteSolanaHome;
  237. mkdir -p $remoteCargoBin
  238. "
  239. fi
  240. [[ -z "$externalNodeSshKey" ]] || ssh-copy-id -f -i "$externalNodeSshKey" "${sshOptions[@]}" "solana@$ipAddress"
  241. syncScripts "$ipAddress"
  242. }
  243. syncScripts() {
  244. echo "rsyncing scripts... to $ipAddress"
  245. declare ipAddress=$1
  246. declare remoteHome
  247. remoteHome=$(remoteHomeDir "$ipAddress")
  248. local remoteSolanaHome="${remoteHome}/solana"
  249. rsync -vPrc -e "ssh ${sshOptions[*]}" \
  250. --exclude 'net/log*' \
  251. "$SOLANA_ROOT"/{fetch-perf-libs.sh,fetch-programs.sh,fetch-core-bpf.sh,fetch-spl.sh,scripts,net,multinode-demo} \
  252. "$ipAddress":"$remoteSolanaHome"/ > /dev/null
  253. }
  254. # Deploy local binaries to bootstrap validator. Other validators and clients later fetch the
  255. # binaries from it
  256. deployBootstrapValidator() {
  257. declare ipAddress=$1
  258. declare remoteHome
  259. remoteHome=$(remoteHomeDir "$ipAddress")
  260. local remoteCargoBin="${remoteHome}/.cargo/bin"
  261. echo "Deploying software to bootstrap validator ($ipAddress)"
  262. case $deployMethod in
  263. tar)
  264. rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/solana-release/bin/* "$ipAddress:$remoteCargoBin/"
  265. rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/solana-release/version.yml "$ipAddress:~/"
  266. ;;
  267. local)
  268. rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:$remoteCargoBin/"
  269. rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/version.yml "$ipAddress:~/"
  270. ;;
  271. skip)
  272. ;;
  273. *)
  274. usage "Internal error: invalid deployMethod: $deployMethod"
  275. ;;
  276. esac
  277. }
  278. startBootstrapLeader() {
  279. declare ipAddress=$1
  280. declare nodeIndex="$2"
  281. declare logFile="$3"
  282. echo "--- Starting bootstrap validator: $ipAddress"
  283. echo "start log: $logFile"
  284. (
  285. set -x
  286. startCommon "$ipAddress" || exit 1
  287. [[ -z "$externalPrimordialAccountsFile" ]] || rsync -vPrc -e "ssh ${sshOptions[*]}" "$externalPrimordialAccountsFile" \
  288. "$ipAddress:$remoteExternalPrimordialAccountsFile"
  289. deployBootstrapValidator "$ipAddress"
  290. ssh "${sshOptions[@]}" -n "$ipAddress" \
  291. "./solana/net/remote/remote-node.sh \
  292. $deployMethod \
  293. bootstrap-validator \
  294. $entrypointIp \
  295. $((${#validatorIpList[@]} + ${#blockstreamerIpList[@]})) \
  296. \"$RUST_LOG\" \
  297. $skipSetup \
  298. $failOnValidatorBootupFailure \
  299. \"$remoteExternalPrimordialAccountsFile\" \
  300. \"$maybeDisableAirdrops\" \
  301. \"$internalNodesStakeLamports\" \
  302. \"$internalNodesLamports\" \
  303. $nodeIndex \
  304. ${#clientIpList[@]} \"$benchTpsExtraArgs\" \
  305. \"$genesisOptions\" \
  306. \"$maybeNoSnapshot $maybeSkipLedgerVerify $maybeLimitLedgerSize $maybeWaitForSupermajority $maybeAccountsDbSkipShrink $maybeSkipRequireTower\" \
  307. \"$gpuMode\" \
  308. \"$maybeWarpSlot\" \
  309. \"$maybeFullRpc\" \
  310. \"$waitForNodeInit\" \
  311. \"$extraPrimordialStakes\" \
  312. \"$TMPFS_ACCOUNTS\" \
  313. \"$disableQuic\" \
  314. \"$enableUdp\" \
  315. \"$maybeWenRestart\" \
  316. "
  317. ) >> "$logFile" 2>&1 || {
  318. cat "$logFile"
  319. echo "^^^ +++"
  320. exit 1
  321. }
  322. }
  323. startNode() {
  324. declare ipAddress=$1
  325. declare nodeType=$2
  326. declare nodeIndex="$3"
  327. initLogDir
  328. declare logFile="$netLogDir/validator-$ipAddress.log"
  329. if [[ -z $nodeType ]]; then
  330. echo nodeType not specified
  331. exit 1
  332. fi
  333. if [[ -z $nodeIndex ]]; then
  334. echo nodeIndex not specified
  335. exit 1
  336. fi
  337. echo "--- Starting $nodeType: $ipAddress"
  338. echo "start log: $logFile"
  339. (
  340. set -x
  341. startCommon "$ipAddress"
  342. if [[ $nodeType = blockstreamer ]] && [[ -n $letsEncryptDomainName ]]; then
  343. #
  344. # Create/renew TLS certificate
  345. #
  346. declare localArchive=~/letsencrypt-"$letsEncryptDomainName".tgz
  347. if [[ -r "$localArchive" ]]; then
  348. timeout 30s scp "${sshOptions[@]}" "$localArchive" "$ipAddress:letsencrypt.tgz"
  349. fi
  350. ssh "${sshOptions[@]}" -n "$ipAddress" \
  351. "sudo -H /certbot-restore.sh $letsEncryptDomainName maintainers@solanalabs.com"
  352. rm -f letsencrypt.tgz
  353. timeout 30s scp "${sshOptions[@]}" "$ipAddress:/letsencrypt.tgz" letsencrypt.tgz
  354. test -s letsencrypt.tgz # Ensure non-empty before overwriting $localArchive
  355. cp letsencrypt.tgz "$localArchive"
  356. fi
  357. ssh "${sshOptions[@]}" -n "$ipAddress" \
  358. "./solana/net/remote/remote-node.sh \
  359. $deployMethod \
  360. $nodeType \
  361. $entrypointIp \
  362. $((${#validatorIpList[@]} + ${#blockstreamerIpList[@]})) \
  363. \"$RUST_LOG\" \
  364. $skipSetup \
  365. $failOnValidatorBootupFailure \
  366. \"$remoteExternalPrimordialAccountsFile\" \
  367. \"$maybeDisableAirdrops\" \
  368. \"$internalNodesStakeLamports\" \
  369. \"$internalNodesLamports\" \
  370. $nodeIndex \
  371. ${#clientIpList[@]} \"$benchTpsExtraArgs\" \
  372. \"$genesisOptions\" \
  373. \"$maybeNoSnapshot $maybeSkipLedgerVerify $maybeLimitLedgerSize $maybeWaitForSupermajority $maybeAccountsDbSkipShrink $maybeSkipRequireTower\" \
  374. \"$gpuMode\" \
  375. \"$maybeWarpSlot\" \
  376. \"$maybeFullRpc\" \
  377. \"$waitForNodeInit\" \
  378. \"$extraPrimordialStakes\" \
  379. \"$TMPFS_ACCOUNTS\" \
  380. \"$disableQuic\" \
  381. \"$enableUdp\" \
  382. \"$maybeWenRestart\" \
  383. "
  384. ) >> "$logFile" 2>&1 &
  385. declare pid=$!
  386. ln -sf "validator-$ipAddress.log" "$netLogDir/validator-$pid.log"
  387. pids+=("$pid")
  388. }
  389. startClient() {
  390. declare ipAddress=$1
  391. declare clientToRun="$2"
  392. declare clientIndex="$3"
  393. initLogDir
  394. declare logFile="$netLogDir/client-$clientToRun-$ipAddress.log"
  395. echo "--- Starting client: $ipAddress - $clientToRun"
  396. echo "start log: $logFile"
  397. (
  398. set -x
  399. startCommon "$ipAddress"
  400. ssh "${sshOptions[@]}" -f "$ipAddress" \
  401. "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp \
  402. $clientToRun \"$RUST_LOG\" \"$benchTpsExtraArgs\" $clientIndex $clientType \
  403. $maybeUseUnstakedConnection"
  404. ) >> "$logFile" 2>&1 || {
  405. cat "$logFile"
  406. echo "^^^ +++"
  407. exit 1
  408. }
  409. }
  410. startClients() {
  411. for ((i=0; i < "$numClients" && i < "$numClientsRequested"; i++)) do
  412. if [[ $i -lt "$numBenchTpsClients" ]]; then
  413. startClient "${clientIpList[$i]}" "solana-bench-tps" "$i"
  414. else
  415. startClient "${clientIpList[$i]}" "idle"
  416. fi
  417. done
  418. }
  419. sanity() {
  420. declare skipBlockstreamerSanity=$1
  421. $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
  422. declare ok=true
  423. declare bootstrapLeader=${validatorIpList[0]}
  424. declare blockstreamer=${blockstreamerIpList[0]}
  425. annotateBlockexplorerUrl
  426. echo "--- Sanity: $bootstrapLeader"
  427. (
  428. set -x
  429. # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
  430. ssh "${sshOptions[@]}" "$bootstrapLeader" \
  431. "./solana/net/remote/remote-sanity.sh $bootstrapLeader $sanityExtraArgs \"$RUST_LOG\""
  432. ) || ok=false
  433. $ok || exit 1
  434. if [[ -z $skipBlockstreamerSanity && -n $blockstreamer ]]; then
  435. # If there's a blockstreamer node run a reduced sanity check on it as well
  436. echo "--- Sanity: $blockstreamer"
  437. (
  438. set -x
  439. # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
  440. ssh "${sshOptions[@]}" "$blockstreamer" \
  441. "./solana/net/remote/remote-sanity.sh $blockstreamer $sanityExtraArgs \"$RUST_LOG\""
  442. ) || ok=false
  443. $ok || exit 1
  444. fi
  445. $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
  446. }
  447. deployUpdate() {
  448. if [[ -z $updatePlatforms ]]; then
  449. echo "No update platforms"
  450. return
  451. fi
  452. if [[ -z $releaseChannel ]]; then
  453. echo "Release channel not specified (use -t option)"
  454. exit 1
  455. fi
  456. declare ok=true
  457. declare bootstrapLeader=${validatorIpList[0]}
  458. for updatePlatform in $updatePlatforms; do
  459. echo "--- Deploying agave-install update: $updatePlatform"
  460. (
  461. set -x
  462. scripts/agave-install-update-manifest-keypair.sh "$updatePlatform"
  463. timeout 30s scp "${sshOptions[@]}" \
  464. update_manifest_keypair.json "$bootstrapLeader:solana/update_manifest_keypair.json"
  465. # shellcheck disable=SC2029 # remote-deploy-update.sh args are expanded on client side intentionally
  466. ssh "${sshOptions[@]}" "$bootstrapLeader" \
  467. "./solana/net/remote/remote-deploy-update.sh $releaseChannel $updatePlatform"
  468. ) || ok=false
  469. $ok || exit 1
  470. done
  471. }
  472. getNodeType() {
  473. echo "getNodeType: $nodeAddress"
  474. [[ -n $nodeAddress ]] || {
  475. echo "Error: nodeAddress not set"
  476. exit 1
  477. }
  478. nodeIndex=0 # <-- global
  479. nodeType=validator # <-- global
  480. for ipAddress in "${validatorIpList[@]}" b "${blockstreamerIpList[@]}"; do
  481. if [[ $ipAddress = b ]]; then
  482. nodeType=blockstreamer
  483. continue
  484. fi
  485. if [[ $ipAddress = "$nodeAddress" ]]; then
  486. echo "getNodeType: $nodeType ($nodeIndex)"
  487. return
  488. fi
  489. ((nodeIndex = nodeIndex + 1))
  490. done
  491. echo "Error: Unknown node: $nodeAddress"
  492. exit 1
  493. }
  494. prepareDeploy() {
  495. case $deployMethod in
  496. tar)
  497. if [[ -n $releaseChannel ]]; then
  498. echo "Downloading release from channel: $releaseChannel"
  499. rm -f "$SOLANA_ROOT"/solana-release.tar.bz2
  500. declare updateDownloadUrl=https://release.anza.xyz/"$releaseChannel"/solana-release-x86_64-unknown-linux-gnu.tar.bz2
  501. (
  502. set -x
  503. curl -L -I "$updateDownloadUrl"
  504. curl -L --retry 5 --retry-delay 2 --retry-connrefused \
  505. -o "$SOLANA_ROOT"/solana-release.tar.bz2 "$updateDownloadUrl"
  506. )
  507. tarballFilename="$SOLANA_ROOT"/solana-release.tar.bz2
  508. fi
  509. (
  510. set -x
  511. rm -rf "$SOLANA_ROOT"/solana-release
  512. cd "$SOLANA_ROOT"; tar jfxv "$tarballFilename"
  513. cat "$SOLANA_ROOT"/solana-release/version.yml
  514. )
  515. ;;
  516. local)
  517. if $doBuild; then
  518. build
  519. else
  520. echo "Build skipped due to --no-build"
  521. fi
  522. ;;
  523. skip)
  524. ;;
  525. *)
  526. usage "Internal error: invalid deployMethod: $deployMethod"
  527. ;;
  528. esac
  529. if [[ -n $deployIfNewer ]]; then
  530. if [[ $deployMethod != tar ]]; then
  531. echo "Error: --deploy-if-newer only supported for tar deployments"
  532. exit 1
  533. fi
  534. echo "Fetching current software version"
  535. (
  536. set -x
  537. rsync -vPrc -e "ssh ${sshOptions[*]}" "${validatorIpList[0]}":~/version.yml current-version.yml
  538. )
  539. cat current-version.yml
  540. if ! diff -q current-version.yml "$SOLANA_ROOT"/solana-release/version.yml; then
  541. echo "Cluster software version is old. Update required"
  542. else
  543. echo "Cluster software version is current. No update required"
  544. exit 0
  545. fi
  546. fi
  547. }
  548. deploy() {
  549. initLogDir
  550. echo "Deployment started at $(date)"
  551. $metricsWriteDatapoint "testnet-deploy net-start-begin=1"
  552. declare bootstrapLeader=true
  553. for nodeAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}"; do
  554. nodeType=
  555. nodeIndex=
  556. getNodeType
  557. if $bootstrapLeader; then
  558. SECONDS=0
  559. declare bootstrapNodeDeployTime=
  560. startBootstrapLeader "$nodeAddress" "$nodeIndex" "$netLogDir/bootstrap-validator-$ipAddress.log"
  561. bootstrapNodeDeployTime=$SECONDS
  562. $metricsWriteDatapoint "testnet-deploy net-bootnode-leader-started=1"
  563. bootstrapLeader=false
  564. SECONDS=0
  565. pids=()
  566. else
  567. startNode "$ipAddress" "$nodeType" "$nodeIndex"
  568. # Stagger additional node start time. If too many nodes start simultaneously
  569. # the bootstrap node gets more rsync requests from the additional nodes than
  570. # it can handle.
  571. sleep 2
  572. fi
  573. done
  574. for pid in "${pids[@]}"; do
  575. declare ok=true
  576. wait "$pid" || ok=false
  577. if ! $ok; then
  578. echo "+++ validator failed to start"
  579. cat "$netLogDir/validator-$pid.log"
  580. if $failOnValidatorBootupFailure; then
  581. exit 1
  582. else
  583. echo "Failure is non-fatal"
  584. fi
  585. fi
  586. done
  587. if ! $waitForNodeInit; then
  588. # Handle async init
  589. declare startTime=$SECONDS
  590. for ipAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}"; do
  591. declare timeWaited=$((SECONDS - startTime))
  592. if [[ $timeWaited -gt 600 ]]; then
  593. break
  594. fi
  595. ssh "${sshOptions[@]}" -n "$ipAddress" \
  596. "./solana/net/remote/remote-node-wait-init.sh $((600 - timeWaited))"
  597. done
  598. fi
  599. $metricsWriteDatapoint "testnet-deploy net-validators-started=1"
  600. additionalNodeDeployTime=$SECONDS
  601. annotateBlockexplorerUrl
  602. sanity skipBlockstreamerSanity # skip sanity on blockstreamer node, it may not
  603. # have caught up to the bootstrap validator yet
  604. echo "--- Sleeping $clientDelayStart seconds after validators are started before starting clients"
  605. sleep "$clientDelayStart"
  606. SECONDS=0
  607. startClients
  608. clientDeployTime=$SECONDS
  609. $metricsWriteDatapoint "testnet-deploy net-start-complete=1"
  610. declare networkVersion=unknown
  611. case $deployMethod in
  612. tar)
  613. networkVersion="$(
  614. (
  615. set -o pipefail
  616. grep "^commit: " "$SOLANA_ROOT"/solana-release/version.yml | head -n1 | cut -d\ -f2
  617. ) || echo "tar-unknown"
  618. )"
  619. ;;
  620. local)
  621. networkVersion="$(git rev-parse HEAD || echo local-unknown)"
  622. ;;
  623. skip)
  624. ;;
  625. *)
  626. usage "Internal error: invalid deployMethod: $deployMethod"
  627. ;;
  628. esac
  629. $metricsWriteDatapoint "testnet-deploy version=\"${networkVersion:0:9}\""
  630. echo
  631. echo "--- Deployment Successful"
  632. echo "Bootstrap validator deployment took $bootstrapNodeDeployTime seconds"
  633. echo "Additional validator deployment (${#validatorIpList[@]} validators, ${#blockstreamerIpList[@]} blockstreamer nodes) took $additionalNodeDeployTime seconds"
  634. echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
  635. echo "Network start logs in $netLogDir"
  636. }
  637. stopNode() {
  638. local ipAddress=$1
  639. local block=$2
  640. initLogDir
  641. declare logFile="$netLogDir/stop-validator-$ipAddress.log"
  642. echo "--- Stopping node: $ipAddress"
  643. echo "stop log: $logFile"
  644. syncScripts "$ipAddress"
  645. (
  646. # Since cleanup.sh does a pkill, we cannot pass the command directly,
  647. # otherwise the process which is doing the killing will be killed because
  648. # the script itself will match the pkill pattern
  649. set -x
  650. # shellcheck disable=SC2029 # It's desired that PS4 be expanded on the client side
  651. ssh "${sshOptions[@]}" "$ipAddress" "PS4=\"$PS4\" ./solana/net/remote/cleanup.sh"
  652. ) >> "$logFile" 2>&1 &
  653. declare pid=$!
  654. ln -sf "stop-validator-$ipAddress.log" "$netLogDir/stop-validator-$pid.log"
  655. if $block; then
  656. wait $pid || true
  657. else
  658. pids+=("$pid")
  659. fi
  660. }
  661. stop() {
  662. SECONDS=0
  663. $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"
  664. declare loopCount=0
  665. pids=()
  666. for ipAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}" "${clientIpList[@]}"; do
  667. stopNode "$ipAddress" false
  668. # Stagger additional node stop time to avoid too many concurrent ssh
  669. # sessions
  670. ((loopCount++ % 4 == 0)) && sleep 2
  671. done
  672. echo --- Waiting for nodes to finish stopping
  673. for pid in "${pids[@]}"; do
  674. echo -n "$pid "
  675. wait "$pid" || true
  676. done
  677. echo
  678. $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
  679. echo "Stopping nodes took $SECONDS seconds"
  680. }
  681. checkPremptibleInstances() {
  682. # The validatorIpList nodes may be preemptible instances that can disappear at
  683. # any time. Try to detect when a validator has been preempted to help the user
  684. # out.
  685. #
  686. # Of course this isn't airtight as an instance could always disappear
  687. # immediately after its successfully pinged.
  688. for ipAddress in "${validatorIpList[@]}"; do
  689. (
  690. timeout 5s ping -c 1 "$ipAddress" | tr - _ &>/dev/null
  691. ) || {
  692. cat <<EOF
  693. Warning: $ipAddress may have been preempted.
  694. Run |./gce.sh config| to restart it
  695. EOF
  696. exit 1
  697. }
  698. done
  699. }
  700. releaseChannel=
  701. deployMethod=local
  702. deployIfNewer=
  703. sanityExtraArgs=
  704. skipSetup=false
  705. updatePlatforms=
  706. nodeAddress=
  707. numIdleClients=0
  708. numBenchTpsClients=0
  709. benchTpsExtraArgs=
  710. failOnValidatorBootupFailure=true
  711. genesisOptions=
  712. numValidatorsRequested=
  713. externalPrimordialAccountsFile=
  714. remoteExternalPrimordialAccountsFile=
  715. internalNodesStakeLamports=
  716. internalNodesLamports=
  717. maybeNoSnapshot=""
  718. maybeLimitLedgerSize=""
  719. maybeSkipLedgerVerify=""
  720. maybeDisableAirdrops=""
  721. maybeWaitForSupermajority=""
  722. maybeAccountsDbSkipShrink=""
  723. maybeSkipRequireTower=""
  724. debugBuild=false
  725. profileBuild=false
  726. doBuild=true
  727. gpuMode=auto
  728. netemPartition=""
  729. netemConfig=""
  730. netemConfigFile=""
  731. netemCommand="add"
  732. clientDelayStart=0
  733. netLogDir=
  734. maybeWarpSlot=
  735. maybeFullRpc=false
  736. waitForNodeInit=true
  737. extraPrimordialStakes=0
  738. disableQuic=false
  739. enableUdp=false
  740. clientType=tpu-client
  741. maybeUseUnstakedConnection=""
  742. maybeWenRestart=""
  743. command=$1
  744. [[ -n $command ]] || usage
  745. shift
  # Separate long ("--") options from everything else.
  #
  # Long options are handled here. Every other word is appended, in order, to
  # shortArgs so that getopts can process the short options afterwards. The
  # loop stops at the first empty argument.
  #
  # Each branch leaves in longOptWords the number of words it used: 1 for a
  # bare flag, 2 for an option with a value. The shift happens once, at the
  # bottom, after checking that the value was really supplied. A plain
  # "shift 2" with only one word left fails without shifting anything, which
  # would re-process the same option forever (or abort the script without a
  # message when errexit is on).
  shortArgs=()
  while [[ -n $1 ]]; do
    if [[ ${1:0:2} != -- ]]; then
      shortArgs+=("$1")
      shift
      continue
    fi

    longOptWords=1
    case $1 in
    --hashes-per-tick | --slots-per-epoch | --target-lamports-per-signature | --faucet-lamports)
      # Accumulated verbatim, option name and value, in genesisOptions.
      genesisOptions="$genesisOptions $1 $2"
      longOptWords=2
      ;;
    --cluster-type)
      case "$2" in
      development | devnet | testnet | mainnet-beta) ;;
      *)
        echo "Unexpected cluster type: \"$2\""
        exit 1
        ;;
      esac
      genesisOptions="$genesisOptions $1 $2"
      longOptWords=2
      ;;
    --no-snapshot-fetch)
      maybeNoSnapshot="$1"
      ;;
    --deploy-if-newer)
      deployIfNewer=1
      ;;
    --no-deploy)
      deployMethod=skip
      ;;
    --no-build)
      doBuild=false
      ;;
    --limit-ledger-size)
      maybeLimitLedgerSize="$1 $2"
      longOptWords=2
      ;;
    --skip-poh-verify)
      maybeSkipLedgerVerify="$1"
      ;;
    --skip-setup)
      skipSetup=true
      ;;
    --platform)
      updatePlatforms="$updatePlatforms $2"
      longOptWords=2
      ;;
    --internal-nodes-stake-lamports)
      internalNodesStakeLamports="$2"
      longOptWords=2
      ;;
    --internal-nodes-lamports)
      internalNodesLamports="$2"
      longOptWords=2
      ;;
    --external-accounts-file)
      externalPrimordialAccountsFile="$2"
      remoteExternalPrimordialAccountsFile=/tmp/external-primordial-accounts.yml
      longOptWords=2
      ;;
    --no-airdrop)
      maybeDisableAirdrops="$1"
      ;;
    --debug)
      debugBuild=true
      ;;
    --profile)
      profileBuild=true
      ;;
    --partition)
      netemPartition=$2
      longOptWords=2
      ;;
    --config)
      netemConfig=$2
      longOptWords=2
      ;;
    --config-file)
      netemConfigFile=$2
      longOptWords=2
      ;;
    --netem-cmd)
      netemCommand=$2
      longOptWords=2
      ;;
    --gpu-mode)
      gpuMode=$2
      case "$gpuMode" in
      on | off | auto | cuda) ;;
      *)
        echo "Unexpected GPU mode: \"$gpuMode\""
        exit 1
        ;;
      esac
      longOptWords=2
      ;;
    --client-delay-start)
      clientDelayStart=$2
      longOptWords=2
      ;;
    --wait-for-supermajority)
      maybeWaitForSupermajority="$1 $2"
      longOptWords=2
      ;;
    --warp-slot)
      maybeWarpSlot="$1 $2"
      longOptWords=2
      ;;
    --full-rpc)
      maybeFullRpc=true
      ;;
    --tpu-disable-quic)
      disableQuic=true
      ;;
    --tpu-enable-udp)
      enableUdp=true
      ;;
    --async-node-init)
      waitForNodeInit=false
      ;;
    --extra-primordial-stakes)
      extraPrimordialStakes=$2
      longOptWords=2
      ;;
    --allow-private-addr)
      echo "--allow-private-addr is a default value"
      ;;
    --accounts-db-skip-shrink)
      maybeAccountsDbSkipShrink="$1"
      ;;
    --skip-require-tower)
      maybeSkipRequireTower="$1"
      ;;
    --client-type)
      clientType=$2
      case "$clientType" in
      tpu-client | rpc-client) ;;
      *)
        echo "Unexpected client type: \"$clientType\""
        exit 1
        ;;
      esac
      longOptWords=2
      ;;
    --use-unstaked-connection)
      maybeUseUnstakedConnection="$1"
      ;;
    --wen-restart)
      # wen_restart needs tower storage to be there, so set skipSetup to true
      # to avoid erasing the tower storage on disk.
      skipSetup=true
      maybeWenRestart="$2"
      longOptWords=2
      ;;
    *)
      usage "Unknown long option: $1"
      # Not reached if usage exits; guarantees the loop cannot continue past
      # an unknown option otherwise.
      exit 1
      ;;
    esac

    if (($# < longOptWords)); then
      usage "Missing value for long option: $1"
      exit 1
    fi
    shift "$longOptWords"
  done
  # Parses one "-c" value of the form clientType=numClients[=extraArgs], read
  # from $OPTARG. The result is stored in numIdleClients (for "idle") or in
  # numBenchTpsClients and benchTpsExtraArgs (for "bench-tps"). Malformed input
  # prints an error and exits with status 1.
  #
  # Only the first two "=" act as separators, so extraArgs may itself contain
  # "=" (for example "--tx_count=100") without being cut short.
  getClientTypeAndNum() {
    if ! [[ $OPTARG == *'='* ]]; then
      echo "Error: Expecting tuple \"clientType=numClientType=extraArgs\" but got \"$OPTARG\""
      exit 1
    fi

    local clientType=${OPTARG%%=*}
    local remainder=${OPTARG#*=}
    local numClients=${remainder%%=*}
    local extraArgs=
    if [[ $remainder == *'='* ]]; then
      extraArgs=${remainder#*=}
    fi

    local re='^[0-9]+$'
    if ! [[ $numClients =~ $re ]]; then
      echo "error: numClientType must be a number but got \"$numClients\""
      exit 1
    fi

    case $clientType in
    idle)
      numIdleClients=$numClients
      # $extraArgs ignored for 'idle'
      ;;
    bench-tps)
      numBenchTpsClients=$numClients
      benchTpsExtraArgs=$extraArgs
      ;;
    *)
      echo "Unknown client type: $clientType"
      exit 1
      ;;
    esac
  }

  # Process the short options that the long-option pass collected in shortArgs.
  # The option string also accepts "-f <arg>", but no branch handles it, so it
  # ends up in the catch-all at the bottom.
  while getopts "h?T:t:o:f:rc:Fn:i:d" opt "${shortArgs[@]}"; do
    case $opt in
    h | \?)
      usage
      ;;
    T)
      # Deploy from a local tarball, which must be readable.
      tarballFilename=$OPTARG
      [[ -r $tarballFilename ]] || usage "File not readable: $tarballFilename"
      deployMethod=tar
      ;;
    t)
      # Deploy a release: a named channel or anything starting with "v".
      case $OPTARG in
      edge | beta | stable | v*)
        releaseChannel=$OPTARG
        deployMethod=tar
        ;;
      *)
        usage "Invalid release channel: $OPTARG"
        ;;
      esac
      ;;
    n)
      numValidatorsRequested=$OPTARG
      ;;
    r)
      skipSetup=true
      ;;
    o)
      case $OPTARG in
      rejectExtraNodes | noInstallCheck)
        sanityExtraArgs="$sanityExtraArgs -o $OPTARG"
        ;;
      *)
        usage "Unknown option: $OPTARG"
        ;;
      esac
      ;;
    c)
      getClientTypeAndNum
      ;;
    F)
      failOnValidatorBootupFailure=false
      ;;
    i)
      nodeAddress=$OPTARG
      ;;
    d)
      debugBuild=true
      ;;
    *)
      usage "Error: unhandled option: $opt"
      ;;
    esac
  done
  # loadConfigFile is defined elsewhere; presumably it populates validatorIpList
  # and the other *IpList arrays used below (confirm against its definition).
  loadConfigFile

  # -n <count>: keep only the first <count> entries of validatorIpList.
  if [[ -n "$numValidatorsRequested" ]]; then
    truncatedNodeList=("${validatorIpList[@]:0:numValidatorsRequested}")
    unset validatorIpList
    validatorIpList=("${truncatedNodeList[@]}")
  fi
  # Reconcile the number of clients asked for with -c against the number of
  # entries in clientIpList.
  numClients=${#clientIpList[@]}
  numClientsRequested=$((numBenchTpsClients + numIdleClients))
  if ((numClientsRequested == 0)); then
    # Nothing was requested: every available client becomes a bench-tps client.
    numBenchTpsClients=$numClients
    numClientsRequested=$numClients
  elif ((numClientsRequested > numClients)); then
    echo "Error: More clients requested ($numClientsRequested) then available ($numClients)"
    exit 1
  fi
  # --wait-for-supermajority and --warp-slot are each stored as
  # "<option> <slot>". When both were given, the two slots have to agree.
  if [[ -n $maybeWaitForSupermajority && -n $maybeWarpSlot ]]; then
    # Drop the option name and keep the slot.
    read -r _ waitSlot <<<"$maybeWaitForSupermajority"
    read -r _ warpSlot <<<"$maybeWarpSlot"
    if ((waitSlot != warpSlot)); then
      echo "Error: When specifying both --wait-for-supermajority and --warp-slot,"
      echo "they must use the same slot. ($waitSlot != $warpSlot)"
      exit 1
    fi
  fi
  echo "net.sh: Primordial stakes: $extraPrimordialStakes"
  if ((extraPrimordialStakes > 0)); then
    # Extra primordial stakes require that all of the validators start at
    # the same time. Force async init and wait for supermajority here.
    waitForNodeInit=false
    if [[ -z $maybeWaitForSupermajority ]]; then
      # Wait at the warp slot when one was given, otherwise at slot 1.
      waitSlot=1
      if [[ -n $maybeWarpSlot ]]; then
        read -r _ waitSlot <<<"$maybeWarpSlot"
      fi
      maybeWaitForSupermajority="--wait-for-supermajority $waitSlot"
    fi
  fi
  # Warn about validators that no longer answer a ping before doing anything.
  checkPremptibleInstances

  # Run the sub-command named by the first command-line argument.
  case "$command" in
  restart)
    prepareDeploy
    stop
    deploy
    ;;
  start)
    prepareDeploy
    deploy
    ;;
  prepare)
    prepareDeploy
    ;;
  sanity)
    sanity
    ;;
  stop)
    stop
    ;;
  update)
    deployUpdate
    ;;
  upgrade)
    # Only the first entry of validatorIpList is redeployed here.
    # (start|stop)Node would need refactoring to support restarting the
    # bootstrap validator.
    bootstrapValidatorIp="${validatorIpList[0]}"
    prepareDeploy
    deployBootstrapValidator "$bootstrapValidatorIp"
    ;;
  stopnode)
    [[ -n $nodeAddress ]] || {
      usage "node address (-i) not specified"
      exit 1
    }
    stopNode "$nodeAddress" true
    ;;
  startnode)
    [[ -n $nodeAddress ]] || {
      usage "node address (-i) not specified"
      exit 1
    }
    # Cleared here and read back after getNodeType; presumably getNodeType
    # fills them in for $nodeAddress (confirm against its definition).
    nodeType=
    nodeIndex=
    getNodeType
    startNode "$nodeAddress" "$nodeType" "$nodeIndex"
    ;;
  startclients)
    startClients
    ;;
  logs)
    initLogDir

    # Copies solana/<name>.log from <host> into $netLogDir as
    # remote-<name>-<host>.log, giving up after 30 seconds. A failed copy is
    # reported but is not fatal.
    #   $1 - host to fetch from
    #   $2 - log name, without the ".log" suffix
    fetchRemoteLog() {
      local remoteHost=$1
      local logName=$2

      echo "--- fetching $logName from $remoteHost"
      (
        set -x
        timeout 30s scp "${sshOptions[@]}" \
          "$remoteHost":solana/"$logName".log "$netLogDir"/remote-"$logName"-"$remoteHost".log
      ) || echo "failed to fetch log"
    }

    # The faucet log is fetched from the first validator only.
    fetchRemoteLog "${validatorIpList[0]}" faucet
    for ipAddress in "${validatorIpList[@]}"; do
      fetchRemoteLog "$ipAddress" validator
    done
    for ipAddress in "${clientIpList[@]}"; do
      fetchRemoteLog "$ipAddress" client
    done
    for ipAddress in "${blockstreamerIpList[@]}"; do
      fetchRemoteLog "$ipAddress" validator
    done
    ;;
  netem)
    if [[ -n $netemConfigFile ]]; then
      # Config-file mode: net-shaper.sh runs on every validator.
      remoteNetemConfigFile="$(basename "$netemConfigFile")"

      # The file only needs uploading when a configuration is being added.
      if [[ $netemCommand = "add" ]]; then
        for ipAddress in "${validatorIpList[@]}"; do
          remoteHome=$(remoteHomeDir "$ipAddress")
          remoteSolanaHome="${remoteHome}/solana"
          "$here"/scp.sh "$netemConfigFile" solana@"$ipAddress":"$remoteSolanaHome"
        done
      fi

      # Each node is passed the total node count and its own index.
      # NOTE(review): the unquoted ~solana below is subject to tilde expansion
      # by the local shell before ssh.sh runs, whereas the upload above asks
      # the remote host for its home directory. Confirm this is intended.
      for i in "${!validatorIpList[@]}"; do
        "$here"/ssh.sh solana@"${validatorIpList[$i]}" 'solana/scripts/net-shaper.sh' \
          "$netemCommand" ~solana/solana/"$remoteNetemConfigFile" "${#validatorIpList[@]}" "$i"
      done
    else
      # Partition mode: apply $netemConfig to the first netemPartition percent
      # of the validators, rounded up and capped at the number of validators.
      num_nodes=$((${#validatorIpList[@]} * netemPartition / 100))
      if ((${#validatorIpList[@]} * netemPartition % 100 > 0)); then
        num_nodes=$((num_nodes + 1))
      fi
      if ((num_nodes > ${#validatorIpList[@]})); then
        num_nodes=${#validatorIpList[@]}
      fi

      # Stop netem on all nodes
      for ipAddress in "${validatorIpList[@]}"; do
        "$here"/ssh.sh solana@"$ipAddress" 'solana/scripts/netem.sh delete < solana/netem.cfg || true'
      done

      # Start netem on required nodes
      # NOTE(review): $netemConfig reaches the remote "echo" unquoted, so the
      # remote shell re-splits it, while the "add" argument is quoted.
      for ((i = 0; i < num_nodes; i++)); do
        "$here"/ssh.sh solana@"${validatorIpList[$i]}" "echo $netemConfig > solana/netem.cfg; solana/scripts/netem.sh add \"$netemConfig\""
      done
    fi
    ;;
  *)
    echo "Internal error: Unknown command: $command"
    usage
    exit 1
    ;;
  esac