| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249 |
#!/usr/bin/env bash
#
# Operate a configured testnet (start/stop/sanity/update/... - see usage()).
#
set -e

# Directory holding this script; the repository root is its parent directory.
here=$(dirname "$0")
# `&&` (not `;`) so that a failed cd aborts the script via `set -e` instead of
# silently recording the caller's working directory as the repository root.
SOLANA_ROOT="$(cd "$here"/.. && pwd)"

# NOTE(review): common.sh is presumably where loadConfigFile, sshOptions and
# the *IpList arrays used below come from - confirm against net/common.sh.
# shellcheck source=net/common.sh
source "$here"/common.sh
# Print the help text and exit.
#
# Arguments: $* - optional error message. When given, it is printed first as
#                 "Error: ..." and the script exits with status 1; otherwise
#                 the exit status is 0.
# Globals:   reads clientDelayStart and gpuMode (shown as defaults);
#            writes CLIENT_OPTIONS.
usage() {
  local exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  # Shared between the start and startclients sections of the help text.
  CLIENT_OPTIONS=$(cat << EOM
   -c clientType=numClients=extraArgs - Number of clientTypes to start. This option can be specified
                                        more than once. Defaults to bench-tps for all clients if not
                                        specified.
                                        Valid client types are:
                                            idle
                                            bench-tps
                                        User can optionally provide extraArgs that are transparently
                                        supplied to the client program as command line parameters.
                                        For example,
                                            -c bench-tps=2="--tx_count 25000"
                                        This will start 2 bench-tps clients, and supply "--tx_count 25000"
                                        to the bench-tps client.
   --use-unstaked-connection          - Use unstaked connection. By default, staked connection with
                                        bootstrap node credentials is used.
EOM
)
  cat <<EOF
usage: $0 [start|stop|restart|sanity] [command-specific options]

Operate a configured testnet

 start        - Start the network
 sanity       - Sanity check the network
 stop         - Stop the network
 restart      - Shortcut for stop then start
 logs         - Fetch remote logs from each network node
 startnode    - Start an individual node (previously stopped with stopNode)
 stopnode     - Stop an individual node
 startclients - Start client nodes only
 prepare      - Prepare software deployment. (Build/download the software release)
 update       - Deploy a new software update to the cluster
 upgrade      - Upgrade software on bootstrap validator. (Restart bootstrap validator manually to run it)

 start-specific options:
   -T [tarFilename]                   - Deploy the specified release tarball
   -t edge|beta|stable|vX.Y.Z         - Deploy the latest tarball release for the
                                        specified release channel (edge|beta|stable) or release tag
                                        (vX.Y.Z)
   -r / --skip-setup                  - Reuse existing node/ledger configuration from a
                                        previous |start| (ie, don't run ./multinode-demo/setup.sh).
   -d / --debug                       - Build/deploy the testnet with debug binaries
$CLIENT_OPTIONS
   --client-delay-start
                                      - Number of seconds to wait after validators have finished starting before starting client programs
                                        (default: $clientDelayStart)
   -n NUM_VALIDATORS                  - Number of validators to apply command to.
   --gpu-mode GPU_MODE                - Specify GPU mode to launch validators with (default: $gpuMode).
                                        MODE must be one of
                                          on - GPU *required*, any vendor *
                                          off - No GPU, CPU-only
                                          auto - Use GPU if available, any vendor *
                                          cuda - GPU *required*, Nvidia CUDA only
                                          * Currently, Nvidia CUDA is the only supported GPU vendor
   --hashes-per-tick NUM_HASHES|sleep|auto
                                      - Override the default --hashes-per-tick for the cluster
   --no-airdrop
                                      - If set, disables the faucet keypair. Nodes must be funded in genesis config
   --faucet-lamports NUM_LAMPORTS_TO_MINT
                                      - Override the default 500000000000000000 lamports minted in genesis
   --extra-primordial-stakes NUM_EXTRA_PRIMORDIAL_STAKES
                                      - Number of nodes to be initially staked in genesis.
                                        Gives extra stake in genesis to NUM_EXTRA_PRIMORDIAL_STAKES many nodes.
                                        Implies --wait-for-supermajority 1 --async-node-init and the supermajority
                                        wait slot may be overridden with the corresponding flag
   --internal-nodes-stake-lamports NUM_LAMPORTS_PER_NODE
                                      - Amount to stake internal nodes.
   --internal-nodes-lamports NUM_LAMPORTS_PER_NODE
                                      - Amount to fund internal nodes in genesis config.
   --external-accounts-file FILE_PATH
                                      - A YML file with a list of account pubkeys and corresponding lamport balances
                                        in genesis config for external nodes
   --no-snapshot-fetch
                                      - If set, disables booting validators from a snapshot
   --skip-poh-verify
                                      - If set, validators will skip verifying
                                        the ledger they already have saved to disk at
                                        boot (results in a much faster boot)
   --no-deploy
                                      - Don't deploy new software, use the
                                        existing deployment
   --no-build
                                      - Don't build new software, deploy the
                                        existing binaries
   --deploy-if-newer                  - Only deploy if newer software is
                                        available (requires -t or -T)
   --cluster-type development|devnet|testnet|mainnet-beta
                                      - Specify whether or not to launch the cluster in "development" mode with all features enabled at epoch 0,
                                        or various other live clusters' feature set (default: development)
   --slots-per-epoch SLOTS
                                      - Override the number of slots in an epoch
   --warp-slot WARP_SLOT
                                      - Boot from a snapshot that has warped ahead to WARP_SLOT rather than a slot 0 genesis.
   --full-rpc
                                      - Support full RPC services on all nodes
   --tpu-disable-quic
                                      - Disable quic for tpu packet forwarding
   --tpu-enable-udp
                                      - Enable UDP for tpu transactions
   --client-type
                                      - Specify backend client type for bench-tps. Valid options are (rpc-client|tpu-client), tpu-client is default

 sanity/start-specific options:
   -F                   - Discard validator nodes that didn't bootup successfully
   -o noInstallCheck    - Skip agave-install sanity
   -o rejectExtraNodes  - Require the exact number of nodes

 stop-specific options:
   none

 logs-specific options:
   none

 netem-specific options:
   --config            - Netem configuration (as a double quoted string)
   --partition         - Percentage of network that should be configured with netem
   --config-file       - Configuration file for partition and netem configuration
   --netem-cmd         - Optional command argument to netem. Default is "add". Use "cleanup" to remove rules.

 update-specific options:
   --platform linux|osx|windows       - Deploy the tarball using 'agave-install deploy ...' for the
                                        given platform (multiple platforms may be specified)
                                        (-t option must be supplied as well)

 startnode/stopnode-specific options:
   -i [ip address]                    - IP Address of the node to start or stop

 startnode specific option:
   --wen-restart [coordinator_pubkey] - Use given coordinator pubkey and apply wen_restart

 startclients-specific options:
$CLIENT_OPTIONS

Note: if RUST_LOG is set in the environment it will be propagated into the
      network nodes.
EOF
  exit $exitcode
}
# Set up the log directory and publish it through the netLogDir global.
#
# A fresh timestamped directory "$netDir/log-<date>" is created and
# "$netDir/log" is (re)pointed at it via a symlink. Calling this again once
# netLogDir is set is a no-op, which makes the function idempotent.
#
# Globals: reads netDir, netConfigDir; writes netLogDir.
initLogDir() {
  if [[ -n $netLogDir ]]; then
    return 0
  fi

  netLogDir="$netDir"/log
  local stampedLogDir
  stampedLogDir="$netDir"/log-$(date +"%Y-%m-%d_%H_%M_%S")

  if [[ -L $netLogDir ]]; then
    # Stale symlink from an earlier run: just drop it.
    rm "$netLogDir"
  elif [[ -d $netLogDir ]]; then
    # A real directory occupies the symlink's place: keep it around as log.old.
    echo "Warning: moving $netLogDir to make way for symlink."
    mv "$netLogDir" "$netDir"/log.old
  fi

  mkdir -p "$netConfigDir" "$stampedLogDir"
  ln -sf "$stampedLogDir" "$netLogDir"
  echo "Log directory: $stampedLogDir"
}
# Forward all arguments to `buildkite-agent annotate`, but only when the
# BUILDKITE environment variable is non-empty; otherwise do nothing.
annotate() {
  if [[ -n $BUILDKITE ]]; then
    buildkite-agent annotate "$@"
  fi
}
# Emit a build annotation with the block explorer URL of the first
# blockstreamer node; a no-op when blockstreamerIpList is empty.
annotateBlockexplorerUrl() {
  local firstBlockstreamer=${blockstreamerIpList[0]}
  [[ -n $firstBlockstreamer ]] || return 0
  annotate --style info --context blockexplorer-url "Block explorer: http://$firstBlockstreamer/"
}
# Build the validator binaries into "$SOLANA_ROOT/farf" and write
# farf/version.yml describing the build.
#
# The build runs directly on the host when `uname` is Linux and the release
# reported by `lsb_release -sr` appears in the supported list; otherwise it is
# wrapped in ci/docker-run.sh using the image named by ci/docker/env.sh.
#
# Globals: reads SOLANA_ROOT, debugBuild, profileBuild, RUSTFLAGS;
#          resets SECONDS to time the build.
build() {
  local -a supported=("22.04")
  declare MAYBE_DOCKER=
  # NOTE(review): the release string is used as an unquoted regex, so "." is a
  # wildcard and an empty lsb_release output matches everything.
  if [[ $(uname) != Linux || ! " ${supported[*]} " =~ $(lsb_release -sr) ]]; then
    # shellcheck source=ci/docker/env.sh
    source "$SOLANA_ROOT"/ci/docker/env.sh
    MAYBE_DOCKER="ci/docker-run.sh ${CI_DOCKER_IMAGE:?}"
  fi
  SECONDS=0
  (
    cd "$SOLANA_ROOT"
    echo "--- Build started at $(date)"

    set -x
    rm -rf farf

    buildVariant=
    if $debugBuild; then
      buildVariant=--debug
    fi

    if $profileBuild; then
      profilerFlags="RUSTFLAGS='-C force-frame-pointers=y -g ${RUSTFLAGS}'"
    fi

    # MAYBE_DOCKER is intentionally unquoted: it is either empty or a
    # "command image" pair that must word-split.
    $MAYBE_DOCKER bash -c "
      set -ex
      $profilerFlags scripts/cargo-install-all.sh farf $buildVariant --validator-only
    "
  )

  (
    # Version metadata is best-effort: a missing tag must not fail the build.
    set +e
    # Query the repository that was just built, not whatever directory the
    # caller happened to invoke this script from.
    cd "$SOLANA_ROOT" || exit 1
    COMMIT="$(git rev-parse HEAD)"
    BRANCH="$(git rev-parse --abbrev-ref HEAD)"
    TAG="$(git describe --exact-match --tags HEAD 2>/dev/null)"
    if [[ $TAG =~ ^v[0-9]+\.[0-9]+\.[0-9]+ ]]; then
      NOTE=$TAG
    else
      NOTE=$BRANCH
    fi
    (
      echo "channel: devbuild $NOTE"
      echo "commit: $COMMIT"
    ) > "$SOLANA_ROOT"/farf/version.yml
  )
  echo "Build took $SECONDS seconds"
}
# Print the value of $HOME on a remote host.
#
# Arguments: $1 - address of the remote host
# Globals:   reads sshOptions
# Outputs:   the remote home directory on stdout
# Returns:   non-zero (with nothing on stdout) when ssh fails or reports an
#            empty home directory, so callers never build paths such as
#            "/solana" from an empty result.
remoteHomeDir() {
  declare ipAddress=$1
  declare remoteHome
  # \$HOME is escaped so that it is expanded by the remote shell.
  remoteHome="$(ssh "${sshOptions[@]}" "$ipAddress" "echo \$HOME")" || return
  if [[ -z $remoteHome ]]; then
    echo "Error: unable to determine the home directory on $ipAddress" >&2
    return 1
  fi
  echo "$remoteHome"
}
- startCommon() {
- declare ipAddress=$1
- declare remoteHome
- remoteHome=$(remoteHomeDir "$ipAddress")
- local remoteSolanaHome="${remoteHome}/solana"
- local remoteCargoBin="${remoteHome}/.cargo/bin"
- test -d "$SOLANA_ROOT"
- if $skipSetup; then
- # shellcheck disable=SC2029
- ssh "${sshOptions[@]}" "$ipAddress" "
- set -x;
- mkdir -p $remoteSolanaHome/config;
- rm -rf ~/config;
- mv $remoteSolanaHome/config ~;
- rm -rf $remoteSolanaHome;
- mkdir -p $remoteSolanaHome $remoteCargoBin;
- mv ~/config $remoteSolanaHome/
- "
- else
- # shellcheck disable=SC2029
- ssh "${sshOptions[@]}" "$ipAddress" "
- set -x;
- rm -rf $remoteSolanaHome;
- mkdir -p $remoteCargoBin
- "
- fi
- [[ -z "$externalNodeSshKey" ]] || ssh-copy-id -f -i "$externalNodeSshKey" "${sshOptions[@]}" "solana@$ipAddress"
- syncScripts "$ipAddress"
- }
# rsync the helper scripts and script directories from the local checkout
# into ~/solana on a remote host (net/log* is excluded).
#
# Arguments: $1 - address of the remote host
# Globals:   reads SOLANA_ROOT, sshOptions
syncScripts() {
  # Bind the argument before announcing it; echoing first would print
  # whatever ipAddress the caller (or the global scope) happens to hold.
  declare ipAddress=$1
  echo "rsyncing scripts... to $ipAddress"
  declare remoteHome
  remoteHome=$(remoteHomeDir "$ipAddress")
  local remoteSolanaHome="${remoteHome}/solana"
  rsync -vPrc -e "ssh ${sshOptions[*]}" \
    --exclude 'net/log*' \
    "$SOLANA_ROOT"/{fetch-perf-libs.sh,fetch-programs.sh,fetch-core-bpf.sh,fetch-spl.sh,scripts,net,multinode-demo} \
    "$ipAddress":"$remoteSolanaHome"/ > /dev/null
}
# Deploy local binaries to bootstrap validator. Other validators and clients later fetch the
# binaries from it
#
# Arguments: $1 - address of the bootstrap validator
# Globals:   reads deployMethod (tar|local|skip), SOLANA_ROOT, sshOptions
deployBootstrapValidator() {
  local ipAddress=$1
  local remoteHome
  remoteHome=$(remoteHomeDir "$ipAddress")
  local remoteCargoBin="${remoteHome}/.cargo/bin"
  echo "Deploying software to bootstrap validator ($ipAddress)"

  # Pick the local tree that holds bin/ and version.yml for this method.
  local artifactDir
  case $deployMethod in
  tar)
    artifactDir="$SOLANA_ROOT"/solana-release
    ;;
  local)
    artifactDir="$SOLANA_ROOT"/farf
    ;;
  skip)
    return 0
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    return
    ;;
  esac

  rsync -vPrc -e "ssh ${sshOptions[*]}" "$artifactDir"/bin/* "$ipAddress:$remoteCargoBin/"
  rsync -vPrc -e "ssh ${sshOptions[*]}" "$artifactDir"/version.yml "$ipAddress:~/"
}
# Prepare the bootstrap validator host, deploy the software to it and launch
# remote-node.sh there in the "bootstrap-validator" role.
#
# Arguments: $1 - address of the bootstrap validator
#            $2 - node index passed through to remote-node.sh
#            $3 - local log file that collects all output of the start-up
# On failure the log is dumped to stdout and the script exits with status 1.
startBootstrapLeader() {
  declare ipAddress=$1
  declare nodeIndex="$2"
  declare logFile="$3"
  echo "--- Starting bootstrap validator: $ipAddress"
  echo "start log: $logFile"

  (
    set -x
    # This subshell is the left-hand side of `||`, so `set -e` is not in
    # effect inside it: every step that must succeed is checked explicitly.
    startCommon "$ipAddress" || exit 1
    if [[ -n "$externalPrimordialAccountsFile" ]]; then
      rsync -vPrc -e "ssh ${sshOptions[*]}" "$externalPrimordialAccountsFile" \
        "$ipAddress:$remoteExternalPrimordialAccountsFile" || exit 1
    fi
    deployBootstrapValidator "$ipAddress" || exit 1

    # The argument list is positional; its order must match what
    # remote-node.sh expects (not visible here).
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh \
         $deployMethod \
         bootstrap-validator \
         $entrypointIp \
         $((${#validatorIpList[@]} + ${#blockstreamerIpList[@]})) \
         \"$RUST_LOG\" \
         $skipSetup \
         $failOnValidatorBootupFailure \
         \"$remoteExternalPrimordialAccountsFile\" \
         \"$maybeDisableAirdrops\" \
         \"$internalNodesStakeLamports\" \
         \"$internalNodesLamports\" \
         $nodeIndex \
         ${#clientIpList[@]} \"$benchTpsExtraArgs\" \
         \"$genesisOptions\" \
         \"$maybeNoSnapshot $maybeSkipLedgerVerify $maybeLimitLedgerSize $maybeWaitForSupermajority $maybeAccountsDbSkipShrink $maybeSkipRequireTower\" \
         \"$gpuMode\" \
         \"$maybeWarpSlot\" \
         \"$maybeFullRpc\" \
         \"$waitForNodeInit\" \
         \"$extraPrimordialStakes\" \
         \"$TMPFS_ACCOUNTS\" \
         \"$disableQuic\" \
         \"$enableUdp\" \
         \"$maybeWenRestart\" \
      "
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}
# Start a validator or blockstreamer node in the background.
#
# Arguments: $1 - address of the node
#            $2 - node type, passed to remote-node.sh (required)
#            $3 - node index, passed to remote-node.sh (required)
# The start-up runs in a background subshell that logs to
# "$netLogDir/validator-<ip>.log"; its pid is appended to the pids global and
# a "validator-<pid>.log" symlink is created so the log can be found by pid.
startNode() {
  local ipAddress=$1
  local nodeType=$2
  local nodeIndex="$3"

  initLogDir
  local logFile="$netLogDir/validator-$ipAddress.log"

  [[ -n $nodeType ]] || {
    echo nodeType not specified
    exit 1
  }
  [[ -n $nodeIndex ]] || {
    echo nodeIndex not specified
    exit 1
  }

  echo "--- Starting $nodeType: $ipAddress"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"

    if [[ $nodeType = blockstreamer && -n $letsEncryptDomainName ]]; then
      #
      # Create/renew TLS certificate
      #
      # A locally cached archive (if any) is pushed first, then the possibly
      # renewed archive is pulled back and becomes the new local cache.
      local localArchive=~/letsencrypt-"$letsEncryptDomainName".tgz
      if [[ -r "$localArchive" ]]; then
        timeout 30s scp "${sshOptions[@]}" "$localArchive" "$ipAddress:letsencrypt.tgz"
      fi
      ssh "${sshOptions[@]}" -n "$ipAddress" \
        "sudo -H /certbot-restore.sh $letsEncryptDomainName maintainers@solanalabs.com"
      rm -f letsencrypt.tgz
      timeout 30s scp "${sshOptions[@]}" "$ipAddress:/letsencrypt.tgz" letsencrypt.tgz
      test -s letsencrypt.tgz # Ensure non-empty before overwriting $localArchive
      cp letsencrypt.tgz "$localArchive"
    fi

    # Positional argument list; the order must match remote-node.sh.
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh \
         $deployMethod \
         $nodeType \
         $entrypointIp \
         $((${#validatorIpList[@]} + ${#blockstreamerIpList[@]})) \
         \"$RUST_LOG\" \
         $skipSetup \
         $failOnValidatorBootupFailure \
         \"$remoteExternalPrimordialAccountsFile\" \
         \"$maybeDisableAirdrops\" \
         \"$internalNodesStakeLamports\" \
         \"$internalNodesLamports\" \
         $nodeIndex \
         ${#clientIpList[@]} \"$benchTpsExtraArgs\" \
         \"$genesisOptions\" \
         \"$maybeNoSnapshot $maybeSkipLedgerVerify $maybeLimitLedgerSize $maybeWaitForSupermajority $maybeAccountsDbSkipShrink $maybeSkipRequireTower\" \
         \"$gpuMode\" \
         \"$maybeWarpSlot\" \
         \"$maybeFullRpc\" \
         \"$waitForNodeInit\" \
         \"$extraPrimordialStakes\" \
         \"$TMPFS_ACCOUNTS\" \
         \"$disableQuic\" \
         \"$enableUdp\" \
         \"$maybeWenRestart\" \
      "
  ) >> "$logFile" 2>&1 &
  local startPid=$!
  ln -sf "validator-$ipAddress.log" "$netLogDir/validator-$startPid.log"
  pids+=("$startPid")
}
# Prepare a client host and launch remote-client.sh on it.
#
# Arguments: $1 - address of the client host
#            $2 - client program to run
#            $3 - client index passed to remote-client.sh
# All output goes to "$netLogDir/client-<program>-<ip>.log"; on failure the
# log is printed and the script exits with status 1.
startClient() {
  local ipAddress=$1
  local clientToRun="$2"
  local clientIndex="$3"

  initLogDir
  local logFile="$netLogDir/client-$clientToRun-$ipAddress.log"
  echo "--- Starting client: $ipAddress - $clientToRun"
  echo "start log: $logFile"

  if ! (
    set -x
    startCommon "$ipAddress"
    ssh "${sshOptions[@]}" -f "$ipAddress" \
      "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp \
      $clientToRun \"$RUST_LOG\" \"$benchTpsExtraArgs\" $clientIndex $clientType \
      $maybeUseUnstakedConnection"
  ) >> "$logFile" 2>&1; then
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  fi
}
# Start the client programs: the first numBenchTpsClients hosts of
# clientIpList run solana-bench-tps, the remaining requested hosts run "idle".
#
# Globals: reads numClients, numClientsRequested, numBenchTpsClients,
#          clientIpList
startClients() {
  local i
  for ((i = 0; i < numClients && i < numClientsRequested; i++)); do
    if ((i < numBenchTpsClients)); then
      startClient "${clientIpList[$i]}" "solana-bench-tps" "$i"
    else
      # Pass the index for idle clients too: startClient interpolates it
      # unquoted into a positional argument list, so leaving it empty shifts
      # every following argument one slot to the left.
      startClient "${clientIpList[$i]}" "idle" "$i"
    fi
  done
}
# Run remote-sanity.sh on the bootstrap validator and, unless told otherwise,
# on the first blockstreamer node as well. Exits with status 1 as soon as one
# of the checks fails.
#
# Arguments: $1 - any non-empty value skips the blockstreamer check
# Globals:   reads validatorIpList, blockstreamerIpList, sanityExtraArgs,
#            sshOptions, RUST_LOG, metricsWriteDatapoint
sanity() {
  local skipBlockstreamerSanity=$1

  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"

  local bootstrapLeader=${validatorIpList[0]}
  local blockstreamer=${blockstreamerIpList[0]}

  annotateBlockexplorerUrl

  # The bootstrap validator is always checked, and checked first.
  local -a sanityTargets=("$bootstrapLeader")
  if [[ -z $skipBlockstreamerSanity && -n $blockstreamer ]]; then
    # If there's a blockstreamer node run a reduced sanity check on it as well
    sanityTargets+=("$blockstreamer")
  fi

  local target
  for target in "${sanityTargets[@]}"; do
    echo "--- Sanity: $target"
    if ! (
      set -x
      # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
      ssh "${sshOptions[@]}" "$target" \
        "./solana/net/remote/remote-sanity.sh $target $sanityExtraArgs \"$RUST_LOG\""
    ); then
      exit 1
    fi
  done

  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
}
# Publish an agave-install update for every platform in updatePlatforms via
# the bootstrap validator.
#
# Returns early when no platform was requested; exits with status 1 when no
# release channel is set or when any per-platform step fails.
# Globals: reads updatePlatforms, releaseChannel, validatorIpList, sshOptions
deployUpdate() {
  [[ -n $updatePlatforms ]] || {
    echo "No update platforms"
    return
  }
  [[ -n $releaseChannel ]] || {
    echo "Release channel not specified (use -t option)"
    exit 1
  }

  local bootstrapLeader=${validatorIpList[0]}

  # updatePlatforms is a space separated list, hence the unquoted expansion.
  for updatePlatform in $updatePlatforms; do
    echo "--- Deploying agave-install update: $updatePlatform"
    if ! (
      set -x

      scripts/agave-install-update-manifest-keypair.sh "$updatePlatform"

      timeout 30s scp "${sshOptions[@]}" \
        update_manifest_keypair.json "$bootstrapLeader:solana/update_manifest_keypair.json"

      # shellcheck disable=SC2029 # remote-deploy-update.sh args are expanded on client side intentionally
      ssh "${sshOptions[@]}" "$bootstrapLeader" \
        "./solana/net/remote/remote-deploy-update.sh $releaseChannel $updatePlatform"
    ); then
      exit 1
    fi
  done
}
# Look up the global nodeAddress in the validator and blockstreamer lists.
#
# On success the globals nodeType ("validator" or "blockstreamer") and
# nodeIndex (position in the combined list) are set; the loop variable
# ipAddress is also left holding the matched address. Exits with status 1
# when nodeAddress is empty or not found.
getNodeType() {
  echo "getNodeType: $nodeAddress"
  if [[ -z $nodeAddress ]]; then
    echo "Error: nodeAddress not set"
    exit 1
  fi

  nodeIndex=0        # <-- global
  nodeType=validator # <-- global
  # The literal "b" is a sentinel marking where the blockstreamer addresses
  # begin; it does not advance nodeIndex.
  for ipAddress in "${validatorIpList[@]}" b "${blockstreamerIpList[@]}"; do
    case $ipAddress in
    b)
      nodeType=blockstreamer
      ;;
    "$nodeAddress")
      echo "getNodeType: $nodeType ($nodeIndex)"
      return
      ;;
    *)
      nodeIndex=$((nodeIndex + 1))
      ;;
    esac
  done

  echo "Error: Unknown node: $nodeAddress"
  exit 1
}
# Get the software ready for deployment according to deployMethod:
#   tar   - optionally download the release tarball for releaseChannel, then
#           unpack tarballFilename into "$SOLANA_ROOT/solana-release"
#   local - build from source unless --no-build was given
#   skip  - nothing to prepare
#
# With deployIfNewer set (tar deployments only) the version.yml of the first
# validator is compared with the unpacked release, and the script exits with
# status 0 when they are identical.
prepareDeploy() {
  case $deployMethod in
  tar)
    if [[ -n $releaseChannel ]]; then
      echo "Downloading release from channel: $releaseChannel"
      rm -f "$SOLANA_ROOT"/solana-release.tar.bz2
      declare updateDownloadUrl=https://release.anza.xyz/"$releaseChannel"/solana-release-x86_64-unknown-linux-gnu.tar.bz2
      (
        set -x
        curl -L -I "$updateDownloadUrl"
        # --fail: without it an HTTP error page (e.g. a 404 for an unknown
        # tag) would be saved as the tarball and only fail later in tar.
        curl -L --fail --retry 5 --retry-delay 2 --retry-connrefused \
          -o "$SOLANA_ROOT"/solana-release.tar.bz2 "$updateDownloadUrl"
      )
      tarballFilename="$SOLANA_ROOT"/solana-release.tar.bz2
    fi
    (
      set -x
      rm -rf "$SOLANA_ROOT"/solana-release
      # Never unpack into an unrelated directory if the cd fails.
      cd "$SOLANA_ROOT" || exit 1
      tar jfxv "$tarballFilename"
      cat "$SOLANA_ROOT"/solana-release/version.yml
    )
    ;;
  local)
    if $doBuild; then
      build
    else
      echo "Build skipped due to --no-build"
    fi
    ;;
  skip)
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac

  if [[ -n $deployIfNewer ]]; then
    if [[ $deployMethod != tar ]]; then
      echo "Error: --deploy-if-newer only supported for tar deployments"
      exit 1
    fi

    echo "Fetching current software version"
    (
      set -x
      rsync -vPrc -e "ssh ${sshOptions[*]}" "${validatorIpList[0]}":~/version.yml current-version.yml
    )
    cat current-version.yml
    if ! diff -q current-version.yml "$SOLANA_ROOT"/solana-release/version.yml; then
      echo "Cluster software version is old. Update required"
    else
      echo "Cluster software version is current. No update required"
      exit 0
    fi
  fi
}
# Bring the whole network up: start the bootstrap validator, then every other
# validator/blockstreamer in the background, wait for them, run the sanity
# check, start the clients and print a timing summary.
#
# Globals: reads validatorIpList, blockstreamerIpList, clientIpList,
#          failOnValidatorBootupFailure, waitForNodeInit, clientDelayStart,
#          deployMethod, metricsWriteDatapoint, sshOptions;
#          writes nodeAddress, nodeType, nodeIndex, pids, SECONDS and the
#          *DeployTime variables.
deploy() {
  initLogDir

  echo "Deployment started at $(date)"
  $metricsWriteDatapoint "testnet-deploy net-start-begin=1"

  # The first address in the list is treated as the bootstrap validator.
  declare bootstrapLeader=true
  for nodeAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}"; do
    nodeType=
    nodeIndex=
    getNodeType # sets nodeType and nodeIndex for $nodeAddress
    # Use $nodeAddress below rather than the ipAddress variable that happens
    # to leak out of getNodeType's loop.
    if $bootstrapLeader; then
      SECONDS=0
      declare bootstrapNodeDeployTime=
      startBootstrapLeader "$nodeAddress" "$nodeIndex" "$netLogDir/bootstrap-validator-$nodeAddress.log"
      bootstrapNodeDeployTime=$SECONDS
      $metricsWriteDatapoint "testnet-deploy net-bootnode-leader-started=1"

      bootstrapLeader=false
      SECONDS=0
      pids=()
    else
      startNode "$nodeAddress" "$nodeType" "$nodeIndex"

      # Stagger additional node start time. If too many nodes start simultaneously
      # the bootstrap node gets more rsync requests from the additional nodes than
      # it can handle.
      sleep 2
    fi
  done

  # Collect the background start-ups launched by startNode.
  for pid in "${pids[@]}"; do
    declare ok=true
    wait "$pid" || ok=false
    if ! $ok; then
      echo "+++ validator failed to start"
      cat "$netLogDir/validator-$pid.log"
      if $failOnValidatorBootupFailure; then
        exit 1
      else
        echo "Failure is non-fatal"
      fi
    fi
  done

  if ! $waitForNodeInit; then
    # Handle async init
    # All nodes share one 600 second budget; each is given what is left of it.
    declare startTime=$SECONDS
    for ipAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}"; do
      declare timeWaited=$((SECONDS - startTime))
      if [[ $timeWaited -gt 600 ]]; then
        break
      fi
      ssh "${sshOptions[@]}" -n "$ipAddress" \
        "./solana/net/remote/remote-node-wait-init.sh $((600 - timeWaited))"
    done
  fi

  $metricsWriteDatapoint "testnet-deploy net-validators-started=1"
  additionalNodeDeployTime=$SECONDS

  annotateBlockexplorerUrl

  sanity skipBlockstreamerSanity # skip sanity on blockstreamer node, it may not
                                 # have caught up to the bootstrap validator yet

  echo "--- Sleeping $clientDelayStart seconds after validators are started before starting clients"
  sleep "$clientDelayStart"

  SECONDS=0
  startClients
  clientDeployTime=$SECONDS

  $metricsWriteDatapoint "testnet-deploy net-start-complete=1"

  declare networkVersion=unknown
  case $deployMethod in
  tar)
    networkVersion="$(
      (
        set -o pipefail
        grep "^commit: " "$SOLANA_ROOT"/solana-release/version.yml | head -n1 | cut -d\  -f2
      ) || echo "tar-unknown"
    )"
    ;;
  local)
    networkVersion="$(git rev-parse HEAD || echo local-unknown)"
    ;;
  skip)
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac
  $metricsWriteDatapoint "testnet-deploy version=\"${networkVersion:0:9}\""

  echo
  echo "--- Deployment Successful"
  echo "Bootstrap validator deployment took $bootstrapNodeDeployTime seconds"
  echo "Additional validator deployment (${#validatorIpList[@]} validators, ${#blockstreamerIpList[@]} blockstreamer nodes) took $additionalNodeDeployTime seconds"
  echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
  echo "Network start logs in $netLogDir"
}
# Stop whatever is running on a node by invoking remote cleanup.sh on it.
#
# Arguments: $1 - address of the node
#            $2 - "true" to wait for the stop to finish before returning;
#                 otherwise the background pid is appended to the pids global
#                 for the caller to wait on.
# Output of the remote command goes to "$netLogDir/stop-validator-<ip>.log".
stopNode() {
  local ipAddress=$1
  local block=$2

  initLogDir
  local logFile="$netLogDir/stop-validator-$ipAddress.log"

  echo "--- Stopping node: $ipAddress"
  echo "stop log: $logFile"
  syncScripts "$ipAddress"
  (
    # Since cleanup.sh does a pkill, we cannot pass the command directly,
    # otherwise the process which is doing the killing will be killed because
    # the script itself will match the pkill pattern
    set -x
    # shellcheck disable=SC2029 # It's desired that PS4 be expanded on the client side
    ssh "${sshOptions[@]}" "$ipAddress" "PS4=\"$PS4\" ./solana/net/remote/cleanup.sh"
  ) >> "$logFile" 2>&1 &

  local stopPid=$!
  ln -sf "stop-validator-$ipAddress.log" "$netLogDir/stop-validator-$stopPid.log"

  if $block; then
    # A failed stop is deliberately not fatal.
    wait "$stopPid" || true
    return 0
  fi
  pids+=("$stopPid")
}
# Stop every validator, blockstreamer and client node in parallel, then wait
# for all of the background stops to finish and report the elapsed time.
#
# Globals: reads validatorIpList, blockstreamerIpList, clientIpList,
#          metricsWriteDatapoint; writes pids and SECONDS.
stop() {
  SECONDS=0
  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"

  local loopCount=0
  pids=()
  for ipAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}" "${clientIpList[@]}"; do
    stopNode "$ipAddress" false

    # Stagger additional node stop time to avoid too many concurrent ssh
    # sessions
    if ((loopCount++ % 4 == 0)); then
      sleep 2
    fi
  done

  echo --- Waiting for nodes to finish stopping
  for pid in "${pids[@]}"; do
    echo -n "$pid "
    # Individual stop failures are not fatal.
    wait "$pid" || true
  done
  echo

  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
  echo "Stopping nodes took $SECONDS seconds"
}
# Ping every validator once and exit with status 1, after printing a warning,
# as soon as one of them does not answer.
checkPremptibleInstances() {
  # The validatorIpList nodes may be preemptible instances that can disappear at
  # any time.  Try to detect when a validator has been preempted to help the user
  # out.
  #
  # Of course this isn't airtight as an instance could always disappear
  # immediately after its successfully pinged.
  for ipAddress in "${validatorIpList[@]}"; do
    (
      # Without pipefail the pipeline reports the status of `tr`, which hides
      # a failed or timed-out ping and makes the warning below unreachable.
      set -o pipefail
      timeout 5s ping -c 1 "$ipAddress" | tr - _ &>/dev/null
    ) || {
      cat <<EOF
Warning: $ipAddress may have been preempted.
Run |./gce.sh config| to restart it
EOF
      exit 1
    }
  done
}
- releaseChannel=
- deployMethod=local
- deployIfNewer=
- sanityExtraArgs=
- skipSetup=false
- updatePlatforms=
- nodeAddress=
- numIdleClients=0
- numBenchTpsClients=0
- benchTpsExtraArgs=
- failOnValidatorBootupFailure=true
- genesisOptions=
- numValidatorsRequested=
- externalPrimordialAccountsFile=
- remoteExternalPrimordialAccountsFile=
- internalNodesStakeLamports=
- internalNodesLamports=
- maybeNoSnapshot=""
- maybeLimitLedgerSize=""
- maybeSkipLedgerVerify=""
- maybeDisableAirdrops=""
- maybeWaitForSupermajority=""
- maybeAccountsDbSkipShrink=""
- maybeSkipRequireTower=""
- debugBuild=false
- profileBuild=false
- doBuild=true
- gpuMode=auto
- netemPartition=""
- netemConfig=""
- netemConfigFile=""
- netemCommand="add"
- clientDelayStart=0
- netLogDir=
- maybeWarpSlot=
- maybeFullRpc=false
- waitForNodeInit=true
- extraPrimordialStakes=0
- disableQuic=false
- enableUdp=false
- clientType=tpu-client
- maybeUseUnstakedConnection=""
- maybeWenRestart=""
# The first argument is the sub-command; everything after it is an option.
command=$1
[[ -n $command ]] || usage
shift

# Long ("--name") options are consumed here. Anything else is collected in
# shortArgs and handed to getopts afterwards.
# Note: parsing stops at the first empty argument.
shortArgs=()
while [[ -n $1 ]]; do
  case $1 in
  # Options that are passed through verbatim as genesis options.
  --hashes-per-tick | --slots-per-epoch | --target-lamports-per-signature | --faucet-lamports)
    genesisOptions="$genesisOptions $1 $2"
    shift 2
    ;;
  --cluster-type)
    case "$2" in
    development | devnet | testnet | mainnet-beta) ;;
    *)
      echo "Unexpected cluster type: \"$2\""
      exit 1
      ;;
    esac
    genesisOptions="$genesisOptions $1 $2"
    shift 2
    ;;
  --no-snapshot-fetch)
    maybeNoSnapshot="$1"
    shift 1
    ;;
  --deploy-if-newer)
    deployIfNewer=1
    shift 1
    ;;
  --no-deploy)
    deployMethod=skip
    shift 1
    ;;
  --no-build)
    doBuild=false
    shift 1
    ;;
  --limit-ledger-size)
    maybeLimitLedgerSize="$1 $2"
    shift 2
    ;;
  --skip-poh-verify)
    maybeSkipLedgerVerify="$1"
    shift 1
    ;;
  --skip-setup)
    skipSetup=true
    shift 1
    ;;
  --platform)
    updatePlatforms="$updatePlatforms $2"
    shift 2
    ;;
  --internal-nodes-stake-lamports)
    internalNodesStakeLamports="$2"
    shift 2
    ;;
  --internal-nodes-lamports)
    internalNodesLamports="$2"
    shift 2
    ;;
  --external-accounts-file)
    externalPrimordialAccountsFile="$2"
    remoteExternalPrimordialAccountsFile=/tmp/external-primordial-accounts.yml
    shift 2
    ;;
  --no-airdrop)
    maybeDisableAirdrops="$1"
    shift 1
    ;;
  --debug)
    debugBuild=true
    shift 1
    ;;
  --profile)
    profileBuild=true
    shift 1
    ;;
  --partition)
    netemPartition=$2
    shift 2
    ;;
  --config)
    netemConfig=$2
    shift 2
    ;;
  --config-file)
    netemConfigFile=$2
    shift 2
    ;;
  --netem-cmd)
    netemCommand=$2
    shift 2
    ;;
  --gpu-mode)
    gpuMode=$2
    case "$gpuMode" in
    on | off | auto | cuda) ;;
    *)
      echo "Unexpected GPU mode: \"$gpuMode\""
      exit 1
      ;;
    esac
    shift 2
    ;;
  --client-delay-start)
    clientDelayStart=$2
    shift 2
    ;;
  --wait-for-supermajority)
    maybeWaitForSupermajority="$1 $2"
    shift 2
    ;;
  --warp-slot)
    maybeWarpSlot="$1 $2"
    shift 2
    ;;
  --full-rpc)
    maybeFullRpc=true
    shift 1
    ;;
  --tpu-disable-quic)
    disableQuic=true
    shift 1
    ;;
  --tpu-enable-udp)
    enableUdp=true
    shift 1
    ;;
  --async-node-init)
    waitForNodeInit=false
    shift 1
    ;;
  --extra-primordial-stakes)
    extraPrimordialStakes=$2
    shift 2
    ;;
  --allow-private-addr)
    # Accepted for backwards compatibility only.
    echo "--allow-private-addr is a default value"
    shift 1
    ;;
  --accounts-db-skip-shrink)
    maybeAccountsDbSkipShrink="$1"
    shift 1
    ;;
  --skip-require-tower)
    maybeSkipRequireTower="$1"
    shift 1
    ;;
  --client-type)
    clientType=$2
    case "$clientType" in
    tpu-client | rpc-client) ;;
    *)
      echo "Unexpected client type: \"$clientType\""
      exit 1
      ;;
    esac
    shift 2
    ;;
  --use-unstaked-connection)
    maybeUseUnstakedConnection="$1"
    shift 1
    ;;
  --wen-restart)
    # wen_restart needs tower storage to be there, so set skipSetup to true
    # to avoid erasing the tower storage on disk.
    skipSetup=true
    maybeWenRestart="$2"
    shift 2
    ;;
  --*)
    usage "Unknown long option: $1"
    ;;
  *)
    shortArgs+=("$1")
    shift
    ;;
  esac
done
#######################################
# Parse one "-c" value of the form clientType=numClients[=extraArgs] and
# record the request in the matching globals.
# Globals:
#   OPTARG             (read)    raw option value supplied by getopts
#   numIdleClients     (written) when clientType is "idle"
#   numBenchTpsClients (written) when clientType is "bench-tps"
#   benchTpsExtraArgs  (written) when clientType is "bench-tps"
# Outputs:
#   Writes a diagnostic to stdout and exits the shell with status 1 when the
#   value has no "=", the count is not a non-negative integer, or the client
#   type is unknown.
#######################################
getClientTypeAndNum() {
  if ! [[ $OPTARG == *'='* ]]; then
    echo "Error: Expecting tuple \"clientType=numClientType=extraArgs\" but got \"$OPTARG\""
    exit 1
  fi

  # Split on the first two "=" only: read hands everything that is left over
  # (including any further "=") to the last name, so extra args such as
  # "--tx_count=100" reach the client intact instead of being cut short.
  local clientType numClients extraArgs
  IFS='=' read -r clientType numClients extraArgs <<< "$OPTARG"

  local numberPattern='^[0-9]+$'
  if ! [[ $numClients =~ $numberPattern ]]; then
    echo "error: numClientType must be a number but got \"$numClients\""
    exit 1
  fi

  case $clientType in
  idle)
    # extraArgs is ignored for 'idle'
    numIdleClients=$numClients
    ;;
  bench-tps)
    numBenchTpsClients=$numClients
    benchTpsExtraArgs=$extraArgs
    ;;
  *)
    echo "Unknown client type: $clientType"
    exit 1
    ;;
  esac
}

# Short options were collected into shortArgs while the long options were
# being consumed; parse them here with getopts.
# NOTE(review): the option string accepts "-f <arg>" but no arm handles it, so
# it falls through to the catch-all below - confirm whether that is intended.
while getopts "h?T:t:o:f:rc:Fn:i:d" opt "${shortArgs[@]}"; do
  case $opt in
  h | \?)
    usage
    ;;
  T)
    # Deploy from a local tarball, which must be readable.
    tarballFilename=$OPTARG
    [[ -r $tarballFilename ]] || usage "File not readable: $tarballFilename"
    deployMethod=tar
    ;;
  t)
    # Deploy a published release: a named channel or an explicit v* tag.
    case $OPTARG in
    edge | beta | stable | v*)
      releaseChannel=$OPTARG
      deployMethod=tar
      ;;
    *)
      usage "Invalid release channel: $OPTARG"
      ;;
    esac
    ;;
  n)
    numValidatorsRequested=$OPTARG
    ;;
  r)
    skipSetup=true
    ;;
  o)
    # Accumulate the recognised values as "-o <value>" in sanityExtraArgs.
    case $OPTARG in
    rejectExtraNodes | noInstallCheck)
      sanityExtraArgs="$sanityExtraArgs -o $OPTARG"
      ;;
    *)
      usage "Unknown option: $OPTARG"
      ;;
    esac
    ;;
  c)
    getClientTypeAndNum
    ;;
  F)
    failOnValidatorBootupFailure=false
    ;;
  i)
    nodeAddress=$OPTARG
    ;;
  d)
    debugBuild=true
    ;;
  *)
    usage "Error: unhandled option: $opt"
    ;;
  esac
done
# Option parsing is done; load the configuration before the node lists are
# consulted below.
loadConfigFile

# -n: keep only the first $numValidatorsRequested entries of validatorIpList.
if [[ -n $numValidatorsRequested ]]; then
  truncatedNodeList=("${validatorIpList[@]:0:$numValidatorsRequested}")
  unset validatorIpList
  validatorIpList=("${truncatedNodeList[@]}")
fi

# Reconcile the requested client counts with the clients actually available.
# When nothing was requested, every available client runs bench-tps.
numClients=${#clientIpList[@]}
numClientsRequested=$((numBenchTpsClients + numIdleClients))
if ((numClientsRequested == 0)); then
  numBenchTpsClients=$numClients
  numClientsRequested=$numClients
elif ((numClientsRequested > numClients)); then
  echo "Error: More clients requested ($numClientsRequested) then available ($numClients)"
  exit 1
fi

# Both variables hold "<flag> <slot>"; when both flags were supplied the two
# slots have to agree.
if [[ -n $maybeWaitForSupermajority && -n $maybeWarpSlot ]]; then
  read -r _ waitSlot <<< "$maybeWaitForSupermajority"
  read -r _ warpSlot <<< "$maybeWarpSlot"
  if [[ $waitSlot -ne $warpSlot ]]; then
    printf '%s\n' \
      "Error: When specifying both --wait-for-supermajority and --warp-slot," \
      "they must use the same slot. ($waitSlot != $warpSlot)"
    exit 1
  fi
fi

echo "net.sh: Primordial stakes: $extraPrimordialStakes"
if [[ $extraPrimordialStakes -gt 0 ]]; then
  # Extra primordial stakes require that all of the validators start at the
  # same time, so force async init and make sure a supermajority wait is set.
  waitForNodeInit=false
  if [[ -z $maybeWaitForSupermajority ]]; then
    # Default to slot 1 unless a warp slot was supplied, in which case reuse it.
    waitSlot=1
    if [[ -n $maybeWarpSlot ]]; then
      read -r _ waitSlot <<< "$maybeWarpSlot"
    fi
    maybeWaitForSupermajority="--wait-for-supermajority $waitSlot"
  fi
fi

checkPremptibleInstances
# Dispatch on the sub-command selected earlier in the script ($command).
case $command in
restart)
  prepareDeploy
  stop
  deploy
  ;;
start)
  prepareDeploy
  deploy
  ;;
prepare)
  prepareDeploy
  ;;
sanity)
  sanity
  ;;
stop)
  stop
  ;;
update)
  deployUpdate
  ;;
upgrade)
  bootstrapValidatorIp="${validatorIpList[0]}"
  prepareDeploy
  deployBootstrapValidator "$bootstrapValidatorIp"
  # (start|stop)Node need refactored to support restarting the bootstrap validator
  ;;
stopnode)
  if [[ -z $nodeAddress ]]; then
    usage "node address (-i) not specified"
    exit 1
  fi
  stopNode "$nodeAddress" true
  ;;
startnode)
  if [[ -z $nodeAddress ]]; then
    usage "node address (-i) not specified"
    exit 1
  fi
  # nodeType/nodeIndex are cleared here and read back after getNodeType runs;
  # presumably getNodeType fills them in for $nodeAddress - confirm.
  nodeType=
  nodeIndex=
  getNodeType
  startNode "$nodeAddress" "$nodeType" "$nodeIndex"
  ;;
startclients)
  startClients
  ;;
logs)
  initLogDir

  # Copy solana/<log>.log from a remote host into $netLogDir as
  # remote-<log>-<ip>.log. Best effort: a failed or timed-out copy is
  # reported and the remaining hosts are still processed.
  #   $1 - remote IP address
  #   $2 - log base name (without the .log suffix)
  fetchRemoteLog() {
    declare ipAddress=$1
    declare log=$2
    echo "--- fetching $log from $ipAddress"
    (
      set -x
      timeout 30s scp "${sshOptions[@]}" \
        "$ipAddress":solana/"$log".log "$netLogDir"/remote-"$log"-"$ipAddress".log
    ) || echo "failed to fetch log"
  }

  fetchRemoteLog "${validatorIpList[0]}" faucet
  for ipAddress in "${validatorIpList[@]}"; do
    fetchRemoteLog "$ipAddress" validator
  done
  for ipAddress in "${clientIpList[@]}"; do
    fetchRemoteLog "$ipAddress" client
  done
  for ipAddress in "${blockstreamerIpList[@]}"; do
    fetchRemoteLog "$ipAddress" validator
  done
  ;;
netem)
  if [[ -n $netemConfigFile ]]; then
    # Config-file mode: run net-shaper.sh on every validator, passing the
    # command, the remote config path, the validator count and this index.
    remoteNetemConfigFile="$(basename "$netemConfigFile")"
    if [[ $netemCommand = "add" ]]; then
      # The config file is copied to each validator only for "add".
      for ipAddress in "${validatorIpList[@]}"; do
        remoteHome=$(remoteHomeDir "$ipAddress")
        remoteSolanaHome="${remoteHome}/solana"
        "$here"/scp.sh "$netemConfigFile" solana@"$ipAddress":"$remoteSolanaHome"
      done
    fi
    # NOTE(review): ~solana is unquoted, so the local shell attempts tilde
    # expansion before ssh.sh runs; presumably expansion on the remote host is
    # what is intended - confirm.
    for i in "${!validatorIpList[@]}"; do
      "$here"/ssh.sh solana@"${validatorIpList[$i]}" 'solana/scripts/net-shaper.sh' \
        "$netemCommand" ~solana/solana/"$remoteNetemConfigFile" "${#validatorIpList[@]}" "$i"
    done
  else
    # Partition mode: apply netemConfig to the first netemPartition percent of
    # the validators, rounded up and capped at the number of validators.
    num_nodes=$((${#validatorIpList[@]} * netemPartition / 100))
    if [[ $((${#validatorIpList[@]} * netemPartition % 100)) -gt 0 ]]; then
      num_nodes=$((num_nodes + 1))
    fi
    if [[ "$num_nodes" -gt "${#validatorIpList[@]}" ]]; then
      num_nodes=${#validatorIpList[@]}
    fi

    # Stop netem on all nodes
    for ipAddress in "${validatorIpList[@]}"; do
      "$here"/ssh.sh solana@"$ipAddress" 'solana/scripts/netem.sh delete < solana/netem.cfg || true'
    done

    # Start netem on required nodes. The config is quoted in both places so
    # the remote shell writes it to netem.cfg verbatim (no word splitting or
    # glob expansion), matching what is handed to "netem.sh add".
    for ((i = 0; i < num_nodes; i++)); do
      "$here"/ssh.sh solana@"${validatorIpList[$i]}" "echo \"$netemConfig\" > solana/netem.cfg; solana/scripts/netem.sh add \"$netemConfig\""
    done
  fi
  ;;
*)
  echo "Internal error: Unknown command: $command"
  usage
  exit 1
  ;;
esac
|