| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353 |
#!/usr/bin/env bash
#
# Helper functions for cluster test automation: log collection, stake waits,
# metrics queries and result uploads.
# Usage: source this file; it is not meant to be executed directly.
#
# shellcheck disable=SC1090,SC1091,SC2034

# Directory that holds this file, resolved through BASH_SOURCE so the value
# does not depend on the caller's working directory.
DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd
)"

# The repository root is the parent of this file's directory.
REPO_ROOT="${DIR}/.."

# Presumably defines upload-ci-artifact, which the functions below call.
source "${REPO_ROOT}/ci/upload-ci-artifact.sh"
# Print a step banner ("--- <text>") for the build log.
# Globals:   STEP (written) - the banner text, all arguments joined by spaces
# Arguments: the words that make up the step description
# Outputs:   one line on stdout
execution_step() {
  # "$*" joins on the first character of IFS; pin it to a space so that a
  # caller-modified IFS cannot change the banner text.
  local IFS=' '
  STEP="$*"
  printf -- '--- %s\n' "$STEP"
}
# Fetch logs through net.sh and upload every fetched file as a CI artifact.
# Globals:   REPO_ROOT (read)
# Outputs:   whatever execution_step, net.sh and upload-ci-artifact print
collect_logs() {
  local logfile

  execution_step "Collect logs from remote nodes"

  # ":?" aborts instead of expanding to "/net/log" when REPO_ROOT is empty.
  rm -rf -- "${REPO_ROOT:?}"/net/log
  "${REPO_ROOT}"/net/net.sh logs

  for logfile in "${REPO_ROOT}"/net/log/*; do
    # An unmatched glob stays literal; skip it rather than uploading a path
    # that does not exist.
    [[ -e $logfile ]] || continue
    # Each upload runs in its own subshell, as in the original code.
    (
      upload-ci-artifact "$logfile"
    )
  done
}
# Post-process the iftop logs on every validator, collect the results under
# ./iftop-logs, upload them and run the packet-loss analyzer over them.
# Globals:   REPO_ROOT (read); validatorIpList and validatorIpListPrivate are
#            expected to come from net/config/config
# Outputs:   ip_address_map.txt and iftop-logs/ in the current directory;
#            sorted analyzer output on stdout
analyze_packet_loss() {
  # Subshell: "set -x" and the sourced configuration stay out of the caller.
  (
    set -x
    # shellcheck disable=SC1091
    source "${REPO_ROOT}"/net/config/config
    mkdir -p iftop-logs

    execution_step "Map private -> public IP addresses in iftop logs"
    # One JSON object per validator, each followed by a comma.
    # shellcheck disable=SC2154
    for i in "${!validatorIpList[@]}"; do
      printf '{"private": "%s", "public": "%s"},\n' \
        "${validatorIpListPrivate[$i]}" "${validatorIpList[$i]}"
    done > ip_address_map.txt

    for ip in "${validatorIpList[@]}"; do
      "${REPO_ROOT}"/net/scp.sh ip_address_map.txt solana@"$ip":~/solana/
    done

    execution_step "Remotely post-process iftop logs"
    for ip in "${validatorIpList[@]}"; do
      iftop_log=iftop-logs/$ip-iftop.log
      # Single quotes are deliberate: PATH and ~ must expand on the remote host.
      # shellcheck disable=SC2016
      "${REPO_ROOT}"/net/ssh.sh solana@"$ip" 'PATH=$PATH:~/.cargo/bin/ ~/solana/scripts/iftop-postprocess.sh ~/solana/iftop.log temp.log ~solana/solana/ip_address_map.txt' > "$iftop_log"
      upload-ci-artifact "$iftop_log"
    done

    execution_step "Analyzing Packet Loss"
    "${REPO_ROOT}"/solana-release/bin/solana-log-analyzer analyze -f ./iftop-logs/ |
      sort -k 2 -g
  )
}
# Block until no validator holds more than the given percentage of stake.
# The wait itself is done by "solana wait-for-max-stake", run over ssh on the
# first entry of validatorIpList.
# Globals:   max_stake (written), REPO_ROOT (read)
# Arguments: $1 - maximum stake percentage; 100 returns immediately
wait_for_max_stake() {
  max_stake="$1"

  # A 100% limit needs no waiting.
  [[ $max_stake -eq 100 ]] && return

  source "${REPO_ROOT}/net/common.sh"
  loadConfigFile

  # \$HOME is escaped so it expands remotely; $max_stake expands locally.
  # shellcheck disable=SC2154
  # shellcheck disable=SC2029
  ssh "${sshOptions[@]}" "${validatorIpList[0]}" \
    "RUST_LOG=info \$HOME/.cargo/bin/solana wait-for-max-stake $max_stake --url http://127.0.0.1:8899"
}
# Wait until stake is spread roughly evenly: the limit handed to
# wait_for_max_stake is 100 / <number of validators> + 1 (integer division).
# Globals:   max_stake (written), REPO_ROOT (read)
# Returns:   1 if the loaded configuration lists no validators, otherwise
#            the status of wait_for_max_stake
wait_for_equal_stake() {
  source "${REPO_ROOT}"/net/common.sh
  loadConfigFile

  # shellcheck disable=SC2154
  local validator_count=${#validatorIpList[@]}
  # Without this guard the arithmetic below is a division by zero.
  if (( validator_count == 0 )); then
    echo "wait_for_equal_stake: validatorIpList is empty" >&2
    return 1
  fi

  max_stake=$((100 / validator_count + 1))
  execution_step "Waiting for max stake to fall below ${max_stake}%"
  wait_for_max_stake "$max_stake"
}
# Run "solana slot" over ssh on the first entry of validatorIpList, against
# that host's local RPC endpoint; the remote output goes to stdout.
# Globals:   REPO_ROOT (read)
get_slot() {
  source "${REPO_ROOT}/net/common.sh"
  loadConfigFile

  # Single quotes are deliberate: $HOME must expand on the remote host.
  # shellcheck disable=SC2016
  local remote_cmd='$HOME/.cargo/bin/solana --url http://127.0.0.1:8899 slot'
  # shellcheck disable=SC2154
  ssh "${sshOptions[@]}" "${validatorIpList[0]}" "$remote_cmd"
}
# Print the first entry of validatorIpList after loading the configuration.
# Globals:   REPO_ROOT (read)
# Outputs:   the address on stdout
get_bootstrap_validator_ip_address() {
  source "${REPO_ROOT}/net/common.sh"
  loadConfigFile

  # shellcheck disable=SC2154
  local bootstrap_ip=${validatorIpList[0]}
  echo "$bootstrap_ip"
}
# Print the totalActiveStake value from "solana validators --output=json",
# run over ssh on the first entry of validatorIpList. The number is extracted
# remotely with grep and cut.
# Globals:   REPO_ROOT (read)
get_active_stake() {
  source "${REPO_ROOT}/net/common.sh"
  loadConfigFile

  # Single quotes are deliberate: the whole pipeline runs on the remote host.
  # shellcheck disable=SC2016
  local remote_pipeline='$HOME/.cargo/bin/solana --url http://127.0.0.1:8899 validators --output=json | grep -o "totalActiveStake\": [0-9]*" | cut -d: -f2'
  # shellcheck disable=SC2154
  ssh "${sshOptions[@]}" "${validatorIpList[0]}" "$remote_pipeline"
}
# Print the totalCurrentStake value from "solana validators --output=json",
# run over ssh on the first entry of validatorIpList. The number is extracted
# remotely with grep and cut.
# Globals:   REPO_ROOT (read)
get_current_stake() {
  source "${REPO_ROOT}/net/common.sh"
  loadConfigFile

  # Single quotes are deliberate: the whole pipeline runs on the remote host.
  # shellcheck disable=SC2016
  local remote_pipeline='$HOME/.cargo/bin/solana --url http://127.0.0.1:8899 validators --output=json | grep -o "totalCurrentStake\": [0-9]*" | cut -d: -f2'
  # shellcheck disable=SC2154
  ssh "${sshOptions[@]}" "${validatorIpList[0]}" "$remote_pipeline"
}
# Query InfluxDB for the rounded mean of "duration_ms" in the
# validator-confirmation series over the last $1 seconds.
# Globals:   SINCE (written), mean_confirmation_ms (written - the result),
#            TESTNET_TAG, INFLUX_HOST, REPO_ROOT (read)
# Arguments: $1 - look-back window in seconds
get_validator_confirmation_time() {
  SINCE=$1

  # The query text is filled in with the database tag and the window length.
  local confirmation_query
  printf -v confirmation_query '
SELECT ROUND(MEAN("duration_ms")) as "mean_confirmation_ms"
FROM "%s"."autogen"."validator-confirmation"
WHERE time > now() - %ss' "$TESTNET_TAG" "$SINCE"

  # The parser's output is cut down to its second space-separated field.
  mean_confirmation_ms=$(
    curl -G "${INFLUX_HOST}/query?u=ro&p=topsecret" \
      --data-urlencode "db=${TESTNET_TAG}" \
      --data-urlencode "q=$confirmation_query" |
      python3 "${REPO_ROOT}"/system-test/testnet-automation-json-parser.py --empty_error |
      cut -d' ' -f2
  )
}
# Query InfluxDB for the run's throughput, confirmation-time and tower-distance
# statistics and append the parsed result to $RESULT_FILE; then query the
# dropped-vote-hash total separately.
# Globals:   dropped_vote_hash_count (written - parser output of the second
#            query); TESTNET_TAG, TEST_DURATION_SECONDS, INFLUX_HOST,
#            REPO_ROOT, RESULT_FILE (read)
collect_performance_statistics() {
  execution_step "Collect performance statistics about run"

  local influx_url="${INFLUX_HOST}/query?u=ro&p=topsecret"
  local parser_dir="${REPO_ROOT}"/system-test

  local mean_tps_query max_tps_query
  local mean_confirmation_query max_confirmation_query p99_confirmation_query
  local max_tower_distance_query last_tower_distance_query
  local dropped_vote_hash_query

  # Each query is a printf template; %s slots take the database tag and, where
  # present, the test duration in seconds.

  # total_transactions will be 0 when the node is leader, so exclude those
  printf -v mean_tps_query '
SELECT ROUND(MEAN("median_sum")) as "mean_tps" FROM (
SELECT MEDIAN(sum_total_transactions) AS "median_sum" FROM (
SELECT SUM("total_transactions") AS "sum_total_transactions"
FROM "%s"."autogen"."replay-slot-stats"
WHERE time > now() - %ss AND total_transactions > 0
GROUP BY time(1s), host_id)
GROUP BY time(1s)
)' "$TESTNET_TAG" "$TEST_DURATION_SECONDS"

  printf -v max_tps_query '
SELECT MAX("median_sum") as "max_tps" FROM (
SELECT MEDIAN(sum_total_transactions) AS "median_sum" FROM (
SELECT SUM("total_transactions") AS "sum_total_transactions"
FROM "%s"."autogen"."replay-slot-stats"
WHERE time > now() - %ss AND total_transactions > 0
GROUP BY time(1s), host_id)
GROUP BY time(1s)
)' "$TESTNET_TAG" "$TEST_DURATION_SECONDS"

  printf -v mean_confirmation_query '
SELECT round(mean("duration_ms")) as "mean_confirmation_ms"
FROM "%s"."autogen"."validator-confirmation"
WHERE time > now() - %ss' "$TESTNET_TAG" "$TEST_DURATION_SECONDS"

  printf -v max_confirmation_query '
SELECT round(max("duration_ms")) as "max_confirmation_ms"
FROM "%s"."autogen"."validator-confirmation"
WHERE time > now() - %ss' "$TESTNET_TAG" "$TEST_DURATION_SECONDS"

  printf -v p99_confirmation_query '
SELECT round(percentile("duration_ms", 99)) as "99th_percentile_confirmation_ms"
FROM "%s"."autogen"."validator-confirmation"
WHERE time > now() - %ss' "$TESTNET_TAG" "$TEST_DURATION_SECONDS"

  printf -v max_tower_distance_query '
SELECT MAX("tower_distance") as "max_tower_distance" FROM (
SELECT last("slot") - last("root") as "tower_distance"
FROM "%s"."autogen"."tower-observed"
WHERE time > now() - %ss
GROUP BY time(1s), host_id)' "$TESTNET_TAG" "$TEST_DURATION_SECONDS"

  # This query has no time filter.
  printf -v last_tower_distance_query '
SELECT MEAN("tower_distance") as "last_tower_distance" FROM (
SELECT last("slot") - last("root") as "tower_distance"
FROM "%s"."autogen"."tower-observed"
GROUP BY host_id)' "$TESTNET_TAG"

  # All seven queries go out in one request, separated by semicolons.
  local batch_query="$mean_tps_query;$max_tps_query"
  batch_query+=";$mean_confirmation_query;$max_confirmation_query"
  batch_query+=";$p99_confirmation_query"
  batch_query+=";$max_tower_distance_query;$last_tower_distance_query"

  curl -G "$influx_url" \
    --data-urlencode "db=${TESTNET_TAG}" \
    --data-urlencode "q=$batch_query" |
    python3 "$parser_dir"/testnet-automation-json-parser.py >>"$RESULT_FILE"

  printf -v dropped_vote_hash_query '
SELECT sum("count") as "sum_dropped_vote_hash"
FROM "%s"."autogen"."dropped-vote-hash"
WHERE time > now() - %ss' "$TESTNET_TAG" "$TEST_DURATION_SECONDS"

  # store in variable to be returned
  dropped_vote_hash_count=$(
    curl -G "$influx_url" \
      --data-urlencode "db=${TESTNET_TAG}" \
      --data-urlencode "q=$dropped_vote_hash_query" |
      python3 "$parser_dir"/testnet-automation-json-parser-missing.py
  )
}
# Post a Block Kit summary of the test run to the Slack webhook.
# Globals:   SLACK_WEBHOOK_URL (read, required); BUILDKITE_MESSAGE,
#            BUILDKITE_BUILD_URL, RESULT_DETAILS, TEST_CONFIGURATION (read,
#            defaulted in place when empty); CHANNEL, TESTNET_TAG,
#            TESTNET_START_UNIX_MSECS, TESTNET_FINISH_UNIX_MSECS (read);
#            COMMIT, COMMIT_BUTTON_TEXT, COMMIT_URL, BUILD_BUTTON_TEXT,
#            GRAFANA_URL, payLoad (written)
# Exits:     the shell, with status 1, when SLACK_WEBHOOK_URL is empty
# NOTE(review): values are interpolated into the JSON without escaping, so
# quotes or raw newlines in them end up verbatim inside JSON strings.
upload_results_to_slack() {
  echo --- Uploading results to Slack Performance Results App

  if [[ -z $SLACK_WEBHOOK_URL ]]; then
    echo "SLACK_WEBHOOK_URL undefined"
    exit 1
  fi

  [[ -n $BUILDKITE_MESSAGE ]] || BUILDKITE_MESSAGE="Message not defined"

  COMMIT=$(git rev-parse HEAD)
  # The button label is the first eight characters of the commit hash.
  COMMIT_BUTTON_TEXT=${COMMIT:0:8}
  COMMIT_URL="https://github.com/solana-labs/solana/commit/${COMMIT}"

  if [[ -n $BUILDKITE_BUILD_URL ]]; then
    BUILD_BUTTON_TEXT="Build Kite Job"
  else
    BUILD_BUTTON_TEXT="Build URL not defined"
    BUILDKITE_BUILD_URL="https://buildkite.com/solana-labs/"
  fi

  GRAFANA_URL="https://internal-metrics.solana.com:3000/d/monitor-${CHANNEL:-edge}/cluster-telemetry-${CHANNEL:-edge}?var-testnet=${TESTNET_TAG:-testnet-automation}&from=${TESTNET_START_UNIX_MSECS:-0}&to=${TESTNET_FINISH_UNIX_MSECS:-0}"

  [[ -n $RESULT_DETAILS ]] || RESULT_DETAILS="Undefined"
  [[ -n $TEST_CONFIGURATION ]] || TEST_CONFIGURATION="Undefined"

  # Unquoted heredoc: variables expand, and \` yields a literal backtick.
  payLoad="$(cat <<EOF
{
  "blocks": [
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "*$BUILDKITE_MESSAGE*"
      }
    },
    {
      "type": "actions",
      "elements": [
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "$COMMIT_BUTTON_TEXT",
            "emoji": true
          },
          "url": "$COMMIT_URL"
        },
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "$BUILD_BUTTON_TEXT",
            "emoji": true
          },
          "url": "$BUILDKITE_BUILD_URL"
        },
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "Grafana",
            "emoji": true
          },
          "url": "$GRAFANA_URL"
        }
      ]
    },
    {
      "type": "divider"
    },
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "Test Configuration: \n\`\`\`$TEST_CONFIGURATION\`\`\`"
      }
    },
    {
      "type": "divider"
    },
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "Result Details: \n\`\`\`$RESULT_DETAILS\`\`\`"
      }
    }
  ]
}
EOF
)"

  curl -X POST \
    -H 'Content-type: application/json' \
    --data "$payLoad" \
    "$SLACK_WEBHOOK_URL"
}
# Post a summary of the test run to the Discord webhook.
# Globals:   DISCORD_WEBHOOK_URL (read, required); BUILDKITE_MESSAGE,
#            BUILDKITE_BUILD_URL, RESULT_DETAILS, TEST_CONFIGURATION (read,
#            defaulted in place when empty); CHANNEL, TESTNET_TAG,
#            TESTNET_START_UNIX_MSECS, TESTNET_FINISH_UNIX_MSECS (read);
#            COMMIT, COMMIT_BUTTON_TEXT, COMMIT_URL, BUILD_BUTTON_TEXT,
#            GRAFANA_URL, SANITIZED_RESULT (written)
# Exits:     the shell, with status 1, when DISCORD_WEBHOOK_URL is empty
# NOTE(review): only newlines are escaped; quotes or backslashes in the
# interpolated values still reach the JSON unescaped.
upload_results_to_discord() {
  echo --- Uploading results to Discord Performance Results App

  if [[ -z $DISCORD_WEBHOOK_URL ]]; then
    echo "DISCORD_WEBHOOK_URL undefined"
    exit 1
  fi

  [[ -n $BUILDKITE_MESSAGE ]] || BUILDKITE_MESSAGE="Message not defined"

  COMMIT=$(git rev-parse HEAD)
  # The link label is the first eight characters of the commit hash.
  COMMIT_BUTTON_TEXT=${COMMIT:0:8}
  COMMIT_URL="https://github.com/solana-labs/solana/commit/${COMMIT}"

  if [[ -n $BUILDKITE_BUILD_URL ]]; then
    BUILD_BUTTON_TEXT="Build Kite Job"
  else
    BUILD_BUTTON_TEXT="Build URL not defined"
    BUILDKITE_BUILD_URL="https://buildkite.com/solana-labs/"
  fi

  GRAFANA_URL="https://internal-metrics.solana.com:3000/d/monitor-${CHANNEL:-edge}/cluster-telemetry-${CHANNEL:-edge}?var-testnet=${TESTNET_TAG:-testnet-automation}&from=${TESTNET_START_UNIX_MSECS:-0}&to=${TESTNET_FINISH_UNIX_MSECS:-0}"

  # A raw newline inside a JSON string is invalid, so each one is replaced by
  # the two-character sequence \n. Both free-text fields get this treatment.
  [[ -n $RESULT_DETAILS ]] || RESULT_DETAILS="Undefined"
  SANITIZED_RESULT=${RESULT_DETAILS//$'\n'/"\n"}
  [[ -n $TEST_CONFIGURATION ]] || TEST_CONFIGURATION="Undefined"
  local sanitized_configuration=${TEST_CONFIGURATION//$'\n'/"\n"}

  # Unquoted heredoc: a trailing backslash joins lines, so "content" is sent
  # as one JSON string. Those lines stay at column 0 because any indentation
  # would become part of the message.
  curl "$DISCORD_WEBHOOK_URL" \
    -X POST \
    -H "Content-Type: application/json" \
    -d @- <<EOF
{
  "username": "System Performance Test",
  "content": "\
**$BUILDKITE_MESSAGE**\n\
[$COMMIT_BUTTON_TEXT](<$COMMIT_URL>) | [$BUILD_BUTTON_TEXT](<$BUILDKITE_BUILD_URL>) | [Grafana](<$GRAFANA_URL>)\n\
Test Configuration:\n\
\`\`\`$sanitized_configuration\`\`\`\n\
Result Details:\n\
\`\`\`$SANITIZED_RESULT\`\`\`\n\
"
}
EOF
}
# Build the software-version arguments for a net launch and store them in the
# variable named by $3.
# Arguments: $1 - release channel; may be empty but must be passed
#            $2 - artifact basename, used when the channel is empty
#            $3 - name of the caller's variable that receives the result
# Result:    "-t <channel>" when a channel is given; otherwise the matching
#            tarball is downloaded with buildkite-agent and the result is
#            "-T <basename>*.tar.bz2" (glob left unexpanded for the caller)
get_net_launch_software_version_launch_args() {
  local channel="${1?}"
  local artifact_basename="${2?}"
  local return_varname="${3:?}"

  # printf -v assigns to the named variable without eval, so nothing in the
  # channel or basename is ever parsed as shell code.
  if [[ -n $channel ]]; then
    printf -v "$return_varname" '%s' "-t $channel"
  else
    execution_step "Downloading tar from build artifacts (${artifact_basename})"
    buildkite-agent artifact download "${artifact_basename}*.tar.bz2" .
    printf -v "$return_varname" '%s' "-T ${artifact_basename}*.tar.bz2"
  fi
}
|