#!/usr/bin/env bash
#
# system-stats.sh
#
# Reports cpu and ram usage statistics
#
# Once per second this script samples `top`, derives total cpu and ram usage
# (both as percentages) and, when an executable `nvidia-smi` is on PATH, the
# average gpu utilisation and gpu memory usage across all gpus. Each sample is
# handed to scripts/metrics-write-datapoint.sh as a single string:
#
#   system-stats,hostname=<HOSTNAME> cpu_usage=<n>,ram_usage=<n>[,avg_gpu_usage=<n>,avg_gpu_mem_usage=<n>]
#
# A sample whose cpu/ram figures cannot be parsed is skipped (with a message on
# stderr) instead of terminating the script; unusable gpu data simply omits the
# gpu fields for that sample.
#
set -e

[[ $(uname) == Linux ]] || exit 0

# need to cd like this to avoid #SC1091
cd "$(dirname "$0")/.."
source scripts/configure-metrics.sh

#######################################
# Tests whether a string is a plain non-negative decimal number.
# Arguments: $1 - candidate string
# Returns:   0 if $1 looks like 12 or 12.5, non-zero otherwise (incl. empty)
#######################################
is_number() {
  [[ $1 =~ ^[0-9]+([.][0-9]+)?$ ]]
}

#######################################
# Extracts total cpu usage from `top` batch output.
# Arguments: $1 - full output of `top -bn2`
# Outputs:   100 minus the idle percentage of the last '%Cpu(s):' line
# Returns:   non-zero if no numeric idle value could be extracted
#######################################
parse_cpu_usage() {
  local idle
  idle=$(grep '%Cpu(s):' <<< "$1" \
    | sed "s/.*, *\([0-9.]*\)%* id.*/\1/" \
    | tail -1)
  is_number "$idle" || return 1
  # pass the value with -v so it is never interpreted as awk program text
  awk -v idle="$idle" 'BEGIN {print 100 - idle}'
}

#######################################
# Extracts ram usage from `top` batch output.
# Arguments: $1 - full output of `top -bn2`
# Outputs:   used memory / total memory * 100, from the last '...B Mem' line
# Returns:   non-zero if total/used are not numeric or total is not positive
#######################################
parse_ram_usage() {
  local total_and_used total used
  total_and_used=$(grep '.*B Mem' <<< "$1" \
    | tail -1 \
    | sed "s/.*: *\([0-9.]*\)%* total.*, *\([0-9.]*\)%* used.*/\1 \2/")
  read -r total used <<< "$total_and_used"
  if ! is_number "$total" || ! is_number "$used"; then
    return 1
  fi
  awk -v used="$used" -v total="$total" \
    'BEGIN { if (total + 0 <= 0) exit 1; print used / total * 100 }'
}

#######################################
# Builds the gpu part of the report from nvidia-smi, if available.
# Outputs:   ",avg_gpu_usage=<n>,avg_gpu_mem_usage=<n>" averaged over every
#            gpu row with three numeric fields and a positive memory total;
#            nothing if nvidia-smi is absent or yields no such row
# Returns:   0 (the exit status of the final awk)
#######################################
collect_gpu_report() {
  [[ -x "$(command -v nvidia-smi)" ]] || return 0
  nvidia-smi --query-gpu=utilization.gpu,memory.used,memory.total \
    --format=csv,nounits,noheader \
    | awk -F',' '
        BEGIN { num = "^[0-9]+([.][0-9]+)?$" }
        # rows without three comma-separated fields are not gpu records
        NF < 3 { next }
        {
          for (i = 1; i <= 3; i++) gsub(/^[ \t]+|[ \t]+$/, "", $i)
          # skip non-numeric fields and guard the division below
          if ($1 !~ num || $2 !~ num || $3 !~ num || $3 + 0 <= 0) next
          gpus++
          usage += $1
          mem += $2 / $3 * 100
        }
        END {
          if (gpus > 0)
            print ",avg_gpu_usage=" (usage / gpus) ",avg_gpu_mem_usage=" (mem / gpus)
        }'
}

while true; do
  # collect top twice because the first time is inaccurate
  # LC_ALL=C keeps '.' as the decimal separator regardless of the user locale,
  # which the sed expressions in the parsers rely on
  top_output="$(LC_ALL=C top -bn2 -d1)"

  if cpu_usage=$(parse_cpu_usage "$top_output") \
    && ram_usage=$(parse_ram_usage "$top_output"); then
    cpu_report="cpu_usage=$cpu_usage,ram_usage=$ram_usage"

    # if nvidia-smi exists, report gpu stats
    gpu_report=$(collect_gpu_report) || gpu_report=""

    report="${cpu_report}${gpu_report}"
    ./scripts/metrics-write-datapoint.sh "system-stats,hostname=$HOSTNAME $report"
  else
    echo "system-stats: could not parse cpu/ram usage from top output; skipping sample" >&2
  fi

  sleep 1
done