#!/bin/bash
# Check various system health statistics and push them to Uptime Kuma.
# Triggers a "down" alert if a stat is above its threshold and sends
# info about what's wrong. Also supports checking ZFS pool health.
#
# External tools used: df, awk, nproc, free, curl, and zpool (only when
# ENABLE_ZFS_MONITOR=1). The script exits with curl's exit status.

API_URL="https://status.netsyms.net/api/push/"
PUSH_KEY="" # The random code in the URL generated by Uptime Kuma
GRAPH="CPU" # Which stat is graphed in Uptime Kuma. Can set to "RAM" for memory usage or to "" to disable.
ENABLE_ZFS_MONITOR=1 # Set to 0 if you aren't using ZFS

DISK_FULL_ALERT_PERCENT_THRESHOLD=85 # Alerts if any filesystem is at or above this percentage
CPU_USAGE_PERCENT_THRESHOLD=60 # Alerts if CPU usage averages greater than this across 5 minutes
MEM_USAGE_PERCENT_THRESHOLD=80 # Alerts if RAM usage rises over this percentage, ZFS ARC counts as used

#######################################
# Numeric "greater than" that understands decimals.
# Bash's [[ a > b ]] compares strings, so "100.00" would sort below "60".
# Arguments: $1 - measured value, $2 - threshold
# Returns:   0 if $1 > $2 numerically, 1 otherwise (empty values count as 0)
#######################################
exceeds_threshold() {
  awk -v value="$1" -v limit="$2" 'BEGIN { exit !((value + 0) > (limit + 0)) }'
}

#######################################
# Join the remaining arguments with a separator.
# Arguments: $1 - separator (first character is used), $2.. - items
# Outputs:   the joined string on stdout (empty line when there are no items)
#######################################
join_by() {
  local IFS=$1
  shift
  printf '%s\n' "$*"
}

#######################################
# Send one push to Uptime Kuma.
# Globals:   API_URL, PUSH_KEY, PING_VALUE (read)
# Arguments: $1 - status ("up" or "down"), $2 - message text
# Returns:   curl's exit status
#######################################
push_status() {
  local status=$1 message=$2 rc=0
  # -G turns the url-encoded data into a query string; the timeouts keep a
  # dead network from hanging a scheduled run forever.
  curl -s -o /dev/null --connect-timeout 5 --max-time 15 -G \
    --data-urlencode "status=$status" \
    --data-urlencode "ping=$PING_VALUE" \
    --data-urlencode "msg=$message" \
    "$API_URL$PUSH_KEY" || rc=$?
  if ((rc != 0)); then
    printf 'Push to Uptime Kuma failed (curl exit code %d)\n' "$rc" >&2
  fi
  return "$rc"
}

# ZFS health
ZFS_STATUS="OK"
if [[ $ENABLE_ZFS_MONITOR == 1 ]]; then
  printf 'Checking ZFS health: '
  # Single call; stderr is captured so a missing/failed zpool yields a
  # meaningful alert instead of an empty one.
  zpool_report=$(zpool status -x 2>&1)
  if [[ $zpool_report != *"all pools are healthy"* ]]; then
    ZFS_STATUS=${zpool_report:-"zpool status -x returned no output"}
  fi
  printf '%s\n' "$ZFS_STATUS"
fi

# Disk usage
printf 'Checking disk usage: '
DISKS_STATUS="OK"
USAGES=()
# df -P keeps every filesystem on one line; process substitution keeps the
# loop in the current shell and feeds it one line per filesystem.
while read -r percent partition; do
  percent=${percent%%\%*}
  # Skip rows without a numeric capacity (e.g. "-").
  [[ $percent =~ ^[0-9]+$ ]] || continue
  if ((10#$percent >= DISK_FULL_ALERT_PERCENT_THRESHOLD)); then
    USAGES+=("$partition: $percent% > $DISK_FULL_ALERT_PERCENT_THRESHOLD%")
  fi
done < <(LC_ALL=C df -P | awk '!/^Filesystem|tmpfs|cdrom/ { print $5 " " $1 }')
if ((${#USAGES[@]} > 0)); then
  DISKS_STATUS=$(join_by "," "${USAGES[@]}")
fi
printf '%s\n' "$DISKS_STATUS"

# CPU usage percentage
# Calculated from system load, average over past 5 minutes
printf 'Checking CPU load: '
if [[ -r /proc/loadavg ]]; then
  # Second field of /proc/loadavg is the 5-minute load average.
  read -r _ SYSTEM_LOAD _ < /proc/loadavg
else
  # Fallback: take the second number after "load average(s):" so the result
  # does not depend on how the uptime duration itself is formatted.
  SYSTEM_LOAD=$(LC_ALL=C uptime \
    | awk -F'load averages?: *' '{ split($2, avg, /,? +/); print avg[2] }')
fi
CPU_COUNT=$(nproc)
CPU_PERCENT=$(awk -v l="$SYSTEM_LOAD" -v c="$CPU_COUNT" \
  'BEGIN { if (c < 1) c = 1; printf "%.2f\n", (l / c) * 100 }')
CPU_STATUS="OK"
if exceeds_threshold "$CPU_PERCENT" "$CPU_USAGE_PERCENT_THRESHOLD"; then
  CPU_STATUS="$CPU_PERCENT% > $CPU_USAGE_PERCENT_THRESHOLD%"
fi
printf '%s\n' "$CPU_STATUS"

# Memory usage percentage
printf 'Checking memory usage: '
MEM_STATUS="OK"
# Row 2 of `free -m` is the RAM row: column 2 is total, column 3 is used.
MEM_PERCENT=$(LC_ALL=C free -m | awk 'NR==2 && $2 > 0 { print $3*100/$2 }')
if exceeds_threshold "$MEM_PERCENT" "$MEM_USAGE_PERCENT_THRESHOLD"; then
  MEM_STATUS="$MEM_PERCENT% > $MEM_USAGE_PERCENT_THRESHOLD%"
fi
printf '%s\n' "$MEM_STATUS"

#
# Put it all together
#
IS_OK=1
ERROR_MESSAGES=()
if [[ $ZFS_STATUS != "OK" ]]; then
  IS_OK=0
  ERROR_MESSAGES+=("ZFS alert: $ZFS_STATUS")
fi
if [[ $DISKS_STATUS != "OK" ]]; then
  IS_OK=0
  ERROR_MESSAGES+=("Disk usage alert: $DISKS_STATUS")
fi
if [[ $CPU_STATUS != "OK" ]]; then
  IS_OK=0
  ERROR_MESSAGES+=("CPU usage alert: $CPU_STATUS")
fi
if [[ $MEM_STATUS != "OK" ]]; then
  IS_OK=0
  ERROR_MESSAGES+=("Memory usage alert: $MEM_STATUS")
fi

printf 'Sending status: '

# Value reported as the "ping" metric, which Uptime Kuma graphs.
PING_VALUE=""
if [[ $GRAPH == "CPU" ]]; then
  PING_VALUE="$CPU_PERCENT"
elif [[ $GRAPH == "RAM" ]]; then
  PING_VALUE="$MEM_PERCENT"
fi

if [[ $IS_OK == "1" ]]; then
  printf 'OK\n'
  push_status "up" "OK"
else
  ERROR_STRING=$(join_by ";" "${ERROR_MESSAGES[@]}")
  printf '%s\n' "$ERROR_STRING"
  push_status "down" "$ERROR_STRING"
fi