2025-08-31 16:27:13 -06:00
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
# Check various system health statistics and push them to Uptime Kuma
|
|
|
|
# Triggers a "down" alert if a stat is above the threshold,
|
|
|
|
# and sends info about what's wrong
|
|
|
|
# Also supports checking ZFS pool health
|
|
|
|
|
2025-08-31 14:58:03 -06:00
|
|
|
# --- Uptime Kuma push endpoint ---------------------------------------------

# Base URL of the push API; the push key below is appended to it.
API_URL="https://status.netsyms.net/api/push/"

# The random code in the URL generated by Uptime Kuma
PUSH_KEY=""

# --- Feature switches -------------------------------------------------------

# Set to 0 if you aren't using ZFS
ENABLE_ZFS_MONITOR=1

# --- Alert thresholds (percent) ---------------------------------------------

# Alerts if any filesystem is fuller than this
DISK_FULL_ALERT_PERCENT_THRESHOLD=85

# Alerts if CPU usage averages greater than this across 5 minutes
CPU_USAGE_PERCENT_THRESHOLD=60

# Alerts if RAM usage rises over this percentage, ZFS ARC counts as used
MEM_USAGE_PERCENT_THRESHOLD=80
|
2025-08-31 14:58:03 -06:00
|
|
|
|
|
|
|
# ZFS health

# Prints "OK" when `zpool status -x` reports that all pools are healthy.
# Otherwise prints the zpool report itself (including anything it wrote to
# stderr) so it can be used as the alert text. zpool is run exactly once, so
# the text that is reported is the same text that was tested.
zfs_health_status() {
    local report rc
    report=$(zpool status -x 2>&1)
    rc=$?
    if [[ "$report" == *"all pools are healthy"* ]]; then
        printf 'OK\n'
    elif [[ -z "$report" ]]; then
        # Never hand back an empty alert message.
        printf 'zpool status produced no output (exit status %s)\n' "$rc"
    else
        printf '%s\n' "$report"
    fi
}

ZFS_STATUS="OK"
if [[ "${ENABLE_ZFS_MONITOR:-0}" == 1 ]]; then
    echo -n "Checking ZFS health: "
    ZFS_STATUS=$(zfs_health_status)
    echo "$ZFS_STATUS"
fi
|
2025-08-31 14:58:03 -06:00
|
|
|
|
|
|
|
# Disk usage

# Reads `df -P` output on stdin and prints a comma-separated list of
# "<filesystem>: <used>% > <threshold>%" entries, one for every filesystem
# whose usage is at or above the threshold given as $1.
# Prints nothing when no filesystem reaches the threshold.
disk_usage_alerts() {
    awk -v limit="$1" '
        # Skip the header row and any tmpfs / cdrom entries.
        /^Filesystem|tmpfs|cdrom/ { next }
        # Skip rows whose usage column is not a percentage (for example "-").
        $5 !~ /^[0-9]+%$/ { next }
        {
            used = $5 + 0
            if (used >= limit + 0) {
                printf "%s%s: %d%% > %s%%", sep, $1, used, limit
                sep = ","
            }
        }
        END { if (sep != "") printf "\n" }
    '
}

echo -n "Checking disk usage: "
# -P keeps every filesystem on a single line even when the device name is
# long, so the column positions stay stable. Piping into the parser (rather
# than using an unquoted here-string) keeps one filesystem per line on every
# Bash version and yields no bogus empty record when df prints nothing.
DISKS_STATUS=$(df -P | disk_usage_alerts "${DISK_FULL_ALERT_PERCENT_THRESHOLD:-85}")
if [[ -z "$DISKS_STATUS" ]]; then
    DISKS_STATUS="OK"
fi
echo "$DISKS_STATUS"
|
|
|
|
|
|
|
|
# CPU usage percentage
# Calculated from system load, average over past 5 minutes

# Reads `uptime` output on stdin and prints the 5-minute load average.
# The value is located relative to the "load average:" label instead of by
# absolute field number, because the number of fields before the label
# changes with how long the machine has been up.
uptime_load5() {
    awk '{
        if (!sub(/^.*load averages?:[ \t]*/, "")) next
        split($0, avg, /,?[ \t]+/)
        print avg[2]
        exit
    }'
}

# Prints load average $1 as a percentage of $2 CPU cores, two decimals.
# A missing or zero core count is treated as one core.
load_to_cpu_percent() {
    awk -v load="$1" -v cores="$2" 'BEGIN {
        if (cores + 0 < 1) cores = 1
        printf "%.2f\n", (load / cores) * 100
    }'
}

# Prints "OK" when percentage $1 is not above threshold $2, otherwise the
# alert text "<percent>% > <threshold>%". The comparison is numeric; inside
# [[ ]] the > operator compares strings, which would rank "100.00" below "60".
cpu_threshold_status() {
    awk -v pct="$1" -v limit="$2" 'BEGIN {
        if (pct + 0 > limit + 0) printf "%s%% > %s%%\n", pct, limit
        else print "OK"
    }'
}

echo -n "Checking CPU load: "
if [[ -r /proc/loadavg ]]; then
    # Second field of /proc/loadavg is the 5-minute average.
    read -r _ SYSTEM_LOAD _ < /proc/loadavg
else
    # LC_ALL=C keeps the decimal separator a dot.
    SYSTEM_LOAD=$(LC_ALL=C uptime | uptime_load5)
fi
CPU_COUNT=$(nproc)
CPU_PERCENT=$(load_to_cpu_percent "$SYSTEM_LOAD" "$CPU_COUNT")
CPU_STATUS=$(cpu_threshold_status "$CPU_PERCENT" "${CPU_USAGE_PERCENT_THRESHOLD:-60}")
echo "$CPU_STATUS"
|
|
|
|
|
2025-08-31 16:20:40 -06:00
|
|
|
# Memory usage percentage

# Reads `free -m` output on stdin and prints used memory as a percentage of
# total memory, taken from the second line of the report (columns 3 and 2).
# Prints nothing if that line is missing or reports a total of zero.
mem_used_percent() {
    awk 'NR == 2 && $2 > 0 { print $3 * 100 / $2 }'
}

# Prints "OK" when percentage $1 is not above threshold $2, otherwise the
# alert text "<percent>% > <threshold>%". The comparison is numeric; inside
# [[ ]] the > operator compares strings, which would rank "9.5" above "80".
mem_threshold_status() {
    awk -v pct="$1" -v limit="$2" 'BEGIN {
        if (pct + 0 > limit + 0) printf "%s%% > %s%%\n", pct, limit
        else print "OK"
    }'
}

echo -n "Checking memory usage: "
MEM_PCT=$(free -m | mem_used_percent)
MEM_STATUS=$(mem_threshold_status "$MEM_PCT" "${MEM_USAGE_PERCENT_THRESHOLD:-80}")
echo "$MEM_STATUS"
|
2025-08-31 16:20:40 -06:00
|
|
|
|
|
|
|
|
2025-08-31 15:48:54 -06:00
|
|
|
#
|
|
|
|
# Put it all together
|
|
|
|
#
|
|
|
|
# Fold the individual check results into one overall verdict.
# IS_OK stays 1 only if every check reported exactly "OK"; each failing check
# contributes one "<label> alert: <status>" entry to ERROR_MESSAGES.
IS_OK=1
ERROR_MESSAGES=()

# Each entry is "<label>:<name of the variable holding that check's status>".
for check_entry in \
    "ZFS:ZFS_STATUS" \
    "Disk usage:DISKS_STATUS" \
    "CPU usage:CPU_STATUS" \
    "Memory usage:MEM_STATUS"; do
    check_label=${check_entry%%:*}
    check_var=${check_entry##*:}
    check_value=${!check_var}
    if [[ $check_value == "OK" ]]; then
        continue
    fi
    IS_OK=0
    ERROR_MESSAGES+=("$check_label alert: $check_value")
done
|
2025-08-31 15:48:54 -06:00
|
|
|
|
|
|
|
|
|
|
|
# Sends one push request to Uptime Kuma at "$API_URL$PUSH_KEY".
#   $1 - status to report ("up" or "down")
#   $2 - message text
#   $3 - optional ping value; omitted from the request when empty
# Every value is URL-encoded by curl. Returns non-zero when PUSH_KEY is empty
# or when curl fails (--fail turns HTTP error responses into failures and
# --max-time keeps an unreachable server from hanging the script).
push_status() {
    if [[ -z "${PUSH_KEY:-}" ]]; then
        echo "PUSH_KEY is not set; cannot send status" >&2
        return 1
    fi
    local -a query=(--data-urlencode "status=$1" --data-urlencode "msg=$2")
    if [[ -n "${3:-}" ]]; then
        query+=(--data-urlencode "ping=$3")
    fi
    curl -s -f -o /dev/null --max-time 30 -G "${query[@]}" "$API_URL$PUSH_KEY"
}

echo -n "Sending status: "
if [[ $IS_OK == "1" ]]; then
    echo "OK"
    # The CPU percentage is passed along as the ping value.
    push_status "up" "OK" "$CPU_PERCENT" \
        || echo "Failed to push status to Uptime Kuma" >&2
else
    ERROR_STRING=$(IFS=";"; echo "${ERROR_MESSAGES[*]}")
    # Quoted so multi-line text is printed verbatim instead of being
    # word-split and glob-expanded.
    printf '%s\n' "$ERROR_STRING"
    push_status "down" "$ERROR_STRING" \
        || echo "Failed to push status to Uptime Kuma" >&2
fi
|