#!/usr/bin/perl
# This script runs a set of sanity checks on the system
# and reports status.
#
# Every failed check prints a line starting with "BAD:" (or "MUCHO BAD:")
# on standard output.  The script dies if one of the external commands or
# files it depends on cannot be opened; otherwise it exits with status 0.

use strict;
use warnings;

## Reporting thresholds
my $disk_threshold         = 95;        # percentage of capacity
my $load_threshold         = 1.5;
my $miserver_log_threshold = 20000000;  # 20M
my $cpu_threshold          = 90;        # percent
# NOTE(review): the value compared against this threshold is a 5-character
# field cut out of the ps output, so it can never exceed 99999 and the memory
# check below can never fire as configured.  Presumably the field is in
# kilobytes while this threshold is in bytes -- confirm the units and adjust.
my $mem_threshold          = 10000000;  # 10M

# swap thresholds signify minimums
# $swap_total_threshold = 1243384;      # in kilobytes
my $swap_total_threshold     = 984000;  # in kilobytes
my $swap_available_threshold = 300000;  # should have at least 300M available

## Check disk capacity.
# List-form pipe open: the command is executed directly, without a shell.
open(my $df, '-|', '/usr/local/bin/df', '-k')
    or die "cannot run /usr/local/bin/df: $!";
while (<$df>) {
    next if m,/cdrom,;          # CD-ROM mounts are skipped
    next unless /(\d+)%/;       # only lines carrying a capacity percentage
    if ($1 >= $disk_threshold) {
        print "BAD: disk nearing capacity\n$_\n";
    }
}
close $df;

## Check system load.
my $uptime = `/bin/uptime`;
$uptime = '' unless defined $uptime;
my ($load1, $load5, $load15)
    = ($uptime =~ m/load average: (\S+) (\S+) (\S+)$/);
if (!defined $load15) {
    # Report the parse failure instead of comparing undef to the threshold.
    print "BAD: could not parse load average from uptime output\n$uptime";
} elsif ($load15 > $load_threshold) {
    print "BAD: load average over threshold\n$uptime";
} else {
    print "load average: $load1 $load5 $load15\n\n";
}

## Check miserver.log size.
# -s yields undef for a missing file and an empty string for an empty one;
# treat both as size 0.
my $miserver_logsize = (-s "/home/miadmin/data/miserver.log") || 0;
if ($miserver_logsize > $miserver_log_threshold) {
    printf "BAD: miserver.log getting too big to be useful (%.1fM)\n",
        $miserver_logsize / 1000000;
}

## Try to catch runaway processes.
open(my $ps, '-|', '/usr/ucb/ps', 'wwwaux')
    or die "cannot run /usr/ucb/ps: $!";
my $ps_header = <$ps>;          # throw away header line
while (my $line = <$ps>) {
    # Lines too short to contain both fixed-width fields cannot trip
    # either threshold, so skip them.
    next if length($line) <= 24;

    my ($cpu, $mem);
    {
        # The fields are cut by column position and may contain padding or
        # stray characters; numify them quietly.
        no warnings 'numeric';
        $cpu = substr($line, 14, 5) + 0;
        $mem = substr($line, 24, 5) + 0;
    }

    if ($cpu > $cpu_threshold && $mem > $mem_threshold) {
        print "BAD: high memory and CPU usage: $line";
    } elsif ($cpu > $cpu_threshold) {
        print "BAD: high CPU usage: $line";
    } elsif ($mem > $mem_threshold) {
        print "BAD: high memory usage: $line";
    }
}
close $ps;

## Check on swap.
my $swap = `/usr/sbin/swap -s`;
$swap = '' unless defined $swap;
chomp $swap;
my ($swap_used, $swap_available)
    = ($swap =~ m/(\d+)k used, (\d+)k available/);
if (!defined $swap_available) {
    print "BAD: could not parse output of swap -s\n";
} else {
    my $swap_total = $swap_used + $swap_available;
    if ($swap_total < $swap_total_threshold) {
        print "BAD: Not all swap is allocated\n";
    }
    if ($swap_available < $swap_available_threshold) {
        printf "BAD: Available swap down to %dM\n", $swap_available / 1000;
    }
}

## Check status of disk mirrors.
open(my $mirrors, '-|', '/usr/opt/SUNWmd/sbin/metastat', '-p')
    or die "cannot run /usr/opt/SUNWmd/sbin/metastat: $!";
while (<$mirrors>) {
    next unless /^d(\d) -m/;    # only mirror definition lines
    my $mirror = $1;            # copy before the next match clobbers $1
    if (! /^d$mirror -m d\d\d d\d\d (d\d\d )?1/) {
        print "BAD: malformed mirror: $_\n";
    }
}
close $mirrors;

## Report illustra dumptimes.
my $select_string = "select database_name, database_dumptimes from databases;";
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list; consider another way to supply credentials.
open(my $msql, '-|', '/usr/local/bin/msql',
     '-U', 'miadmin', '-P', 'ifmxsux', 'template1', '-c', $select_string)
    or die "cannot run /usr/local/bin/msql: $!";
while (<$msql>) {
    print;
}
close $msql;

## Check whether any metadevice needs maintenance.
my $metastat_output = `/usr/opt/SUNWmd/sbin/metastat`;
$metastat_output = '' unless defined $metastat_output;
if ($metastat_output =~ /maintenance/i) {
    print "MUCHO BAD: drives need maintenance:\n$metastat_output";
}

## Look for "Sense" entries in the system log.
open(my $syslog, '<', '/var/adm/messages')
    or die "cannot open /var/adm/messages: $!";
while (<$syslog>) {
    next unless /Sense/;
    print "MUCHO BAD: possible imminent drive failure, check syslog\n";
    last;                       # the message is the same for every hit
}
close $syslog;

exit 0;