From: Dmitry Nechai
Date: Tue, 25 Jun 2013 12:49:26 +0000 (+0300)
Subject: Draft version of extracting script and nagios plugins
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=94007df3cb0a5473f07142282dc8b59d4eff2e28;p=working%2FEvergreen.git

Draft version of extracting script and nagios plugins
---

diff --git a/Open-ILS/src/dashboard/janus.pl b/Open-ILS/src/dashboard/janus.pl
new file mode 100755
index 0000000000..106b1cd39d
--- /dev/null
+++ b/Open-ILS/src/dashboard/janus.pl
@@ -0,0 +1,95 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use Fcntl qw(:flock SEEK_SET);
+use Storable qw(store retrieve);
+use Data::Dumper;
+
+my $perf_log = '/tmp/service-perfdata';
+my $saved_data = '/tmp/janus_data';
+
+# A non-negative decimal number as it appears in plugin output ("3", "0.42").
+my $NUM = qr/\d+(?:\.\d*)?/;
+
+open(my $perf_fh, '<', $perf_log)
+    or die "Cannot open perfomance log: $!";
+
+# Hold an exclusive lock while reading; it is released by the close below.
+flock($perf_fh, LOCK_EX)
+    or die "Cannot lock $perf_log: $!";
+
+my $processed_volume = 0; # offset of the data that was already processed
+my $volume = -s $perf_log; # current size of the log
+
+seek($perf_fh, $processed_volume, SEEK_SET); # skip what was already processed
+my %perf_data = ();
+
+if (-e $saved_data){ # results of earlier runs exist: start from them
+    my $perf_data_ref = retrieve($saved_data);
+    %perf_data = %{$perf_data_ref};
+}
+
+while (my $line = <$perf_fh>){
+    chomp $line; # keep the line terminator out of the last field
+    _process_string($line);
+}
+
+store \%perf_data, $saved_data;
+
+# TODO: save $volume as the new processed volume so the next run can resume
+# from it instead of re-reading the log from the start.
+
+close $perf_fh; # also releases the lock
+
+# Parse one tab-separated perfdata record (id, time, hostname, service
+# description, service output, service data) and store the values found in
+# the service output in %perf_data as {hostname}{metric}{time}. Output that
+# does not match the expected format is stored as -1 so the gap stays visible.
+# Records of other services are ignored.
+sub _process_string {
+
+    my ($id, $time, $hostname, $service_desc, $service_output, $service_data) = split /\t/, $_[0];
+
+    # Blank or truncated record: no service description, nothing to store.
+    return unless defined $service_desc;
+
+    # split drops trailing empty fields; treat a missing output as empty so
+    # the record is still stored with the -1 marker.
+    $service_output = '' unless defined $service_output;
+
+    if ($service_desc eq 'Current Load'){
+        if ($service_output =~ m/load average: (?<one>$NUM), (?<five>$NUM), (?<fifteen>$NUM)/){
+            $perf_data{$hostname}{'load'}{$time} = [$+{one}, $+{five}, $+{fifteen}];
+        } else {
+            $perf_data{$hostname}{'load'}{$time} = [-1, -1, -1];
+        }
+    } elsif ($service_desc eq 'Memory Usage'){
+        if ($service_output =~ m/(?<val>$NUM)% used/){
+            $perf_data{$hostname}{'mem'}{$time} = $+{val};
+        } else {
+            $perf_data{$hostname}{'mem'}{$time} = -1;
+        }
+    } elsif ($service_desc eq 'CPU Usage'){
+        if ($service_output =~ m/user=(?<user>$NUM)% system=(?<sys>$NUM)% iowait=(?<iowait>$NUM)% steal=(?<steal>$NUM)%/){
+            $perf_data{$hostname}{'cpu'}{$time} = [$+{user}, $+{sys}, $+{iowait}, $+{steal}];
+        } else {
+            $perf_data{$hostname}{'cpu'}{$time} = [-1,-1,-1,-1];
+        }
+    } elsif ($service_desc eq 'Disk Space'){
+        if ($service_output =~ m/free space: \/ (?<mbyte>$NUM) MB \((?<pers>$NUM)%/){
+            $perf_data{$hostname}{'disk'}{$time} = [$+{mbyte}, $+{pers}];
+        } else {
+            $perf_data{$hostname}{'disk'}{$time} = [-1, -1];
+        }
+    } elsif ($service_desc eq 'Total Processes'){
+        if ($service_output =~ m/(?<procs>\d+) processes/){
+            $perf_data{$hostname}{'proc'}{$time} = $+{procs};
+        } else {
+            $perf_data{$hostname}{'proc'}{$time} = -1;
+        }
+    }
+
+    return;
+}
diff --git a/Open-ILS/src/dashboard/nagios-plugins/check_memory.sh b/Open-ILS/src/dashboard/nagios-plugins/check_memory.sh
new file mode 100755
index 0000000000..3fc7f1155b
--- /dev/null
+++ b/Open-ILS/src/dashboard/nagios-plugins/check_memory.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+#
+# Script to check memory usage on Linux. Ignores memory used by disk cache.
+#
+# Exit status follows the nagios plugin convention:
+# 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN
+#
+print_help() {
+    echo "Usage:"
+    echo "[-w] Warning level as a percentage"
+    echo "[-c] Critical level as a percentage"
+}
+
+while test -n "$1"; do
+    case "$1" in
+        --help|-h)
+            print_help
+            exit 0
+            ;;
+        -w)
+            warn_level=$2
+            shift
+            ;;
+        -c)
+            critical_level=$2
+            shift
+            ;;
+        *)
+            echo "Unknown Argument: $1"
+            print_help
+            exit 3
+            ;;
+    esac
+    shift
+done
+
+if [ -z "$warn_level" ]; then
+    echo "No Warning Level Specified"
+    print_help
+    exit 3
+fi
+
+if [ -z "$critical_level" ]; then
+    echo "No Critical Level Specified"
+    print_help
+    exit 3
+fi
+
+# The levels are used in integer comparisons below, so accept digits only.
+case "$warn_level$critical_level" in
+    *[!0-9]*)
+        echo "Warning and critical levels must be whole numbers"
+        print_help
+        exit 3
+        ;;
+esac
+
+# Read the "buffers/cache" line of free once: field 3 is used, field 4 is free.
+read -r used_mb free_mb <<< "$(free -m | awk '/buffers\/cache/ {print $3, $4}')"
+
+if [ -z "$used_mb" ] || [ -z "$free_mb" ]; then
+    echo "Memory UNKNOWN. No buffers/cache line in the output of free."
+    exit 3
+fi
+
+total_mb=$((used_mb + free_mb))
+
+if [ "$total_mb" -le 0 ]; then
+    echo "Memory UNKNOWN. Total memory reported as $total_mb MB."
+    exit 3
+fi
+
+# Whole-number percentage, rounded down. Shell arithmetic keeps the value an
+# integer for every input (1% and 100% included), which the tests below need.
+result=$((used_mb * 100 / total_mb))
+
+if [ "$result" -lt "$warn_level" ]; then
+    echo "Memory OK. $result% used."
+    exit 0
+elif [ "$result" -le "$critical_level" ]; then
+    echo "Memory WARNING. $result% used."
+    exit 1
+else
+    echo "Memory CRITICAL. $result% used."
+    exit 2
+fi
diff --git a/Open-ILS/src/dashboard/nagios-plugins/linux-cpu-usage.py b/Open-ILS/src/dashboard/nagios-plugins/linux-cpu-usage.py
new file mode 100755
index 0000000000..748924bd15
--- /dev/null
+++ b/Open-ILS/src/dashboard/nagios-plugins/linux-cpu-usage.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+import sys
+import time
+from optparse import OptionParser
+
+### Global Identifiers ###
+cpu_stat_var_array = ('user', 'nice', 'system', 'idle', 'iowait', 'irq', 'softirq', 'steal_time')
+
+### Main code ###
+# Command Line Arguments Parser
+cmd_parser = OptionParser(version="%prog 0.1")
+cmd_parser.add_option("-C", "--CPU", action="store", type="string", dest="cpu_name", help="Which CPU to be Check", metavar="cpu or cpu0 or cpu1")
+cmd_parser.add_option("-w", "--warning", type="int", action="store", dest="warning_per", help="Exit with WARNING status if higher than the PERCENT of CPU Usage", metavar="Warning Percentage")
+cmd_parser.add_option("-c", "--critical", type="int", action="store", dest="critical_per", help="Exit with CRITICAL status if higher than the PERCENT of CPU Usage", metavar="Critical Percentage")
+cmd_parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False, help="enable debug")
+
+(cmd_options, cmd_args) = cmd_parser.parse_args()
+# Check the Command syntax. The thresholds are tested against None so that
+# an explicit 0 is accepted.
+if not cmd_options.cpu_name or cmd_options.warning_per is None or cmd_options.critical_per is None:
+    cmd_parser.print_help()
+    sys.exit(3)
+
+# Collect CPU Statistic Object
+class CollectStat:
+    """Object to Collect CPU Statistic Data
+
+    cpu_stat_dict maps the names in cpu_stat_var_array to the counters read
+    from the /proc/stat line labelled cpu_name, and total is their sum. Both
+    stay empty/zero when /proc/stat has no such line.
+    """
+    def __init__(self, cpu_name):
+
+        self.total = 0
+        self.cpu_stat_dict = {}
+
+        with open("/proc/stat") as stat_file:
+            for line in stat_file:
+                cpustat = line.split()
+
+                # Compare the whole label so a partial name cannot select a line
+                if not cpustat or cpustat[0] != cpu_name:
+                    continue
+
+                # Drop the label and only retain the first 8 fields of the line
+                cpustat = cpustat[1:9]
+
+                if cmd_options.debug:
+                    print("DEBUG : cpustat array %s" % cpustat)
+
+                cpustat = [float(value) for value in cpustat] # Convert the Array to Float
+
+                # zip stops at the shorter input, so lines with fewer than
+                # 8 fields simply leave the trailing names unset
+                self.cpu_stat_dict = dict(zip(cpu_stat_var_array, cpustat))
+
+                # Calculate the total utilization
+                self.total = sum(cpustat)
+
+                break
+
+        if cmd_options.debug:
+            print("DEBUG : cpu statistic dictionary %s" % self.cpu_stat_dict)
+            print("DEBUG : total statistics %s" % self.total)
+
+# Get Sample CPU Statistics
+initial_stat = CollectStat(cmd_options.cpu_name)
+time.sleep(5)
+final_stat = CollectStat(cmd_options.cpu_name)
+
+if not initial_stat.cpu_stat_dict or not final_stat.cpu_stat_dict:
+    print("%s STATISTICS UNKNOWN : not found in /proc/stat" % cmd_options.cpu_name)
+    sys.exit(3)
+
+cpu_total_stat = final_stat.total - initial_stat.total
+
+if cmd_options.debug:
+    print("DEBUG : diff total stat %f" % cpu_total_stat)
+
+# Without progress in the counters there is nothing to divide by
+if cpu_total_stat <= 0:
+    print("%s STATISTICS UNKNOWN : counters did not advance" % cmd_options.cpu_name)
+    sys.exit(3)
+
+# Share of the sampling interval spent in each state, in percent. A counter
+# missing from /proc/stat counts as 0.
+usage_percent = {}
+for cpu_stat_var in cpu_stat_var_array:
+    delta = final_stat.cpu_stat_dict.get(cpu_stat_var, 0.0) - initial_stat.cpu_stat_dict.get(cpu_stat_var, 0.0)
+    usage_percent[cpu_stat_var] = (delta / cpu_total_stat) * 100
+
+# Everything except idle time counts as usage
+cpu_usage_percent = sum(usage_percent[name] for name in cpu_stat_var_array if name != 'idle')
+
+# Check if CPU Usage is Critical/Warning/OK
+if cpu_usage_percent >= cmd_options.critical_per:
+    status, exit_code = 'CRITICAL', 2
+elif cpu_usage_percent >= cmd_options.warning_per:
+    status, exit_code = 'WARNING', 1
+else:
+    status, exit_code = 'OK', 0
+
+# The same four values are printed twice: once for people, once as perfdata
+reported = (usage_percent['user'], usage_percent['system'], usage_percent['iowait'], usage_percent['steal_time'])
+output_format = '%s STATISTICS %s : user=%.2f%% system=%.2f%% iowait=%.2f%% steal=%.2f%% | user=%.2f system=%.2f iowait=%.2f steal=%.2f warn=%d crit=%d'
+print(output_format % ((cmd_options.cpu_name, status) + reported + reported + (cmd_options.warning_per, cmd_options.critical_per)))
+sys.exit(exit_code)