Draft version of extracting script and nagios plugins
author Dmitry Nechai <nechai.dmitry@gmail.com>
Tue, 25 Jun 2013 12:49:26 +0000 (15:49 +0300)
committer Dmitry Nechai <nechai.dmitry@gmail.com>
Tue, 25 Jun 2013 12:49:26 +0000 (15:49 +0300)
Open-ILS/src/dashboard/janus.pl [new file with mode: 0755]
Open-ILS/src/dashboard/nagios-plugins/check_memory.sh [new file with mode: 0755]
Open-ILS/src/dashboard/nagios-plugins/linux-cpu-usage.py [new file with mode: 0755]

diff --git a/Open-ILS/src/dashboard/janus.pl b/Open-ILS/src/dashboard/janus.pl
new file mode 100755 (executable)
index 0000000..106b1cd
--- /dev/null
@@ -0,0 +1,80 @@
+#!/usr/bin/perl
+
+# janus.pl - read the Nagios service performance log and merge the values
+# recognised by _process_string() into a Storable snapshot on disk.
+
+use strict;
+use warnings;
+
+use Fcntl qw(:flock SEEK_SET);
+use Storable qw(store retrieve);
+use Data::Dumper;
+
+my $perf_log = '/tmp/service-perfdata';
+my $saved_data = '/tmp/janus_data';
+
+# Three-argument open with a lexical handle: the path is never parsed for a
+# mode or pipe character, and the handle cannot clash with another bareword.
+open my $perf_log_fh, '<', $perf_log
+  or die "Cannot open perfomance log: $!";
+
+# Block the log file while it is read. LOCK_EX has to be imported from Fcntl;
+# as a plain bareword it is the string "LOCK_EX", i.e. operation 0, and no
+# lock is ever taken. A failed lock is reported but is not fatal, so the
+# script keeps working on platforms where an exclusive lock needs a handle
+# opened for writing.
+flock($perf_log_fh, LOCK_EX)
+  or warn "Cannot lock performance log: $!";
+
+# Offset of the data that was already processed. It is not persisted yet, so
+# every run starts from the beginning of the log; samples that are read again
+# simply overwrite their own {host}{metric}{time} entries.
+my $processed_volume = 0;
+
+seek($perf_log_fh, $processed_volume, SEEK_SET)
+  or die "Cannot seek in performance log: $!";
+
+# Filled by _process_string(), which closes over this file-scoped lexical.
+my %perf_data = ();
+
+if (-e $saved_data){   #if exists file with data
+  # retrieve() returns undef on I/O errors; stop here instead of replacing
+  # the history on disk with a snapshot built from an empty hash.
+  my $perf_data_ref = retrieve($saved_data)
+    or die "Cannot retrieve saved data from $saved_data: $!";
+  %perf_data = %{$perf_data_ref};
+}
+
+while (my $line = <$perf_log_fh>){     #read lines
+  _process_string($line); #processing of lines
+}
+
+# store() returns undef on I/O errors.
+store(\%perf_data, $saved_data)
+  or die "Cannot store data in $saved_data: $!";        #save data
+
+# TODO: save new processed volume
+
+close $perf_log_fh; #unblock log file
+
+# Parse one tab-separated performance-log line and record its value in
+# %perf_data as $perf_data{host}{metric}{time}.
+#
+# The line is split into id, time, host name, service description, service
+# output and service data. Only the service descriptions listed in the rule
+# table are handled; any other line is ignored. When the service output does
+# not match the expected text, -1 is stored in place of every value.
+sub _process_string {
+
+  my ($id, $time, $hostname, $service_desc, $service_output, $service_data) = split /\t/, $_[0];
+
+  # One rule per service description:
+  #   metric - key used below the host name in %perf_data
+  #   names  - named captures to store, in storage order
+  #   list   - true: store an array ref; false: store the single value
+  #   regex  - pattern applied to the service output
+  my %rule_for = (
+    'Current Load' => {
+      metric => 'load',
+      names  => [qw(one five fifteen)],
+      list   => 1,
+      regex  => qr/load average: (?<one>\d+(\.\d{0,})?), (?<five>\d+(\.\d{0,})?), (?<fifteen>\d+(\.\d{0,})?)/,
+    },
+    'Memory Usage' => {
+      metric => 'mem',
+      names  => [qw(val)],
+      list   => 0,
+      regex  => qr/(?<val>\d+(\.\d{0,})?)% used/,
+    },
+    'CPU Usage' => {
+      metric => 'cpu',
+      names  => [qw(user sys iowait steal)],
+      list   => 1,
+      regex  => qr/user=(?<user>\d+(\.\d{0,})?)% system=(?<sys>\d+(\.\d{0,})?)% iowait=(?<iowait>\d+(\.\d{0,})?)% steal=(?<steal>\d+(\.\d{0,})?)%/,
+    },
+    'Disk Space' => {
+      metric => 'disk',
+      names  => [qw(mbyte pers)],
+      list   => 1,
+      regex  => qr/free space: \/ (?<mbyte>\d+(\.\d{0,})?) MB \((?<pers>\d+(\.\d{0,})?)%/,
+    },
+    'Total Processes' => {
+      metric => 'proc',
+      names  => [qw(procs)],
+      list   => 0,
+      regex  => qr/(?<procs>\d+) processes/,
+    },
+  );
+
+  my $rule = $rule_for{$service_desc};
+  return unless $rule;   # unknown service
+
+  my @values;
+  if ($service_output =~ $rule->{regex}){
+    # Copy the named captures right away, before another match can reset %+.
+    @values = map { $+{$_} } @{ $rule->{names} };
+  } else {
+    # Output was not understood: one -1 per expected value.
+    @values = (-1) x scalar @{ $rule->{names} };
+  }
+
+  $perf_data{$hostname}{ $rule->{metric} }{$time} = $rule->{list} ? [@values] : $values[0];
+}
\ No newline at end of file
diff --git a/Open-ILS/src/dashboard/nagios-plugins/check_memory.sh b/Open-ILS/src/dashboard/nagios-plugins/check_memory.sh
new file mode 100755 (executable)
index 0000000..3fc7f11
--- /dev/null
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Script to check memory usage on Linux. Ignores memory used by disk cache.
+#
+# Requires the bc command
+#
+print_help() {
+    echo "Usage:"
+    echo "[-w] Warning level as a percentage"
+    echo "[-c] Critical level as a percentage"
+    exit 0
+}
+
+while test -n "$1"; do
+    case "$1" in
+        --help|-h)
+            print_help
+            exit 0
+            ;;
+        -w)
+            warn_level=$2
+            shift
+            ;;
+        -c)
+            critical_level=$2
+            shift
+            ;;
+        *)
+            echo "Unknown Argument: $1"
+            print_help
+            exit 3
+            ;;
+    esac
+    shift
+done
+
+if [ "$warn_level" == "" ]; then
+    echo "No Warning Level Specified"
+    print_help
+    exit 3;
+fi
+
+if [ "$critical_level" == "" ]; then
+    echo "No Critical Level Specified"
+    print_help
+    exit 3;
+fi
+
+free=`free -m | grep "buffers/cache" | awk '{print $4}'`
+used=` free -m | grep "buffers/cache" | awk '{print $3}'`
+
+total=$(($free+$used))
+
+result=$(echo "$used / $total * 100" |bc -l|cut -c -2)
+
+if [ "$result" -lt "$warn_level" ]; then
+    echo "Memory OK. $result% used."
+    exit 0;
+elif [ "$result" -ge "$warn_level" ] && [ "$result" -le "$critical_level" ]; then
+    echo "Memory WARNING. $result% used."
+    exit 1;
+elif [ "$result" -gt "$critical_level" ]; then
+    echo "Memory CRITICAL. $result% used."
+    exit 2;
+fi 
diff --git a/Open-ILS/src/dashboard/nagios-plugins/linux-cpu-usage.py b/Open-ILS/src/dashboard/nagios-plugins/linux-cpu-usage.py
new file mode 100755 (executable)
index 0000000..748924b
--- /dev/null
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+import sys
+import time
+from optparse import OptionParser
+
+###  Global Identifiers  ###
+cpu_stat_var_array = ('user', 'nice', 'system', 'idle', 'iowait', 'irq', 'softirq', 'steal_time') 
+
+###   Main code   ###
+# Command Line Arguments Parser
+cmd_parser = OptionParser(version="%prog 0.1")
+cmd_parser.add_option("-C", "--CPU", action="store", type="string", dest="cpu_name", help="Which CPU to be Check", metavar="cpu or cpu0 or cpu1")
+cmd_parser.add_option("-w", "--warning", type="int", action="store", dest="warning_per", help="Exit with WARNING status if higher than the PERCENT of CPU Usage", metavar="Warning Percentage")
+cmd_parser.add_option("-c", "--critical", type="int", action="store", dest="critical_per", help="Exit with CRITICAL status if higher than the PERCENT of CPU Usage", metavar="Critical Percentage")
+cmd_parser.add_option("-d", "--debug", action="store_true", dest="debug", default=False, help="enable debug")
+
+(cmd_options, cmd_args) = cmd_parser.parse_args()
+# Check the Command syntax
+if not (cmd_options.cpu_name and cmd_options.warning_per and cmd_options.critical_per):
+    cmd_parser.print_help()
+    sys.exit(3)
+
+# Collect CPU Statistic Object 
+class CollectStat:
+    """Object to Collect CPU Statistic Data"""
+    def __init__(self,cpu_name):
+        
+        self.total = 0 
+        self.cpu_stat_dict = {}
+        
+        for line in open("/proc/stat"):
+            line = line.strip()
+        
+            if line.startswith(cpu_name):
+                cpustat=line.split()
+                cpustat.pop(0)              # Remove the First Array of the Line 'cpu'
+
+                # Remove the unwanted data from the array
+                # only retain first 8 field on the file
+                while len(cpustat) > 8:
+                    cpustat.pop()
+
+                if cmd_options.debug:
+                    print "DEBUG : cpustat array %s" % cpustat
+                        
+                cpustat=map(float, cpustat)     # Convert the Array to Float
+
+                for i in range(len(cpustat)):
+                    self.cpu_stat_dict[cpu_stat_var_array[i]] = cpustat[i]
+
+                # Calculate the total utilization
+                for i in cpustat:
+                    self.total += i 
+
+                break
+
+        if cmd_options.debug:
+            print "DEBUG : cpu statistic dictionary %s" % self.cpu_stat_dict
+            print "DEBUG : total statistics %s" % self.total
+
+# Get Sample CPU Statistics 
+initial_stat = CollectStat(cmd_options.cpu_name)
+time.sleep(5)
+final_stat = CollectStat(cmd_options.cpu_name)
+
+cpu_total_stat = final_stat.total - initial_stat.total
+
+if cmd_options.debug:
+    print "DEBUG : diff total stat %f" % cpu_total_stat
+
+for cpu_stat_var,cpu_stat in final_stat.cpu_stat_dict.items():
+    globals()['cpu_%s_usage_percent' % cpu_stat_var] = ((final_stat.cpu_stat_dict[cpu_stat_var] - initial_stat.cpu_stat_dict[cpu_stat_var])/cpu_total_stat)*100  
+
+cpu_usage_percent = cpu_user_usage_percent + cpu_nice_usage_percent + cpu_system_usage_percent + cpu_iowait_usage_percent + cpu_irq_usage_percent + cpu_softirq_usage_percent + cpu_steal_time_usage_percent
+
+# Check if CPU Usage is Critical/Warning/OK
+if cpu_usage_percent >= cmd_options.critical_per:
+    print cmd_options.cpu_name +' STATISTICS CRITICAL : user=%.2f%% system=%.2f%% iowait=%.2f%% steal=%.2f%% | user=%.2f system=%.2f iowait=%.2f steal=%.2f warn=%d crit=%d' % (cpu_user_usage_percent, cpu_system_usage_percent, cpu_iowait_usage_percent, cpu_steal_time_usage_percent, cpu_user_usage_percent, cpu_system_usage_percent, cpu_iowait_usage_percent, cpu_steal_time_usage_percent, cmd_options.warning_per, cmd_options.critical_per)
+    sys.exit(2)
+elif  cpu_usage_percent >= cmd_options.warning_per:
+    print cmd_options.cpu_name +' STATISTICS WARNING : user=%.2f%% system=%.2f%% iowait=%.2f%% steal=%.2f%% | user=%.2f system=%.2f iowait=%.2f steal=%.2f warn=%d crit=%d' % (cpu_user_usage_percent, cpu_system_usage_percent, cpu_iowait_usage_percent, cpu_steal_time_usage_percent, cpu_user_usage_percent, cpu_system_usage_percent, cpu_iowait_usage_percent, cpu_steal_time_usage_percent, cmd_options.warning_per, cmd_options.critical_per)
+    sys.exit(1)
+else:
+    print cmd_options.cpu_name +' STATISTICS OK : user=%.2f%% system=%.2f%% iowait=%.2f%% steal=%.2f%% | user=%.2f system=%.2f iowait=%.2f steal=%.2f warn=%d crit=%d' % (cpu_user_usage_percent, cpu_system_usage_percent, cpu_iowait_usage_percent, cpu_steal_time_usage_percent, cpu_user_usage_percent, cpu_system_usage_percent, cpu_iowait_usage_percent, cpu_steal_time_usage_percent, cmd_options.warning_per, cmd_options.critical_per)
+    sys.exit(0)