--- /dev/null
+package Logger;
+# Logger.pm
+#---------------------------------------------------
+#
+# Basic module to log to file
+#
+#---------------------------------------------------
+# Constructor for logger module
+
+sub new {
+    # Build a Logger from a hash reference of options:
+    #   filename - path of the log file
+    #   append   - true to append to the file; defaults to 0 (start a new file)
+    #   disable  - true to turn logging off; defaults to 0
+    # Returns whatever _init() returns for the freshly blessed object.
+    my ( $class, $args ) = @_;
+
+    my $self = bless {
+        filename => $args->{filename},
+        append   => ( defined( $args->{append} ) ? $args->{append} : 0 ),
+        line_no  => 0,
+        disable  => ( defined( $args->{disable} ) ? $args->{disable} : 0 ),
+    }, $class;
+
+    $self->_init();
+}
+
+#---------------------------------------------------
+# Initialise the log file
+
+sub _init {
+    # Open the log file named by $self->{filename} and store the handle in
+    # $self->{log_handle}.  The file is opened for appending when
+    # $self->{append} is true, otherwise it is created/truncated.
+    #
+    # Each object gets its own lexical handle, so several Logger objects
+    # can be open at once without sharing one global handle.
+    #
+    # If the file cannot be opened a warning is emitted and the object is
+    # switched to disabled, which turns add() and close() into no-ops.
+    #
+    # Always returns $self.
+    my $self = shift;
+
+    # Nothing to open when logging is switched off.
+    return $self if $self->{disable};
+
+    my $filename = $self->{filename};
+    if ( !defined($filename) || $filename eq '' ) {
+        warn "Logger: no log filename given; logging disabled\n";
+        $self->{disable} = 1;
+        return $self;
+    }
+
+    my $mode = $self->{append} ? '>>' : '>';
+
+    my $fh;
+    if ( !open( $fh, $mode, $filename ) ) {
+        warn "Logger: cannot open '$filename': $!; logging disabled\n";
+        $self->{disable} = 1;
+        return $self;
+    }
+
+    # Turn on autoflush for the log handle, then put back whichever handle
+    # was selected before so the caller's default output is not disturbed.
+    my $previous = select($fh);
+    $| = 1;
+    select($previous);
+
+    $self->{log_handle} = $fh;
+
+    return $self;
+}
+
+#---------------------------------------------------
+# Add to the log file
+
+sub add {
+    # Write $str to the log file exactly as given (no timestamp or newline
+    # is added) and bump the object's line counter.
+    #
+    # Returns the result of print (true on success), or nothing when
+    # logging is disabled.
+    my ( $self, $str ) = @_;
+
+    return if $self->{disable};
+
+    $self->{line_no}++;
+
+    #print $handle "[$timestamp]: ". $str;
+    return print { $self->{log_handle} } "" . $str;
+}
+
+#---------------------------------------------------
+# Close log file
+
+sub close {
+    # Close the log file handle unless logging is disabled.
+    my ($self) = @_;
+
+    if ( !$self->{disable} ) {
+        # CORE:: makes it explicit that the builtin is wanted, not this
+        # method of the same name.
+        CORE::close( $self->{log_handle} );
+    }
+}
+
+#---------------------------------------------------
+1;
+
--- /dev/null
+ON THE CENTRAL SYSLOG MACHINE
+
+syslog-ng setup:
+
+- Add the following lines in the appropriate places in /etc/syslog-ng/syslog-ng.conf (edit or replace the existing "filter f_system" line; do not duplicate it):
+
+Edit /etc/syslog-ng/syslog-ng.conf:
+
+destination d_external_eg_stats { # f_egstats
+file("/var/log/evergreen/prod/$YEAR/$MONTH/$DAY/eg_stats.log",
+template("$YEAR-$MONTH-$DAY $HOUR:$MIN:$SEC $HOST $MSG\n") owner(nagios) group(nagios) template-escape(no)); };
+
+filter f_egstats { match("eg-stats") and not match("CRON"); };
+filter f_system { not facility(local0, local1, local2, local3, local4, local5, local6, local7) and not filter(f_egstats); };
+
+log { source(s_all); filter(f_egstats); destination(d_external_eg_stats); };
+
+Restart syslog:
+
+/etc/init.d/syslog-ng restart
+
+---------------
+
+rsyslog setup:
+
+- Add the following lines in the appropriate places in /etc/rsyslog.d/evergreen-rsyslog.conf [ wget -O evergreen-rsyslog.conf "http://git.evergreen-ils.org/?p=Evergreen.git;a=blob_plain;f=Open-ILS/examples/evergreen-rsyslog.conf" ]
+
+$template egstats,"/var/log/remote/evergreen/%$YEAR%/%$MONTH%/%$DAY%/eg_stats.%$HOUR%.log"
+
+if $programname contains 'eg-stats' then
+{
+local0.info ?egstats;msgformat
+& ~
+}
+
+Restart rsyslog:
+
+/etc/init.d/rsyslog restart
+
+--------------------------
+
+ON THE ASP MACHINES
+
+cd /usr/share/perl5 && sudo wget -O Logger.pm "http://git.evergreen-ils.org/?p=contrib/equinox.git;a=blob_plain;f=monitoring/eg-stats/Logger.pm"
+cd /usr/bin && sudo wget -O eg-stats-collector-remote-log.pl "http://git.evergreen-ils.org/?p=contrib/equinox.git;a=blob_plain;f=monitoring/eg-stats/eg-stats-collector-remote-log.pl"
+sudo chmod +x eg-stats-collector-remote-log.pl
+
+syslog-ng:
+cd /usr/bin && sudo wget -O eg-stats-keepalive.sh "http://git.evergreen-ils.org/?p=contrib/equinox.git;a=blob_plain;f=monitoring/eg-stats/eg-stats-keepalive_syslog-ng.sh"
+
+rsyslog:
+cd /usr/bin && sudo wget -O eg-stats-keepalive.sh "http://git.evergreen-ils.org/?p=contrib/equinox.git;a=blob_plain;f=monitoring/eg-stats/eg-stats-keepalive_rsyslog.sh"
+
+sudo chmod +x eg-stats-keepalive.sh
+
+-----------------
+
+syslog-ng setup:
+
+- Add the following lines to the bottom of /etc/syslog-ng/syslog-ng.conf: #eg-stats-collector
+source s_file_eg-statslog { file("/var/log/evergreen/prod/$YEAR/$MONTH/$DAY/eg-stats.log" follow_freq(1)); };
+log { source(s_file_eg-statslog); destination(d_ext); };
+
+Restart syslog:
+
+/etc/init.d/syslog-ng restart
+
+---------------
+
+rsyslog setup:
+
+- Add the following lines below the RULES section (replace syslog_server with your central syslog machine name or ip):
+#
+# Evergreen
+#
+local0.* @syslog_server:514
+local1.* @syslog_server:514
+local2.* @syslog_server:514
+local3.* @syslog_server:514
+local6.* @syslog_server:514
+local7.* @syslog_server:514
+local0.* ~
+local1.* ~
+local2.* ~
+local3.* ~
+local6.* ~
+local7.* ~
+
+Restart rsyslog:
+
+/etc/init.d/rsyslog restart
+
+-------------------------------
+
+Edit crontab:
+
+crontab -e
+
+Insert this line:
+
+*/1 * * * * /usr/bin/eg-stats-keepalive.sh >/dev/null 2>&1
+
--- /dev/null
+#!/usr/bin/perl
+use Getopt::Long;
+use XML::LibXML;
+use Sys::Syslog;
+
+# Option defaults: OpenSRF config file, seconds between samples, and the
+# "run once" flag.  The remaining option variables start out undefined.
+my ( $config_file, $timeout, $run_once, $cpu_mode, $mem_mode, $drone_mode,
+    $help, $services )
+    = ( '/openils/conf/opensrf.xml', 15, 0 );
+GetOptions(
+    'config=s'  => \$config_file,
+    '1'         => \$run_once,
+    'cpu'       => \$cpu_mode,
+    'ram'       => \$mem_mode,
+    'opensrf'   => \$drone_mode,
+    'service=s' => \$services,
+    'delay=i'   => \$timeout,
+    'help'      => \$help
+);
+
+help() && exit if ($help);
+
+# When no collector is selected explicitly, run all three.
+$cpu_mode = $mem_mode = $drone_mode = 1
+    if ( !$cpu_mode && !$mem_mode && !$drone_mode );
+
+my @prev_data;
+
+openlog( 'eg-stats', 'ndelay', 'local0' );
+
+# gather data ...
+# Take a baseline reading of the first line of /proc/stat so the first
+# pass of the main loop has something to diff against.
+if ($cpu_mode) {
+    if ( open( my $procstat, '<', '/proc/stat' ) ) {
+        my $line = <$procstat>;
+        close $procstat;
+
+        if ( defined $line ) {
+            chomp $line;
+            @prev_data = split /\s+/, $line;
+        }
+    }
+    else {
+        warn "eg-stats: cannot read /proc/stat: $!\n";
+    }
+
+    sleep($timeout);
+}
+
+# Turn the comma separated --service list into a lookup hash.  Without
+# --service start from an empty hash so later code can always dereference it.
+$services = $services ? { map { $_ => 1 } split ',', $services } : {};
+
+my @apps;
+my @activeapps;
+if ($drone_mode) {
+    my $parser = XML::LibXML->new();
+
+    # Return an XML::LibXML::Document object
+    my $config = $parser->parse_file($config_file);
+
+    @activeapps = $config->findnodes('/opensrf/hosts/*/activeapps/*');
+    @apps       = $config->findnodes('/opensrf/default/apps/*');
+
+    # No explicit --service list: report on every active app in the config.
+    unless (%$services) {
+        %$services = map { $_->textContent => 1 } @activeapps;
+    }
+
+}
+
+# Main collection loop.  Each pass logs one syslog line per enabled
+# collector, then sleeps $timeout seconds.  With --1 ($run_once == 1) the
+# loop ends after the first pass; otherwise $run_once never returns to 0
+# and the loop keeps going for as long as sleep() returns a true value.
+do {
+
+    if ($cpu_mode) {
+        my @current_data;
+        if ( open( my $procstat, '<', '/proc/stat' ) ) {
+            my $line = <$procstat>;
+            close $procstat;
+
+            if ( defined $line ) {
+                chomp $line;
+                @current_data = split /\s+/, $line;
+            }
+        }
+
+        if ( @prev_data && @current_data ) {
+
+            # Field 0 is the row label; only counters 1..8 are used.
+            # The report treats 1 as user, 4 as idle, 5 as iowait and
+            # 8 as steal.
+            my @delta = (0);
+            for my $i ( 1 .. 8 ) {
+                $delta[$i]
+                    = ( $current_data[$i] || 0 ) - ( $prev_data[$i] || 0 );
+            }
+
+            my $total = 0;
+            $total += $_ for (@delta);
+
+            # No ticks elapsed (e.g. --delay=0): skip the report rather
+            # than divide by zero.
+            if ( $total > 0 ) {
+                my $res = sprintf(
+                    "CPU : user:%0.2f, idle:%0.2f, iow:%0.2f, steal:%0.2f\n",
+                    map { ( $delta[$_] / $total ) * 100 } 1, 4, 5, 8
+                );
+
+                # Pass the message as data, never as the format string.
+                syslog( 'info', '%s', $res );
+            }
+        }
+
+        # Keep the old baseline if this read failed.
+        @prev_data = @current_data if @current_data;
+    }
+
+    if ($drone_mode) {
+        my @data = split /\n/s, `ps ax|grep OpenSRF`;
+
+        # $service_data{service}{listener|drone} = number of processes seen
+        my %service_data;
+        for (@data) {
+            if (/OpenSRF (\w+) \[([^\]]+)\]/) {
+                my ( $s, $t ) = ( $2, lc($1) );
+                next unless exists $services->{$s};
+                $service_data{$s}{$t}++;
+            }
+        }
+
+        for my $s ( sort keys %$services ) {
+            my ($node) = grep { $_->nodeName eq $s } @apps;
+            next unless ($node);
+
+            my $max_kids = $node->findvalue('unix_config/max_children');
+
+            my $lcount = $service_data{$s}{listener} || 0;
+            my $dcount = $service_data{$s}{drone}    || 0;
+            my $res    = "SERVICE ($s) : "
+                . "listener count: $lcount, drone count: $dcount/$max_kids";
+
+            syslog( 'info', '%s', $res );
+        }
+    }
+
+    if ($mem_mode) {
+        my %memparts;
+        if ( open( my $meminfo, '<', '/proc/meminfo' ) ) {
+            while ( my $entry = <$meminfo> ) {
+                $memparts{$1} = $2 if $entry =~ /^(\w+):\s+(\d+)/;
+            }
+            close $meminfo;
+        }
+
+        my $total     = $memparts{MemTotal} || 0;
+        my $free      = $memparts{MemFree}  || 0;
+        my $buffers   = $memparts{Buffers}  || 0;
+        my $cached    = $memparts{Cached}   || 0;
+        my $available = $free + $buffers + $cached;
+
+        # Without a MemTotal value the percentages cannot be computed.
+        if ( $total > 0 ) {
+            my $res = sprintf(
+                'RAM : total:%s kB, free:%s kB, buffers:%s kB, '
+                    . 'cached:%s kB, available:%s kB, '
+                    . 'free%%:%0.2f, buffers%%:%0.2f, '
+                    . 'cached%%:%0.2f, available%%:%0.2f' . "\n",
+                $total, $free, $buffers, $cached, $available,
+                map { ( $_ / $total ) * 100 }
+                    $free, $buffers, $cached, $available
+            );
+            syslog( 'info', '%s', $res );
+        }
+    }
+
+    $run_once--;
+} while ( $run_once != 0 && sleep($timeout) );
+
+# Print the command line usage summary on STDOUT.
+# Returns 1 so that a caller written as `help() && exit` always exits,
+# even if the print itself fails.
+sub help {
+    print <<HELP;
+
+Evergreen Server Health Monitor
+
+ --config=<config_file>
+ OpenSRF configuration file for Evergreen.
+ Default: /openils/conf/opensrf.xml
+
+ --1
+ Run once and stop
+
+ --cpu
+ Collect CPU usage stats via /proc/stat
+
+ --ram
+ Collect RAM usage stats via /proc/meminfo
+
+ --opensrf
+ Collect Evergreen service status info from the output of ps
+
+ --service=<service name>
+ Comma separated list of services to report on. If not supplied, all
+ services are reported.
+
+ --delay=<seconds>
+ Delay time between collection passes; also the CPU sampling interval.
+ Default: 15
+
+ --help
+ Print this help message
+
+HELP
+    return 1;
+}
+
--- /dev/null
+#!/bin/sh
+# Keepalive for the eg-stats collector: start it in the background when
+# no running instance shows up in the process list.
+#
+# The [e] bracket keeps grep from matching its own command line, so no
+# separate "grep -v grep" stage is needed.
+if ! ps ax | grep -q '[e]g-stats-collector-remote-log\.pl'; then
+    /usr/bin/eg-stats-collector-remote-log.pl &
+fi
--- /dev/null
+#!/bin/sh
+# Keepalive for the eg-stats collector: when no running instance shows up
+# in the process list, start it in the background for a fixed list of
+# services and then restart syslog-ng.
+#
+# The [e] bracket keeps grep from matching its own command line, so no
+# separate "grep -v grep" stage is needed.
+if ! ps ax | grep -q '[e]g-stats-collector-remote-log\.pl'; then
+    /usr/bin/eg-stats-collector-remote-log.pl --service=open-ils.acq,open-ils.auth,open-ils.search,open-ils.actor,open-ils.booking,open-ils.cat,open-ils.supercat,open-ils.trigger,opensrf.math,opensrf.dbmath,open-ils.penalty,open-ils.circ,open-ils.ingest,open-ils.storage,open-ils.cstore,open-ils.pcrud,opensrf.settings,open-ils.collections,open-ils.reporter,open-ils.reporter-store,open-ils.permacrud,open-ils.fielder,open-ils.vandelay &
+    sleep 1
+    /etc/init.d/syslog-ng restart
+fi
--- /dev/null
+#!/usr/bin/perl -w
+package main;
+use POSIX;
+use strict;
+
+
+# Nagios check for the eg-stats log.
+#
+# Reads the lines appended to today's eg_stats.log since the previous run
+# (the byte offset is remembered in a small state file) and reports:
+#   CRITICAL (2) - a listener/master/controller count of 0, or a drone
+#                  count at 90% or more of its maximum
+#   WARNING  (1) - a drone count at 75% or more of its maximum, or the
+#                  state file could not be written
+#   OK       (0) - otherwise
+# The worst status seen wins; a later, milder line never downgrades it.
+
+# Today's log lives in a YYYY/MM/DD directory tree.
+my $date_path = strftime( '%Y/%m/%d', localtime );
+
+my $posF   = "/tmp/eg_stats_position.log";
+my $statsF = "/var/log/evergreen/prod/$date_path/eg_stats.log";
+my $pos    = 0;
+my $status = 0;     #status is OK!
+my $info   = "";
+
+# The state file holds two lines: the log file name and the byte offset
+# reached last time.  Anything else (missing file, wrong line count, a
+# different log file, a non-numeric offset) means "start from 0".
+if ( open( my $pos_fh, '<', $posF ) ) {
+    my @values = <$pos_fh>;
+    close $pos_fh;
+
+    if ( @values == 2 ) {
+        chomp @values;
+        my ( $loc, $saved ) = @values;
+        if ( $loc eq $statsF && $saved =~ /^\d+$/ ) {
+            $pos = $saved;
+        }
+    }
+}
+
+#parse the file and output for Nagios if necessary
+if ( open( my $stats_fh, '<', $statsF ) ) {
+
+    # A saved offset beyond the end of the file means the log was
+    # truncated or replaced; read it again from the beginning.
+    my $size = ( -s $stats_fh ) || 0;
+    $pos = 0 if $pos > $size;
+
+    seek $stats_fh, $pos, 0;
+    while ( my $line = <$stats_fh> ) {
+        chomp($line);
+
+        #check for lost controller first
+        if ( $line =~ m/listener count: 0/ ) {
+            $info .= "Lost a listener: $line - ";
+            $status = 2;
+        }
+        if ( $line =~ m/master count: 0/ ) {
+            $info .= "Lost a master: $line - ";
+            $status = 2;
+        }
+        if ( $line =~ m/controller count: 0/ ) {
+            $info .= "Lost a controller: $line - ";
+            $status = 2;
+        }
+
+        #now check for drone ratio
+        if ( $line =~ m/SERVICE/ ) {
+            my ( $drones, $max ) = $line =~ m{drone count: (\d+)/(\d+)}i;
+
+            # Skip lines without a usable "running/max" pair (including
+            # a max of 0) instead of evaluating text from the log.
+            if ( defined $max && $max > 0 ) {
+                my $ratio = $drones / $max;
+                my $pct   = ceil( $ratio * 100 );
+                if ( $ratio >= 0.9 ) {
+                    $info .= "Drone count is $pct % - $line - ";
+                    $status = 2;
+                }
+                elsif ( $ratio >= 0.75 ) {
+                    $info .= "Drone count is $pct % - $line - ";
+                    $status = 1 if $status < 1;
+                }
+            }
+        }
+    }
+    $pos = tell $stats_fh;
+    close $stats_fh;
+}
+else {
+    $pos = 0;
+}
+
+#update position info
+if ( open( my $out_fh, '>', $posF ) ) {
+    print {$out_fh} "$statsF\n$pos\n";
+    close $out_fh;
+}
+else {
+    # Without the saved offset the next run re-reads the same lines.
+    $info .= "Cannot write $posF: $! - ";
+    $status = 1 if $status < 1;
+}
+
+if ( $info eq "" ) {
+    $info = "EG-STATS-COLLECTOR STATUS: OK!";
+}
+
+print $info;
+exit $status;
--- /dev/null
+#!/bin/sh
+# Copyright (C) 2008-2013 Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author : Michael Tate, Sys Admin, ESI, to allow for a path from the command line
+# Based on code written by Don McMorris <dmcmorris@esilibrary.com>; see check_null
+# Purpose : Look for excessive NOT CONNECTEDS in the osrfsys logs
+
+USAGE="check_notconnected <LOG PATH>"
+# <LOG PATH> : The path to the log location.
+# : This plugin assumes that the logs will be dropped into folders for
+# : year (4 char), month (2 char), and day (2 char).
+# : If no path is entered, it will default to "/var/log/evergreen/prod/"
+
+
+if [ -n "$1" ]; then
+ if [[ $1 == *help* ]]; then
+ EXITSTATUS="$USAGE"
+ EXITCODE="0"
+ else
+
+
+ if [ -n "$1" ]; then
+ LOGPATH="$1/$(date +%Y/%m/%d)"
+ else
+ LOGPATH="/var/log/evergreen/prod/$(date +%Y/%m/%d)"
+ fi
+
+LOGFILE="$LOGPATH/osrfsys.$(date +%H).log"
+
+
+NCCOUNT=`grep -c 'IS NOT CONNECTED TO THE NETWORK' $LOGFILE`
+
+ if [ "$NCCOUNT" -gt "0" ]; then
+ TOPSERVER=$(grep "IS NOT CONNECTED TO THE NETWORK" $LOGFILE | cut -d" " -f3 | sort | uniq -c | sort -nr | head -1)
+ SVRMSG=" (Top server this hour: $TOPSERVER)"
+ else
+ SVRMSG="."
+ fi
+
+ if [ "$NCCOUNT" -ge "4" ]; then
+ EXITSTATUS="CRIT: $NCCOUNT NOT CONNECTEDs returned this hour:$SVRMSG"
+ EXITCODE="2"
+ elif [ "$NCCOUNT" -ge "2" ]; then
+ EXITSTATUS="WARN: $NCCOUNT NOT CONNECTEDs returned this hour:$SVRMSG"
+ EXITCODE="1"
+ elif [ "$NCCOUNT" -lt "2" ]; then
+ EXITSTATUS="OK: $NCCOUNT NOT CONNECTEDs returned this hour$SVRMSG"
+ EXITCODE="0"
+ else
+ EXITSTATUS="WARN: An error has occurred in the plugin"
+ EXITCODE="1"
+ fi
+
+ fi
+fi
+
+echo "$EXITSTATUS"
+exit $EXITCODE
+
--- /dev/null
+#!/bin/sh
+# Copyright (C) 2008-2010 Equinox Software, Inc.
+# Written by Don McMorris <dmcmorris@esilibrary.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+#
+# Author : Don McMorris, Sys Admin, ESI
+# Modified : Michael Tate, Sys Admin, ESI, to allow for a path from the command line
+# Purpose : Look for excessive NULLS in the gateway logs
+
+USAGE="check_null <# mins to check> <WARNLIMIT> <CRITLIMIT> <LOG PATH>"
+# <# mins to check> : Check for errors in the last # minutes
+# <WARNLIMIT> : The number of NULLS in the logs at which to present a warning condition.
+# <CRITLIMIT> : The number of NULLS on the logs at which to present a critical condition.
+# <LOG PATH> : The path to the log location.
+# : This plugin assumes that the logs will be dropped into folders for
+# : year (4 char), month (2 char), and day (2 char).
+# : If no path is entered, it will default to "/var/log/evergreen/prod/"
+
+if [ -n "$1" ]; then
+ if [[ $1 == *help* ]]; then
+ else
+
+PERIOD=$1
+WARNLIMIT=$2
+CRITLIMIT=$3
+ if [ -n "$4" ]; then
+ LOGPATH="$4/$(date +%Y/%m/%d)"
+ else
+ LOGPATH="/var/log/evergreen/prod/$(date +%Y/%m/%d)"
+ fi
+
+PREVTOT=0
+LOGFILE="$LOGPATH/gateway.$(date +%H).log"
+
+if [ $(date +%H | cut -b1) = 0 ]; then
+ CURRHOUR=$(date +%H | cut -b2)
+else
+ CURRHOUR=$(date +%H)
+fi
+
+if [ $(date +%M | cut -b1) = 0 ]; then
+ CURRMIN=$(date +%M | cut -b2 )
+else
+ CURRMIN=$(date +%M)
+fi
+
+if [ $CURRMIN -lt $PERIOD ]; then
+ # How many minutes of the last hour do we need to check?
+ TMPDIFFM2=$((60 - $(($PERIOD - $CURRMIN))))
+
+ # This logic will mean that "Returning NULL"'s logged at the late 2300 hour will not be counted during the early Midnight hour check.
+ # This is acceptable for now.
+ if [ $CURRHOUR -gt 0 ]; then
+ # define LOGFILE2 (last hours' log)
+ if [ $CURRHOUR -gt 11 ]; then
+ LOGFILE2="$LOGPATH/gateway.$(($CURRHOUR - 1)).log"
+ else
+ LOGFILE2="$LOGPATH/gateway.0$(($CURRHOUR - 1)).log"
+ fi
+
+ while [ $TMPDIFFM2 -lt 60 ]; do
+ PREVTOT=$(($PREVTOT + $(grep "Returning NULL" $LOGFILE2 | cut -d":" -f2 | grep -c $TMPDIFFM2)))
+ TMPDIFFM2=$(($TMPDIFFM2 + 1))
+ done
+ fi
+ while [ $TMPDIFF1 -le $CURRMIN ]; do
+ PREVTOT=$(($PREVTOT + $(grep "Returning NULL" $LOGFILE | cut -d":" -f2 | grep -c $TMPDIFF1)))
+ TMPDIFF1=$(($TMPDIFF1 + 1))
+ done
+else
+ TMPDIFF1=$(($CURRMIN-$PERIOD))
+ while [ $TMPDIFF1 -le $CURRMIN ]; do
+ PREVTOT=$(($PREVTOT + $(grep "Returning NULL" $LOGFILE | cut -d":" -f2 | grep -c $TMPDIFF1)))
+ TMPDIFF1=$(($TMPDIFF1 + 1))
+ done
+
+fi
+
+
+TOPSERVER=$(grep "Returning NULL" $LOGFILE | cut -d" " -f3 | sort | uniq -c | sort -nr | head -1)
+
+if [ "$TOPSERVER" != null ]; then
+ SVRMSG=" (Top server this hour: $TOPSERVER)"
+else
+ SVRMSG="."
+fi
+
+if [ $PREVTOT -ge $CRITLIMIT ]; then
+ echo "CRIT: $PREVTOT NULLs returned in past $PERIOD minutes$SVRMSG"
+ exit 2
+elif [ $PREVTOT -ge $WARNLIMIT ]; then
+ echo "WARN: $PREVTOT NULLs returned in the past $PERIOD minutes$SVRMSG"
+ exit 1
+elif [ $PREVTOT -lt $WARNLIMIT ]; then
+ echo "OK: $PREVTOT NULLs returned in the past $PERIOD minutes$SVRMSG"
+ exit 0
+else
+ echo "WARN: An error has occurred $PREVTOT $PERIOD"
+ exit 1
+fi
+
+
+fi
+