From: Anoop Atre
Date: Wed, 4 Sep 2013 21:56:28 +0000 (-0400)
Subject: Adding eg-stats files for monitoring Evergreen services
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=d01c046e9c79e7337e58c9032d7e8a6e9d7d54c1;p=contrib%2Fequinox.git

Adding eg-stats files for monitoring Evergreen services
---

diff --git a/eg-stats/Logger.pm b/eg-stats/Logger.pm
new file mode 100644
index 0000000..01197ab
--- /dev/null
+++ b/eg-stats/Logger.pm
@@ -0,0 +1,100 @@
+package Logger;
+
+# Logger.pm
+#---------------------------------------------------
+#
+# Basic module to log to file
+#
+#---------------------------------------------------
+
+use strict;
+use warnings;
+use IO::Handle;
+
+#---------------------------------------------------
+# Constructor for logger module
+#
+# Takes a hash reference with the keys:
+#   filename - path of the log file
+#   append   - true to append to an existing file (default: start a new file)
+#   disable  - true to turn add() and close() into no-ops (default: enabled)
+# Returns the initialised Logger object.
+
+sub new {
+    my $class = shift;
+    my $args  = shift || {};
+
+    my $self = {
+        filename => $args->{filename},
+        # Default is to create a new logfile, NOT to append to it
+        append   => defined $args->{append}  ? $args->{append}  : 0,
+        disable  => defined $args->{disable} ? $args->{disable} : 0,
+        line_no  => 0,
+    };
+
+    bless( $self, $class );
+    return $self->_init();
+}
+
+#---------------------------------------------------
+# Initialise the log file
+#
+# Opens the file on a handle private to this object, so several Logger
+# objects can be used at once without sharing one global handle.
+# If the file cannot be opened a warning is issued and the logger
+# disables itself instead of printing to an unopened handle.
+
+sub _init {
+    my $self = shift;
+    return $self if $self->{disable};
+
+    my $filename = $self->{filename};
+    if ( !defined $filename ) {
+        warn "Logger: no filename given, logging disabled\n";
+        $self->{disable} = 1;
+        return $self;
+    }
+
+    my $mode = $self->{append} ? '>>' : '>';
+    my $handle;
+    if ( !open( $handle, $mode, $filename ) ) {
+        warn "Logger: cannot open '$filename': $!, logging disabled\n";
+        $self->{disable} = 1;
+        return $self;
+    }
+
+    # Unbuffered output, so text reaches the file as soon as it is added
+    $handle->autoflush(1);
+    $self->{log_handle} = $handle;
+
+    return $self;
+}
+
+#---------------------------------------------------
+# Add to the log file
+#
+# The string is written as given: no timestamp and no newline are added.
+
+sub add {
+    my $self = shift;
+    my $str  = shift;
+    return if $self->{disable};
+
+    $self->{line_no}++;    # number of entries written so far
+    return print { $self->{log_handle} } defined $str ? $str : '';
+}
+
+#---------------------------------------------------
+# Close log file
+
+sub close {
+    my $self = shift;
+    return if $self->{disable};
+
+    # CORE:: makes it explicit that the builtin is meant, not this method
+    return CORE::close( $self->{log_handle} );
+}
+
+#---------------------------------------------------
+1;
+
diff --git a/eg-stats/README b/eg-stats/README
new file mode 100644
index 0000000..b777b17
--- /dev/null
+++ b/eg-stats/README
@@ -0,0 +1,104 @@
+ON THE CENTRAL SYSLOG MACHINE
+
+syslog-ng setup:
+
+- Add the following lines in the appropriate places in /etc/syslog-ng/syslog-ng.conf (edit/replace the existing filter f_system line; don't duplicate it)
+
+edit /etc/syslog-ng/syslog-ng.conf
+
+destination d_external_eg_stats { # f_egstats
+file("/var/log/evergreen/prod/$YEAR/$MONTH/$DAY/eg_stats.log",
+template("$YEAR-$MONTH-$DAY $HOUR:$MIN:$SEC $HOST $MSG\n") owner(nagios) group(nagios) template-escape(no)); };
+
+filter f_egstats { match("eg-stats") and not match("CRON"); };
+filter f_system { not facility(local0, local1, local2, local3, local4, local5, local6, local7) and not filter(f_egstats); };
+
+log { source(s_all); filter(f_egstats); destination(d_external_eg_stats); };
+
+Restart syslog:
+
+/etc/init.d/syslog-ng restart
+
+---------------
+
+rsyslog setup:
+
+- Add the following lines in the appropriate places in /etc/rsyslog.d/evergreen-rsyslog.conf [ wget -O evergreen-rsyslog.conf "http://git.evergreen-ils.org/?p=Evergreen.git;a=blob_plain;f=Open-ILS/examples/evergreen-rsyslog.conf" ]
+
+$template egstats,"/var/log/remote/evergreen/%$YEAR%/%$MONTH%/%$DAY%/eg_stats.%$HOUR%.log"
+
+if $programname contains 'eg-stats' then
+{
+local0.info ?egstats;msgformat
+& ~
+}
+
+Restart rsyslog:
+
+/etc/init.d/rsyslog restart
+
+--------------------------
+
+ON THE ASP MACHINES
+
+cd /usr/share/perl5 && sudo wget -O Logger.pm "http://git.evergreen-ils.org/?p=contrib/equinox.git;a=blob_plain;f=eg-stats/Logger.pm"
+cd /usr/bin && sudo wget -O eg-stats-collector-remote-log.pl "http://git.evergreen-ils.org/?p=contrib/equinox.git;a=blob_plain;f=eg-stats/eg-stats-collector-remote-log.pl"
+sudo chmod +x eg-stats-collector-remote-log.pl
+
+syslog-ng:
+cd /usr/bin && sudo wget -O eg-stats-keepalive.sh "http://git.evergreen-ils.org/?p=contrib/equinox.git;a=blob_plain;f=eg-stats/eg-stats-keepalive_syslog-ng.sh"
+
+rsyslog:
+cd /usr/bin && sudo wget -O eg-stats-keepalive.sh "http://git.evergreen-ils.org/?p=contrib/equinox.git;a=blob_plain;f=eg-stats/eg-stats-keepalive_rsyslog.sh"
+
+sudo chmod +x eg-stats-keepalive.sh
+
+-----------------
+
+syslog-ng setup:
+
+- Add the following lines to the bottom of /etc/syslog-ng/syslog-ng.conf:
+
+#eg-stats-collector
+source s_file_eg-statslog { file("/var/log/evergreen/prod/$YEAR/$MONTH/$DAY/eg-stats.log" follow_freq(1)); };
+log { source(s_file_eg-statslog); destination(d_ext); };
+
+Restart syslog:
+
+/etc/init.d/syslog-ng restart
+
+---------------
+
+rsyslog setup:
+
+- Add the following lines below the RULES section (replace syslog_server with your central syslog machine name or ip):
+#
+# Evergreen
+#
+local0.* @syslog_server:514
+local1.* @syslog_server:514
+local2.* @syslog_server:514
+local3.* @syslog_server:514
+local6.* @syslog_server:514
+local7.* @syslog_server:514
+local0.* ~
+local1.* ~
+local2.* ~
+local3.* ~
+local6.* ~
+local7.* ~
+
+Restart rsyslog:
+
+/etc/init.d/rsyslog restart
+
+-------------------------------
+
+Edit crontab:
+
+crontab -e
+
+Insert this line:
+
+*/1 * * * * /usr/bin/eg-stats-keepalive.sh >/dev/null 2>&1
+
diff --git a/eg-stats/eg-stats-collector-remote-log.pl b/eg-stats/eg-stats-collector-remote-log.pl
new file mode 100644
index 0000000..87dfba0
--- /dev/null
+++ b/eg-stats/eg-stats-collector-remote-log.pl
@@ -0,0 +1,204 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use Getopt::Long;
+use XML::LibXML;
+use Sys::Syslog qw(:standard :macros);
+
+my ( $config_file, $timeout, $run_once, $cpu_mode, $mem_mode, $drone_mode,
+    $help, $services )
+  = ( '/openils/conf/opensrf.xml', 15, 0 );
+GetOptions(
+    'config=s'  => \$config_file,
+    '1'         => \$run_once,
+    'cpu'       => \$cpu_mode,
+    'ram'       => \$mem_mode,
+    'opensrf'   => \$drone_mode,
+    'service=s' => \$services,
+    'delay=i'   => \$timeout,
+    'help'      => \$help
+);
+
+if ($help) {
+    help();
+    exit;
+}
+
+# With no mode selected, collect everything
+$cpu_mode = $mem_mode = $drone_mode = 1
+  if ( !$cpu_mode && !$mem_mode && !$drone_mode );
+
+my @prev_data;
+
+openlog( 'eg-stats', 'ndelay', 'local0' );
+
+# gather data ...
+if ($cpu_mode) {
+    # Baseline sample; the first report covers one delay period from here
+    @prev_data = read_cpu_counters();
+    sleep($timeout);
+}
+
+# Turn the comma separated --service list into a lookup hash
+$services = { map { $_ => 1 } split /,/, ( defined $services ? $services : '' ) };
+
+my @apps;
+my @activeapps;
+if ($drone_mode) {
+    my $parser = XML::LibXML->new();
+
+    # Return an XML::LibXML::Document object
+    my $config = $parser->parse_file($config_file);
+
+    @activeapps = $config->findnodes('/opensrf/hosts/*/activeapps/*');
+    @apps       = $config->findnodes('/opensrf/default/apps/*');
+
+    # No --service given: report on every active application
+    unless (%$services) {
+        %$services = map { $_->textContent => 1 } @activeapps;
+    }
+}
+
+do {
+
+    if ($cpu_mode) {
+        my @current_data = read_cpu_counters();
+
+        if ( @prev_data && @current_data ) {
+            # Index 0 is the "cpu" label; the counters used below are
+            # 1 (user), 4 (idle), 5 (iowait) and 8 (steal)
+            my @delta = (0);
+            for my $i ( 1 .. 8 ) {
+                $delta[$i] =
+                  ( $current_data[$i] || 0 ) - ( $prev_data[$i] || 0 );
+            }
+
+            my $total = 0;
+            $total += $_ for (@delta);
+
+            # No ticks elapsed (e.g. --delay=0): skip the report rather
+            # than divide by zero
+            if ( $total > 0 ) {
+                my $res = 'CPU : '
+                  . 'user:'  . percent( $delta[1], $total ) . ', '
+                  . 'idle:'  . percent( $delta[4], $total ) . ', '
+                  . 'iow:'   . percent( $delta[5], $total ) . ', '
+                  . 'steal:' . percent( $delta[8], $total ) . "\n";
+                syslog( LOG_INFO, '%s', $res );
+            }
+        }
+
+        @prev_data = @current_data if @current_data;
+    }
+
+    if ($drone_mode) {
+        my %service_data;
+        for my $proc ( split /\n/, `ps ax|grep OpenSRF` ) {
+            # Matches titles of the form "OpenSRF <type> [<service>]"
+            if ( $proc =~ /OpenSRF (\w+) \[([^\]]+)\]/ ) {
+                my ( $s, $t ) = ( $2, lc($1) );
+                next unless exists $services->{$s};
+                $service_data{$s}{$t}++;
+            }
+        }
+
+        for my $s ( sort keys %$services ) {
+            my ($node) = grep { $_->nodeName eq $s } @apps;
+            next unless ($node);
+
+            my $max_kids = $node->findvalue('unix_config/max_children');
+
+            my $lcount = $service_data{$s}{listener} || 0;
+            my $dcount = $service_data{$s}{drone}    || 0;
+            my $res    = "SERVICE ($s) : "
+              . "listener count: $lcount, drone count: $dcount/$max_kids";
+
+            syslog( LOG_INFO, '%s', $res );
+        }
+    }
+
+    if ($mem_mode) {
+        my %memparts;
+        for ( split /\n/, `cat /proc/meminfo` ) {
+            if (/^(\w+):\s+(\d+)/) {
+                $memparts{$1} = $2;
+            }
+        }
+
+        my $total     = $memparts{MemTotal} || 0;
+        my $free      = $memparts{MemFree}  || 0;
+        my $buffers   = $memparts{Buffers}  || 0;
+        my $cached    = $memparts{Cached}   || 0;
+        my $available = $free + $buffers + $cached;
+
+        # Skip the report if MemTotal could not be read
+        if ( $total > 0 ) {
+            my $res = 'RAM : '
+              . "total:$total kB, "
+              . "free:$free kB, "
+              . "buffers:$buffers kB, "
+              . "cached:$cached kB, "
+              . "available:$available kB, "
+              . 'free%:'      . percent( $free,      $total ) . ', '
+              . 'buffers%:'   . percent( $buffers,   $total ) . ', '
+              . 'cached%:'    . percent( $cached,    $total ) . ', '
+              . 'available%:' . percent( $available, $total ) . "\n";
+            syslog( LOG_INFO, '%s', $res );
+        }
+    }
+
+} while ( !$run_once && sleep($timeout) );
+
+# Return the whitespace separated fields of the first line of /proc/stat
+# (the aggregate "cpu" line), or an empty list if it cannot be read.
+sub read_cpu_counters {
+    open( my $fh, '<', '/proc/stat' ) or return;
+    my $line = <$fh>;
+    close($fh);
+    return unless defined $line;
+
+    chomp $line;
+    return split /\s+/, $line;
+}
+
+# Format $part as a percentage of $total, with two decimals.
+sub percent {
+    my ( $part, $total ) = @_;
+    return sprintf( '%0.2f', ( $part / $total ) * 100 );
+}
+
+sub help {
+    print <<HELP;
+
+Evergreen Server Health Monitor
+
+    --config=<config_file>
+        OpenSRF configuration file for Evergreen.
+        Default: /openils/conf/opensrf.xml
+
+    --1
+        Run once and stop
+
+    --cpu
+        Collect CPU usage stats via /proc/stat
+
+    --ram
+        Collect RAM usage stats via /proc/meminfo
+
+    --opensrf
+        Collect Evergreen service status info from the output of ps
+
+    --service=<service1,service2,...>
+        Comma separated list of services to report on. If not supplied, all
+        services are reported.
+
+    --delay=<seconds>
+        Delay time for collecting CPU stats.
+        Default: 15
+
+    --help
+        Print this help message
+
+HELP
+}
+
diff --git a/eg-stats/eg-stats-keepalive_rsyslog.sh b/eg-stats/eg-stats-keepalive_rsyslog.sh
new file mode 100644
index 0000000..9977fe3
--- /dev/null
+++ b/eg-stats/eg-stats-keepalive_rsyslog.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Start the eg-stats collector if it is not already running.
+count=$(ps ax | grep eg-stats-collector-remote-log.pl | grep -v grep | wc -l)
+if [ "$count" -lt 1 ] ; then
+    /usr/bin/eg-stats-collector-remote-log.pl &
+fi
diff --git a/eg-stats/eg-stats-keepalive_syslog-ng.sh b/eg-stats/eg-stats-keepalive_syslog-ng.sh
new file mode 100644
index 0000000..72c296f
--- /dev/null
+++ b/eg-stats/eg-stats-keepalive_syslog-ng.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+# Start the eg-stats collector if it is not already running, then
+# restart syslog-ng.
+count=$(ps ax | grep eg-stats-collector-remote-log.pl | grep -v grep | wc -l)
+if [ "$count" -lt 1 ] ; then
+    /usr/bin/eg-stats-collector-remote-log.pl --service=open-ils.acq,open-ils.auth,open-ils.search,open-ils.actor,open-ils.booking,open-ils.cat,open-ils.supercat,open-ils.trigger,opensrf.math,opensrf.dbmath,open-ils.penalty,open-ils.circ,open-ils.ingest,open-ils.storage,open-ils.cstore,open-ils.pcrud,opensrf.settings,open-ils.collections,open-ils.reporter,open-ils.reporter-store,open-ils.permacrud,open-ils.fielder,open-ils.vandelay &
+    sleep 1
+    /etc/init.d/syslog-ng restart
+fi
diff --git a/eg-stats/parse-eg-stats.pl b/eg-stats/parse-eg-stats.pl
new file mode 100644
index 0000000..403175b
--- /dev/null
+++ b/eg-stats/parse-eg-stats.pl
@@ -0,0 +1,91 @@
+#!/usr/bin/perl -w
+package main;
+use POSIX;
+use strict;
+
+# Nagios check: scans the part of today's eg-stats log written since the
+# previous run and reports lost listeners and high drone usage.
+# Exit status (Nagios plugin convention): 0 = OK, 1 = warning, 2 = critical.
+
+# Drone usage (drone count / max children) thresholds
+my $WARN_RATIO = 0.75;
+my $CRIT_RATIO = 0.9;
+
+#get date info (zero padded, as used in the log directory names)
+my ( $YEAR, $MONTH, $DAY ) = split /-/, strftime( '%Y-%m-%d', localtime );
+
+my $posF = "/tmp/eg_stats_position.log";
+my $statsF = "/var/log/evergreen/prod/$YEAR/$MONTH/$DAY/eg_stats.log";
+my $pos = 0;
+my $status = 0; #status is OK!
+my $info = "";
+
+#if the position file exists and refers to the current stats file, resume
+#from the recorded position; otherwise start from 0
+if ( open( my $pos_fh, '<', $posF ) ) {
+    my @values = <$pos_fh>;
+    close $pos_fh;
+    if ( @values == 2 ) {    #make sure the array is the correct size
+        chomp(@values);
+        my ( $loc, $saved ) = @values;
+        #only trust the position if it belongs to the current file and
+        #is a valid positive integer (or 0)
+        $pos = $saved if ( $loc eq $statsF && $saved =~ /^\d+$/ );
+    }
+}
+
+#parse the file and output for Nagios if necessary
+if ( open( my $stats_fh, '<', $statsF ) ) {
+    #a saved position past the end of the file means the log was
+    #truncated or replaced, so start over
+    $pos = 0 if ( $pos > ( -s $stats_fh ) );
+    seek $stats_fh, $pos, 0;
+    while ( my $line = <$stats_fh> ) {
+        chomp($line);
+        #check for lost controller first
+        for my $role (qw(listener master controller)) {
+            if ( $line =~ m/$role count: 0/ ) {
+                $info .= "Lost a $role: $line - ";
+                $status = 2;
+            }
+        }
+        #now check for drone ratio
+        if ( $line =~ m/SERVICE/ && $line =~ m{drone count: (\d+)/(\d+)}i ) {
+            my ( $drones, $max ) = ( $1, $2 );
+            #no usable ratio when the maximum is 0
+            next unless ( $max > 0 );
+            my $ratio = $drones / $max;
+            my $pct = ceil( $ratio * 100 );
+            if ( $ratio >= $CRIT_RATIO ) {
+                $info .= "Drone count is $pct % - $line - ";
+                $status = 2;
+            }
+            elsif ( $ratio >= $WARN_RATIO ) {
+                $info .= "Drone count is $pct % - $line - ";
+                #never downgrade an earlier critical result
+                $status = 1 if ( $status < 1 );
+            }
+        }
+    }
+    $pos = tell $stats_fh;
+    close $stats_fh;
+}
+else {
+    $pos = 0;
+}
+
+#update position info
+if ( open( my $out_fh, '>', $posF ) ) {
+    print {$out_fh} "$statsF\n$pos\n";
+    close $out_fh;
+}
+else {
+    warn "cannot write $posF: $!\n";
+}
+
+if ($info eq "") {
+    $info = "EG-STATS-COLLECTOR STATUS: OK!";
+}
+
+print "$info\n";
+exit $status;