From 427cb93b5b5d2ce7ae2c9e41bfa54b1c13f3ea0a Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Fri, 20 Mar 2015 20:33:39 +0000 Subject: [PATCH] LP#1435494: set limits on Clark Kent's resource usage Clark Kent can sometimes consume more RAM, swap space, or CPU than is reasonable or productive. For example: - a badly constructed query with multiple Cartesian joins may never terminate, potentially tying up a Clark child process, pegging a CPU on the database server, and/or causing significant scratch disk usage on the database server keeping a snapshot alive. - a query that returns a very large number of rows can cause a Clark child to bloat, and in extreme cases cause a OOM on the server running Clark. - a report that asks for a chart of an unreasonably large number of rows can peg a CPU on the Clark server as GD::Graph attempts to compute sub-pixel graph elements. In each of these cases, a requested report may never finish. This patch adds the ability set set some limits on Clark. These limits can be set either in opensrf.xml for the settings service to distribute or via command-line switches to clark-kent.pl: //reporter/setup/statement_timeout / --statement-timeout Number of minutes to allow a report's underlying SQL query to run before it gets cancelled. Default value is 60 minutes. If a report's query gets cancelled, the error_text value will be set to a valid that indicates that the allowed time was exceeded. //reporter/setup/max_rows_for_charts / --max-rows-for-charts Number of rows permitted in the query's output before Clark Kent refuses to attempt to draw a graph. Default value is 1,000 rows. //reporter/setup/resultset_limit / --resultset-limit If set, truncates the report's output to the specified number of hits. Note that it will not be apparent to a staff user if the report's output has been truncated. Default value is unlimited. This patch also adds the ability for the concurrency to be set via an opensrf.xml setting (//reporter/setup/parallel). If both a command-line switch and an opensrf.xml setting are supplied, the value set in the command line takes precedence. Signed-off-by: Galen Charlton Signed-off-by: Ben Shum --- Open-ILS/examples/opensrf.xml.example | 26 +++++++++ .../perlmods/lib/OpenILS/Reporter/SQLBuilder.pm | 12 +++++ Open-ILS/src/reporter/clark-kent.pl | 61 +++++++++++++++++----- 3 files changed, 87 insertions(+), 12 deletions(-) diff --git a/Open-ILS/examples/opensrf.xml.example b/Open-ILS/examples/opensrf.xml.example index 9f3209d04f..fdaf1508b1 100644 --- a/Open-ILS/examples/opensrf.xml.example +++ b/Open-ILS/examples/opensrf.xml.example @@ -174,6 +174,32 @@ vim:et:ts=4:sw=4: LOCALSTATEDIR/data/report-success LOCALSTATEDIR/data/report-fail + + 1 + + 1000 + + 60 + + diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Reporter/SQLBuilder.pm b/Open-ILS/src/perlmods/lib/OpenILS/Reporter/SQLBuilder.pm index 940972a33e..41d76ba95f 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Reporter/SQLBuilder.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Reporter/SQLBuilder.pm @@ -38,6 +38,13 @@ sub relative_time { return $self->builder->{_relative_time}; } +sub resultset_limit { + my $self = shift; + my $limit = shift; + $self->builder->{_resultset_limit} = $limit if (defined $limit); + return $self->builder->{_resultset_limit}; +} + sub resolve_param { my $self = shift; my $val = shift; @@ -237,6 +244,8 @@ sub toSQL { if ($self->is_subquery) { $sql = '('; + } elsif ($self->resultset_limit) { + $sql = 'SELECT * FROM ('; } $sql .= "SELECT\t" . join(",\n\t", map { $_->toSQL } @{ $self->{_select} }) . "\n" if (@{ $self->{_select} }); @@ -251,6 +260,9 @@ sub toSQL { if ($self->is_subquery) { $sql .= ') '. $self->{_alias} . "\n"; + } elsif ($self->resultset_limit) { + $sql .= ') limited_to_' . $self->resultset_limit . + '_hits LIMIT ' . $self->resultset_limit . "\n"; } return $self->{_sql} = $sql; diff --git a/Open-ILS/src/reporter/clark-kent.pl b/Open-ILS/src/reporter/clark-kent.pl index 8893fc5ca6..c2a6e9f434 100755 --- a/Open-ILS/src/reporter/clark-kent.pl +++ b/Open-ILS/src/reporter/clark-kent.pl @@ -29,12 +29,20 @@ use Email::Send; use open ':utf8'; -my ($count, $config, $sleep_interval, $lockfile, $daemon) = (1, 'SYSCONFDIR/opensrf_core.xml', 10, '/tmp/reporter-LOCK'); +my ($config, $sleep_interval, $lockfile, $daemon) = ('SYSCONFDIR/opensrf_core.xml', 10, '/tmp/reporter-LOCK'); + +my $opt_count; +my $opt_max_rows_for_charts; +my $opt_statement_timeout; +my $opt_resultset_limit; GetOptions( "daemon" => \$daemon, "sleep=i" => \$sleep_interval, - "concurrency=i" => \$count, + "concurrency=i" => \$opt_count, + "max-rows-for-charts=i" => \$opt_max_rows_for_charts, + "resultset-limit=i" => \$opt_resultset_limit, + "statement-timeout=i" => \$opt_statement_timeout, "bootstrap|boostrap=s" => \$config, "lockfile=s" => \$lockfile, ); @@ -88,6 +96,21 @@ my $base_uri = $sc->config_value( reporter => setup => 'base_uri' ); my $state_dsn = "dbi:" . $state_db{db_driver} . ":dbname=" . $state_db{db_name} .';host=' . $state_db{db_host} . ';port=' . $state_db{db_port}; my $data_dsn = "dbi:" . $data_db{db_driver} . ":dbname=" . $data_db{db_name} .';host=' . $data_db{db_host} . ';port=' . $data_db{db_port}; +my $count = $opt_count // + $sc->config_value( reporter => setup => 'parallel' ) // + 1; +$count = 1 unless $count =~ /^\d+$/ && $count > 0; +my $statement_timeout = $opt_statement_timeout // + $sc->config_value( reporter => setup => 'statement_timeout' ) // + 60; +$statement_timeout = 60 unless $statement_timeout =~ /^\d+$/; +my $max_rows_for_charts = $opt_max_rows_for_charts // + $sc->config_value( reporter => setup => 'max_rows_for_charts' ) // + 1000; +$max_rows_for_charts = 1000 unless $max_rows_for_charts =~ /^\d+$/; +my $resultset_limit = $opt_resultset_limit // + $sc->config_value( reporter => setup => 'resultset_limit' ); + my ($dbh,$running,$sth,@reports,$run, $current_time); if ($daemon) { @@ -167,6 +190,7 @@ while (my $r = $sth->fetchrow_hashref) { $r->{resultset}->set_pivot_label($report_data->{__pivot_label}) if $report_data->{__pivot_label}; $r->{resultset}->set_pivot_default($report_data->{__pivot_default}) if $report_data->{__pivot_default}; $r->{resultset}->relative_time($r->{run_time}); + $r->{resultset}->resultset_limit($resultset_limit) if $resultset_limit; push @reports, $r; } @@ -203,6 +227,7 @@ for my $r ( @reports ) { RaiseError => 1 } ); + $data_dbh->do('SET statement_timeout = ?', {}, ($statement_timeout * 60 * 1000)); try { $state_dbh->do(<<' SQL',{}, $r->{id}); @@ -544,28 +569,40 @@ sub build_html { # Time for a pie chart if ($r->{chart_pie}) { - my $pics = draw_pie($r, $file); - for my $pic (@$pics) { - print $index "$pic->{name}$br4"; + if (scalar(@{$r->{data}}) > $max_rows_for_charts) { + print $index "Report output has too many rows to make a pie chart$br4"; + } else { + my $pics = draw_pie($r, $file); + for my $pic (@$pics) { + print $index "$pic->{name}$br4"; + } } } print $index $br4; # Time for a bar chart if ($r->{chart_bar}) { - my $pics = draw_bars($r, $file); - for my $pic (@$pics) { - print $index "$pic->{name}$br4"; + if (scalar(@{$r->{data}}) > $max_rows_for_charts) { + print $index "Report output has too many rows to make a bar chart$br4"; + } else { + my $pics = draw_bars($r, $file); + for my $pic (@$pics) { + print $index "$pic->{name}$br4"; + } } } print $index $br4; # Time for a bar chart if ($r->{chart_line}) { - my $pics = draw_lines($r, $file); - for my $pic (@$pics) { - print $index "$pic->{name}$br4"; - } + if (scalar(@{$r->{data}}) > $max_rows_for_charts) { + print $index "Report output has too many rows to make a line chart$br4"; + } else { + my $pics = draw_lines($r, $file); + for my $pic (@$pics) { + print $index "$pic->{name}$br4"; + } + } } # and that's it! -- 2.11.0