From ad6a662616e3f56880d64e924d932497e458063c Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Thu, 9 Aug 2012 10:26:02 -0400 Subject: [PATCH] URLVerify.pm; settings, tmp caching, cleanup / misc Signed-off-by: Bill Erickson --- .../perlmods/lib/OpenILS/Application/URLVerify.pm | 87 ++++++++++++++++------ 1 file changed, 66 insertions(+), 21 deletions(-) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm index 6f6fa9f235..c3f1dd5340 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm @@ -275,26 +275,17 @@ sub verify_url { my $url = $e->retrieve_url_verify_url($url_id) or return $e->event; - my $attempt = $e->retrieve_url_verify_verification_attempt([ - $attempt_id, { - flesh => 1, - flesh_fields => {uvva => ['session']} - } - ]) or return $e->event; - - my $session = $attempt->session; + my ($attempt, $delay, $max_redirects, $timeout) = + collect_verify_attempt_and_settings($attempt_id); - return $e->event unless $e->allowed('VERIFY_URL'); + return $e->event unless $e->allowed( + 'VERIFY_URL', $attempt->session->owning_lib); - my $delay = 2; # TODO: org setting - my $max_redirects = 20; # TODO: org setting - my $timeout = 5; # TODO: org setting - - my $depth = 0; my $cur_url = $url; my $loop_detected = 0; + my $redir_count = 0; - while ($depth++ < $max_redirects) { + while ($redir_count++ < $max_redirects) { if ($seen_urls{$cur_url->full_url}) { $loop_detected = 1; @@ -315,7 +306,7 @@ sub verify_url { $cur_url = $url_resp->{redirect_url} or last; } - if ($loop_detected or $depth == $max_redirects) { + if ($loop_detected or $redir_count == $max_redirects) { my $vcation = Fieldmapper::url_verify::url_verification->new; $vcation->url($cur_url->id); @@ -347,6 +338,65 @@ sub verify_url { return undef; } +# temporarily cache some data to avoid a pile +# of data lookups on every URL processed. 
+my %cache;
+
+# Returns (attempt, delay, max redirects, timeout) for $attempt_id:
+# the attempt (session fleshed, read via editor $e) and the url_verify
+# settings of the session's owning_lib, defaulting to 2, 20 and 5.
+# If the attempt cannot be retrieved, returns $e->die_event instead.
+sub collect_verify_attempt_and_settings {
+    my ($e, $attempt_id) = @_;
+    my $attempt;
+
+    # (re)build the cache when it is empty or its age counter passed 20
+    if (!(keys %cache) or $cache{age} > 20) { # configurable?
+        %cache = (
+            age => 0,
+            attempt => {},
+            delay => {},
+            redirects => {},
+            timeout => {},
+        );
+    }
+
+    if ( !($attempt = $cache{attempt}{$attempt_id}) ) {
+        # attempt may have just been created, so
+        # we need to guarantee a write-DB read.
+        $e->xact_begin;
+        # keep the object itself in $attempt; ->session is called on it
+        $attempt =
+            $e->retrieve_url_verify_verification_attempt([
+                $attempt_id, {
+                    flesh => 1,
+                    flesh_fields => {uvva => ['session']}
+                }
+            ]) or return $e->die_event;
+        $e->rollback;
+        $cache{attempt}{$attempt_id} = $attempt;
+    }
+
+    my $org = $attempt->session->owning_lib;
+    # the three settings are fetched and cached together, per org unit
+    if (!$cache{delay}{$org}) {
+        $cache{delay}{$org} = $U->ou_ancestor_setting_value(
+            $org, 'url_verify.url_verification_delay', $e) || 2;
+        $cache{redirects}{$org} = $U->ou_ancestor_setting_value(
+            $org, 'url_verify.url_verification_max_redirects', $e) || 20;
+        $cache{timeout}{$org} = $U->ou_ancestor_setting_value(
+            $org, 'url_verify.url_verification_max_wait', $e) || 5;
+    }
+
+    $cache{age}++;
+    return (
+        $cache{attempt}{$attempt_id},
+        $cache{delay}{$org},
+        $cache{redirects}{$org},
+        $cache{timeout}{$org}
+    );
+}
+
 =head comment
 
 1. create the verification object and commit.
@@ -423,11 +473,6 @@ sub verify_one_url {
                 $redir_url = Fieldmapper::url_verify::url->new;
                 $redir_url->redirect_from($url->id);
                 $redir_url->full_url($loc);
-                $redir_url->item($url->item);
-                $redir_url->url_selector($url->url_selector);
-                $redir_url->tag($url->tag);
-                $redir_url->subfield($url->subfield);
-                $redir_url->ord($url->ord);
 
                 $logger->info("url: redirect found $url_text => $loc");
 
-- 
2.11.0