From 6af357230180230535de0364278c4809c5a03a3f Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Wed, 8 Aug 2012 15:11:19 -0400 Subject: [PATCH] URLVerify.pm; more testing tweaks Signed-off-by: Bill Erickson --- .../perlmods/lib/OpenILS/Application/URLVerify.pm | 37 +++++++++++++--------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm index 997cfb3ff1..973c522380 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm @@ -142,8 +142,7 @@ sub validate_session { # Now cycle through the URLs in batches. - my $batch_size = 5; # TODO: org setting - my $delay = 2; # TODO: org setting + my $batch_size = 10; # TODO: org setting my $num_processed = 0; # total number processed, including redirects my $resp_window = 1; @@ -187,11 +186,6 @@ sub validate_session { $resp_window *= 2 unless $resp_window == 256; } } - - # insert the per-thread delay, which keeps this thread - # active, which prevents a new thread from firing up to - # replace it. (thread = multisession session) - sleep $delay; }, failure_handler => sub { @@ -280,21 +274,22 @@ sub verify_url { return $e->event unless $e->allowed('VERIFY_URL'); - my $depth = 0; + my $delay = 2; # TODO: org setting my $max_redirects = 20; # TODO: org setting my $timeout = 5; # TODO: org setting + my $depth = 0; my $cur_url = $url; my $loop_detected = 0; while ($depth++ < $max_redirects) { - if ($seen_urls{$cur_url->url}) { + if ($seen_urls{$cur_url->full_url}) { $loop_detected = 1; last; } - $seen_urls{$cur_url->url} = $cur_url; + $seen_urls{$cur_url->full_url} = $cur_url; my $url_resp = verify_one_url($e, $attempt, $cur_url, $timeout); @@ -316,12 +311,12 @@ sub verify_url { $vcation->req_time('now'); if ($loop_detected) { - $logger->info("url: redirect loop detected at " . $cur_url->url); + $logger->info("url: redirect loop detected at " . $cur_url->full_url); $vcation->res_code('996'); $vcation->res_text('Redirect Loop'); } else { - $logger->info("url: max redirects reached for " . $cur_url->url); + $logger->info("url: max redirects reached for " . $cur_url->full_url); $vcation->res_code('995'); $vcation->res_text('Max Redirects'); } @@ -331,6 +326,12 @@ sub verify_url { $e->xact_commit; } + # The calling code is likely not multi-threaded, so a + # per-URL (i.e. per-thread) delay would not be possible. + # Applying the delay here allows the caller to process + # batches of URLs without having to worry about the delay. + sleep $delay; + return undef; } @@ -346,7 +347,7 @@ sub verify_url { sub verify_one_url { my ($e, $attempt, $url, $timeout) = @_; - my $url_text = $url->url; + my $url_text = $url->full_url; my $redir_url; # first, create the verification object so we can a) indicate that @@ -372,7 +373,7 @@ sub verify_one_url { if ($req) { - $req->write_request(HEAD => $url->url); + $req->write_request(HEAD => $url->full_url); my $sel = IO::Select->new($req); @@ -405,7 +406,13 @@ sub verify_one_url { if (my $loc = $headers{Location}) { $redir_url = Fieldmapper::url_verify::url->new; $redir_url->redirect_from($url->id); - $redir_url->url($loc); + $redir_url->full_url($loc); + $redir_url->item($url->item); + $redir_url->url_selector($url->url_selector); + $redir_url->tag($url->tag); + $redir_url->subfield($url->subfield); + $redir_url->ord($url->ord); + $logger->info("url: redirect found $url_text => $loc"); } else { -- 2.11.0