From 96f6a023b42776f3493946909ccc4d434387bc2b Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Wed, 8 Aug 2012 15:38:39 -0400 Subject: [PATCH] URLVerify.pm; redirects / error handling Signed-off-by: Bill Erickson --- .../perlmods/lib/OpenILS/Application/URLVerify.pm | 59 ++++++++++++++-------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm index 973c522380..b7d4b4f05c 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm @@ -26,25 +26,25 @@ __PACKAGE__->register_method( { desc => q/ Options (optional). - report_all => bypass response throttling and return all URL sub-process - responses to the caller. Not recommened for remote (web, etc.) clients, + report_all => bypass response throttling and return all URL sub-process + responses to the caller. Not recommened for remote (web, etc.) clients, because it can be a lot of data. resume_attempt => atttempt_id. Resume verification after a failure. - resume_with_new_attempt => If true, resume from resume_attempt, but + resume_with_new_attempt => If true, resume from resume_attempt, but create a new attempt to track the resumption. /, type => 'hash' } ], return => {desc => q/ - Stream of objects containing the number of URLs to be processed (url_count), - the number processed thus far including redirects (total_processed), - and the current url_verification object (current_verification). - - Note that total_processed may ultimately exceed url_count, since it + Stream of objects containing the number of URLs to be processed (url_count), + the number processed thus far including redirects (total_processed), + and the current url_verification object (current_verification). + + Note that total_processed may ultimately exceed url_count, since it includes non-anticipate-able redirects. - The final response contains url_count, total_processed, and the + The final response contains url_count, total_processed, and the verification_attempt object (attempt). / } @@ -64,7 +64,7 @@ sub validate_session { return $e->die_event unless $e->checkauth; return $e->die_event unless $e->allowed('VERIFY_URL'); - my $session = $e->retrieve_url_verify_session($session_id) + my $session = $e->retrieve_url_verify_session($session_id) or return $e->die_event; my $attempt_id = $options->{resume_attempt}; @@ -79,8 +79,8 @@ sub validate_session { join => { cbreb => { # bucket join => { uvs => { # session filter => {id => $session_id} - }} - }} + }} + }} } } } @@ -90,7 +90,7 @@ sub validate_session { $logger->info("url: resuming attempt $attempt_id"); # when resuming an existing attempt (that presumably failed - # mid-processing), we only want to process URLs that either + # mid-processing), we only want to process URLs that either # have no linked url_verification or have an un-completed # url_verification. @@ -107,6 +107,15 @@ sub validate_session { ] } }; + + } else { + + # this is a new run, so we only want to process URLs that + # originated from the source records and not from redirects. + + $query->{where} = { + '+uvu' => {redirect_from => undef} + }; } my $ids = $e->json_query($query); @@ -119,7 +128,7 @@ sub validate_session { my $attempt; if ($attempt_id and !$options->{resume_with_new_attempt}) { - $attempt = $e->retrieve_url_verification_attempt($attempt_id) + $attempt = $e->retrieve_url_verification_attempt($attempt_id) or return $e->die_event; # no data was written @@ -132,13 +141,13 @@ sub validate_session { $attempt->usr($e->requestor->id); $attempt->start_time('now'); - $e->create_url_verify_verification_attempt($attempt) + $e->create_url_verify_verification_attempt($attempt) or return $e->die_event; $e->commit; } - # END DB TRANSACTION + # END DB TRANSACTION # Now cycle through the URLs in batches. @@ -146,7 +155,7 @@ sub validate_session { my $num_processed = 0; # total number processed, including redirects my $resp_window = 1; - # before we start the real work, let the caller know + # before we start the real work, let the caller know # the attempt (id) so recovery is possible. $client->respond({ @@ -326,9 +335,9 @@ sub verify_url { $e->xact_commit; } - # The calling code is likely not multi-threaded, so a - # per-URL (i.e. per-thread) delay would not be possible. - # Applying the delay here allows the caller to process + # The calling code is likely not multi-threaded, so a + # per-URL (i.e. per-thread) delay would not be possible. + # Applying the delay here allows the caller to process # batches of URLs without having to worry about the delay. sleep $delay; @@ -369,7 +378,11 @@ sub verify_one_url { # Now test the URL. - my $req = Net::HTTP::NB->new(Host => $url->host); + my $req; + eval { + # uses 'die' internally + $req = Net::HTTP::NB->new(Host => $url->host); + }; if ($req) { @@ -425,12 +438,16 @@ sub verify_one_url { # request timed out $logger->info("url: request timed out for $url_text"); + + $vcation->res_code('997'); + $vcation->res_text('Request Timeout'); } } else { # Error building connection. Invalid hostname, etc. + $logger->info("url: error building connection: $@"); $vcation->res_code('999'); $vcation->res_text($@); } -- 2.11.0