From: Bill Erickson Date: Tue, 7 Aug 2012 20:27:57 +0000 (-0400) Subject: URLVerify.pm getting started X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=872b5966130e676c8e482e56a282e59b665c8af4;p=evergreen%2Fequinox.git URLVerify.pm getting started Signed-off-by: Bill Erickson --- diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm new file mode 100644 index 0000000000..1335983e40 --- /dev/null +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm @@ -0,0 +1,279 @@ +package OpenILS::Application::url_verify; +use base qw/OpenILS::Application/; +use strict; use warnings; +use OpenSRF::Utils::Logger qw(:logger); +use OpenILS::Utils::Fieldmapper; +use OpenILS::Utils::CStoreEditor q/:funcs/; +use Net::HTTP::NB; +use OpenILS::Application::AppUtils; +my $U = 'OpenILS::Application::AppUtils'; + + +__PACKAGE__->register_method( + method => 'validate_session', + api_name => 'open-ils.url_verify.session.validate', + stream => 1, + signature => { + desc => q/ + Performs verification on all (or a subset of the) URLs within the requested session. + /, + params => [ + {desc => 'Authentication token', type => 'string'}, + {desc => 'Session ID (url_verify.session.id)', type => 'number'}, + {desc => 'URL ID list (optional)', type => 'array'} + ], + return => {desc => 'TODO'} + } +); + +sub validate_session { + my ($self, $client, $auth, $session_id, $url_ids) = @_; + + # loop through list of URLs / session URLs + # see if we've already tested the url, if so copy the status info / redirect_to info and move on + # add a sleep (org setting) to the multisession handler + # Avoid testing URLs having the same domain sequentially + + my $e = new_editor(authtoken => $auth, xact => 1); + return $e->die_event unless $e->checkauth; + return $e->die_event unless $e->allowed('VERIFY_URL'); + + my $session = $e->retrieve_url_verify_session($session_id) or return $e->die_event; + + if (!$url_ids) { + # fetch IDs + } + + # create the attempt + # process the URLs in batch + + return undef; +} + + +__PACKAGE__->register_method( + method => 'validate_url', + api_name => 'open-ils.url_verify.validate_url', + stream => 1, + signature => { + desc => q/ + Performs verification of a single URL. When a redirect is detected, + a new URL is created to model the redirect and the redirected URL + is then tested, up to max-redirects or a loop is detected. + /, + params => [ + {desc => 'Authentication token', type => 'string'}, + {desc => 'Verification attempt ID (url_verify.verification_attempt.id)', type => 'number'}, + {desc => 'URL id (url_verify.url.id)', type => 'number'}, + ], + return => {desc => q/Stream of url_verification objects, one per URL tested/} + } +); + +=head comment + +verification.res_code: + +999 bad hostname, etc. (IO::Socket::Inet errors) +998 in-flight errors (e.g connection closed prematurely) +997 timeout +996 redirect loop +995 max redirects + +verification.res_text: + +$@ or custom message "Redirect Loop" + +=cut + +sub validate_url { + my ($self, $client, $auth, $attempt_id, $url_id) = @_; + my %seen_urls; + + my $e = new_editor(authtoken => $auth); + return $e->event unless $e->checkauth; + + my $url = $e->retrieve_url_verify_url($url_id) or return $e->event; + + my $attempt = $e->retrieve_url_verify_verification_attempt([ + $attempt_id, { + flesh => 1, + flesh_fields => {uvva => ['session']} + } + ]) or return $e->event; + + my $session = $attempt->session; + + return $e->event unless $e->allowed('VERIFY_URL'); + + my $depth = 0; + my $max_redirects = 20; # TODO: org setting + my $timeout = 5; # TODO: org setting + + my $cur_url = $url; + my $loop_detected = 0; + + while ($depth++ < $max_redirects) { + + if ($seen_urls{$cur_url->url}) { + $loop_detected = 1; + last; + } + + $seen_urls{$cur_url->url} = $cur_url; + + my $url_resp = validate_one_url($e, $attempt, $cur_url, $timeout); + + # something tragic happened + return $url_resp if $U->is_event($url_resp); + + # flesh and respond to the caller + $url_resp->{verification}->url($cur_url); + $client->respond($url_resp->{verification}); + + $cur_url = $url_resp->{redirect_url} or last; + } + + if ($loop_detected or $depth == $max_redirects) { + + my $vcation = Fieldmapper::url_verify::url_verification->new; + $vcation->url($cur_url->id); + $vcation->attempt($attempt->id); + $vcation->req_time('now'); + + if ($loop_detected) { + $logger->info("url: redirect loop detected at " . $cur_url->url); + $vcation->res_code('996'); + $vcation->res_text('Redirect Loop'); + + } else { + $logger->info("url: max redirects reached for " . $cur_url->url); + $vcation->res_code('995'); + $vcation->res_text('Max Redirects'); + } + + $e->xact_begin; + $e->create_url_verify_url_verification($vcation) or return $e->die_event; + $e->xact_commit; + } + + return undef; +} + +=head comment + +1. create the verification object and commit. +2. test the URL +3. update the verification object to capture the results of the test +4. Return redirect_url object if this is a redirect, otherwise undef. + +=cut + +sub validate_one_url { + my ($e, $attempt, $url, $timeout) = @_; + + my $url_text = $url->url; + my $redir_url; + + # first, create the verification object so we can a) indicate that + # we're working on this URL and b) get the DB to set the req_time. + + my $vcation = Fieldmapper::url_verify::url_verification->new; + $vcation->url($url->id); + $vcation->attempt($attempt->id); + $vcation->req_time('now'); + + # begin phase-I DB communication + + $e->xact_begin; + $e->create_url_verify_url_verification($vcation) or return $e->die_event; + $e->xact_commit; + + # End phase-I DB communication + # No active DB xact means no cstore timeout concerns. + + # Now test the URL. + + my $req = Net::HTTP::NB->new(Host => $url->host); + + if ($req) { + + $req->write_request(HEAD => $url->url); + + my $sel = IO::Select->new($req); + + if ($sel->can_read($timeout)) { + + # request headers are ready for reading + + my ($code, $msg, %headers); + + eval { + # uses 'die' internally + ($code, $msg, %headers) = $req->read_response_headers; + }; + + if ($@) { + $logger->info("url: in-flight error $@ [$url_text]"); + $vcation->res_code('998'); + $vcation->res_text($@); + + } else { + + $logger->info("url: received HTTP '$code' [$url_text]"); + + $vcation->res_code($code); + $vcation->res_text($msg); + + # is this a redirect? + if ($code =~ /^3/) { + + if (my $loc = $headers{Location}) { + $redir_url = Fieldmapper::url_verify::url->new; + $redir_url->redirect_from($url->id); + $redir_url->url($loc); + $logger->info("url: redirect found $url_text => $loc"); + + } else { + $logger->info("url: server returned 3XX but no 'Location' header for url $url_text"); + } + } + } + + } else { + + # request timed out + $logger->info("url: request timed out for $url_text"); + } + + } else { + + # Error building connection. Invalid hostname, etc. + + $vcation->res_code('999'); + $vcation->res_text($@); + } + + # Begin phase-II DB communication + + $e->xact_begin; + + if ($redir_url) { + $redir_url = $e->create_url_verify_url($redir_url) or return $e->die_event; + $vcation->redirect_to($redir_url->id); + } + + $vcation->res_time('now'); + $e->update_url_verify_url_verification($vcation) or return $e->die_event; + $e->commit; + + return { + verification => $vcation, + redirect_url => $redir_url + }; +} + + + + +1;