URLVerify.pm getting started
authorBill Erickson <berick@esilibrary.com>
Tue, 7 Aug 2012 20:27:57 +0000 (16:27 -0400)
committerBill Erickson <berick@esilibrary.com>
Wed, 8 Aug 2012 15:51:33 +0000 (11:51 -0400)
Signed-off-by: Bill Erickson <berick@esilibrary.com>
Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm [new file with mode: 0644]

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm
new file mode 100644 (file)
index 0000000..1335983
--- /dev/null
@@ -0,0 +1,279 @@
+package OpenILS::Application::url_verify;
+use base qw/OpenILS::Application/;
+use strict; use warnings;
+use OpenSRF::Utils::Logger qw(:logger);
+use OpenILS::Utils::Fieldmapper;
+use OpenILS::Utils::CStoreEditor q/:funcs/;
+use Net::HTTP::NB;
+use OpenILS::Application::AppUtils;
+my $U = 'OpenILS::Application::AppUtils';
+
+
+__PACKAGE__->register_method(
+    method => 'validate_session',
+    api_name => 'open-ils.url_verify.session.validate',
+    stream => 1,
+    signature => {
+        desc => q/
+            Performs verification on all (or a subset of the) URLs within the requested session.
+        /,
+        params => [
+            {desc => 'Authentication token', type => 'string'},
+            {desc => 'Session ID (url_verify.session.id)', type => 'number'},
+            {desc => 'URL ID list (optional)', type => 'array'}
+        ],
+        return => {desc => 'TODO'}
+    }
+);
+
+sub validate_session {
+    my ($self, $client, $auth, $session_id, $url_ids) = @_;
+
+    # loop through list of URLs / session URLs
+    # see if we've already tested the url, if so copy the status info  / redirect_to info and move on
+    # add a sleep (org setting) to the multisession handler
+    # Avoid testing URLs having the same domain sequentially
+
+    my $e = new_editor(authtoken => $auth, xact => 1);
+    return $e->die_event unless $e->checkauth;
+    return $e->die_event unless $e->allowed('VERIFY_URL');
+
+    my $session = $e->retrieve_url_verify_session($session_id) or return $e->die_event;
+
+    if (!$url_ids) {
+        # fetch IDs
+    }
+
+    # create the attempt
+    # process the URLs in batch
+
+    return undef;
+}
+
+
+__PACKAGE__->register_method(
+    method => 'validate_url',
+    api_name => 'open-ils.url_verify.validate_url',
+    stream => 1,
+    signature => {
+        desc => q/
+            Performs verification of a single URL.  When a redirect is detected,
+            a new URL is created to model the redirect and the redirected URL
+            is then tested, up to max-redirects or a loop is detected.
+        /,
+        params => [
+            {desc => 'Authentication token', type => 'string'},
+            {desc => 'Verification attempt ID (url_verify.verification_attempt.id)', type => 'number'},
+            {desc => 'URL id (url_verify.url.id)', type => 'number'},
+        ],
+        return => {desc => q/Stream of url_verification objects, one per URL tested/}
+    }
+);
+
+=head comment
+
+verification.res_code:
+
+999 bad hostname, etc. (IO::Socket::Inet errors)
+998 in-flight errors (e.g connection closed prematurely)
+997 timeout
+996 redirect loop
+995 max redirects
+
+verification.res_text:
+
+$@ or custom message "Redirect Loop"
+
+=cut
+
+sub validate_url {
+    my ($self, $client, $auth, $attempt_id, $url_id) = @_;
+    my %seen_urls;
+
+    my $e = new_editor(authtoken => $auth);
+    return $e->event unless $e->checkauth;
+
+    my $url = $e->retrieve_url_verify_url($url_id) or return $e->event;
+
+    my $attempt = $e->retrieve_url_verify_verification_attempt([
+        $attempt_id, { 
+            flesh => 1, 
+            flesh_fields => {uvva => ['session']}
+        }
+    ]) or return $e->event;
+
+    my $session = $attempt->session;
+    
+    return $e->event unless $e->allowed('VERIFY_URL');
+
+    my $depth = 0;
+    my $max_redirects = 20; # TODO: org setting
+    my $timeout = 5; # TODO: org setting
+
+    my $cur_url = $url;
+    my $loop_detected = 0;
+
+    while ($depth++ < $max_redirects) {
+
+        if ($seen_urls{$cur_url->url}) {
+            $loop_detected = 1;
+            last;
+        }
+
+        $seen_urls{$cur_url->url} = $cur_url;
+         
+        my $url_resp = validate_one_url($e, $attempt, $cur_url, $timeout);
+
+        # something tragic happened
+        return $url_resp if $U->is_event($url_resp);
+
+        # flesh and respond to the caller
+        $url_resp->{verification}->url($cur_url);
+        $client->respond($url_resp->{verification});
+
+        $cur_url = $url_resp->{redirect_url} or last;
+    }
+
+    if ($loop_detected or $depth == $max_redirects) {
+
+        my $vcation = Fieldmapper::url_verify::url_verification->new;
+        $vcation->url($cur_url->id);
+        $vcation->attempt($attempt->id);
+        $vcation->req_time('now');
+
+        if ($loop_detected) {
+            $logger->info("url: redirect loop detected at " . $cur_url->url);
+            $vcation->res_code('996');
+            $vcation->res_text('Redirect Loop');
+
+        } else {
+            $logger->info("url: max redirects reached for " . $cur_url->url);
+            $vcation->res_code('995');
+            $vcation->res_text('Max Redirects');
+        }
+
+        $e->xact_begin; 
+        $e->create_url_verify_url_verification($vcation) or return $e->die_event;
+        $e->xact_commit;
+    }
+
+    return undef;
+}
+
+=head comment
+
+1. create the verification object and commit.
+2. test the URL
+3. update the verification object to capture the results of the test
+4. Return redirect_url object if this is a redirect, otherwise undef.
+
+=cut
+
+sub validate_one_url {
+    my ($e, $attempt, $url, $timeout) = @_;
+
+    my $url_text = $url->url;
+    my $redir_url;
+
+    # first, create the verification object so we can a) indicate that 
+    # we're working on this URL and b) get the DB to set the req_time.
+
+    my $vcation = Fieldmapper::url_verify::url_verification->new;
+    $vcation->url($url->id);
+    $vcation->attempt($attempt->id);
+    $vcation->req_time('now');
+
+    # begin phase-I DB communication
+
+    $e->xact_begin; 
+    $e->create_url_verify_url_verification($vcation) or return $e->die_event;
+    $e->xact_commit;
+
+    # End phase-I DB communication
+    # No active DB xact means no cstore timeout concerns.
+
+    # Now test the URL.
+
+    my $req = Net::HTTP::NB->new(Host => $url->host);
+
+    if ($req) {
+
+        $req->write_request(HEAD => $url->url);
+
+        my $sel = IO::Select->new($req);
+
+        if ($sel->can_read($timeout)) { 
+
+            # request headers are ready for reading
+
+            my ($code, $msg, %headers);
+
+            eval {
+                # uses 'die' internally
+                ($code, $msg, %headers) = $req->read_response_headers;
+            };
+
+            if ($@) {
+                $logger->info("url: in-flight error $@ [$url_text]");
+                $vcation->res_code('998');
+                $vcation->res_text($@);
+
+            } else {
+
+                $logger->info("url: received HTTP '$code' [$url_text]");
+
+                $vcation->res_code($code);
+                $vcation->res_text($msg);
+
+                # is this a redirect?
+                if ($code =~ /^3/) {
+
+                    if (my $loc = $headers{Location}) {
+                        $redir_url = Fieldmapper::url_verify::url->new;
+                        $redir_url->redirect_from($url->id);
+                        $redir_url->url($loc);
+                        $logger->info("url: redirect found $url_text => $loc");
+
+                    } else {
+                        $logger->info("url: server returned 3XX but no 'Location' header for url $url_text");
+                    }
+                } 
+            }
+
+        } else {
+
+            # request timed out
+            $logger->info("url: request timed out for $url_text");
+        }
+
+    } else { 
+
+        # Error building connection.  Invalid hostname, etc.
+
+        $vcation->res_code('999');
+        $vcation->res_text($@);
+    }
+
+    # Begin phase-II DB communication
+
+    $e->xact_begin;
+
+    if ($redir_url) {
+        $redir_url = $e->create_url_verify_url($redir_url) or return $e->die_event;
+        $vcation->redirect_to($redir_url->id);
+    }
+
+    $vcation->res_time('now');
+    $e->update_url_verify_url_verification($vcation) or return $e->die_event;
+    $e->commit;
+
+    return {
+        verification => $vcation,
+        redirect_url => $redir_url
+    };
+}
+
+
+
+
+1;