URLVerify.pm; move to lwp to support ftp and simplify
authorBill Erickson <berick@esilibrary.com>
Thu, 9 Aug 2012 16:53:02 +0000 (12:53 -0400)
committerBill Erickson <berick@esilibrary.com>
Thu, 9 Aug 2012 16:53:02 +0000 (12:53 -0400)
Signed-off-by: Bill Erickson <berick@esilibrary.com>
Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm

index c3f1dd5..60c6e19 100644 (file)
@@ -6,7 +6,7 @@ use OpenSRF::MultiSession;
 use OpenILS::Utils::Fieldmapper;
 use OpenILS::Utils::CStoreEditor q/:funcs/;
 use OpenILS::Application::AppUtils;
-use Net::HTTP::NB;
+use LWP::UserAgent;
 
 my $U = 'OpenILS::Application::AppUtils';
 
@@ -276,7 +276,7 @@ sub verify_url {
     my $url = $e->retrieve_url_verify_url($url_id) or return $e->event;
 
     my ($attempt, $delay, $max_redirects, $timeout) =
-        collect_verify_attempt_and_settings($attempt_id);
+        collect_verify_attempt_and_settings($e, $attempt_id);
 
     return $e->event unless $e->allowed(
         'VERIFY_URL', $attempt->session->owning_lib);
@@ -361,7 +361,7 @@ sub collect_verify_attempt_and_settings {
         # we need to guarantee a write-DB read.
         $e->xact_begin;
 
-        $$attempt = 
+        $attempt = 
             $e->retrieve_url_verify_verification_attempt([
                 $attempt_id, {
                     flesh => 1,
@@ -385,10 +385,15 @@ sub collect_verify_attempt_and_settings {
 
         $cache{timeout}{$org} = $U->ou_ancestor_setting_value(
             $org, 'url_verify.url_verification_max_wait', $e) || 5;
+
+        $logger->info(
+            sprintf("url: loaded settings delay=%s; max_redirects=%s; timeout=%s",
+                $cache{delay}{$org}, $cache{redirects}{$org}, $cache{timeout}{$org}));
     }
 
     $cache{age}++;
 
+
     return (
         $cache{attempt}{$attempt_id},
         $cache{delay}{$org},
@@ -431,73 +436,35 @@ sub verify_one_url {
 
     # Now test the URL.
 
-    my $req;
-    eval {
-        # uses 'die' internally
-        $req = Net::HTTP::NB->new(Host => $url->host);
-    };
-
-    if ($req) {
-
-        $req->write_request(HEAD => $url->full_url);
-
-        my $sel = IO::Select->new($req);
-
-        if ($sel->can_read($timeout)) {
-
-            # request headers are ready for reading
-
-            my ($code, $msg, %headers);
-
-            eval {
-                # uses 'die' internally
-                ($code, $msg, %headers) = $req->read_response_headers;
-            };
-
-            if ($@) {
-                $logger->info("url: in-flight error $@ [$url_text]");
-                $vcation->res_code('998');
-                $vcation->res_text($@);
-
-            } else {
+    $ENV{FTP_PASSIVE} = 1; # TODO: setting?
 
-                $logger->info("url: received HTTP '$code' [$url_text]");
+    my $ua = LWP::UserAgent->new(ssl_opts => {verify_hostname => 0}); # TODO: verify_hostname setting?
+    my $req = HTTP::Request->new(HEAD => $url->full_url);
+    my $res = $ua->simple_request($req); # avoid auto-redirect
 
-                $vcation->res_code($code);
-                $vcation->res_text($msg);
+    $logger->info(sprintf(
+        "url: received HTTP '%s' / '%s' [%s]", 
+        $res->code, 
+        $res->message,
+        $url_text
+    ));
 
-                # is this a redirect?
-                if ($code =~ /^3/) {
+    $vcation->res_code($res->code);
+    $vcation->res_text($res->message);
 
-                    if (my $loc = $headers{Location}) {
-                        $redir_url = Fieldmapper::url_verify::url->new;
-                        $redir_url->redirect_from($url->id);
-                        $redir_url->full_url($loc);
+    # is this a redirect?
+    if ($res->code =~ /^3/) {
 
-                        $logger->info("url: redirect found $url_text => $loc");
+        if (my $loc = $res->headers->{location}) {
+            $redir_url = Fieldmapper::url_verify::url->new;
+            $redir_url->redirect_from($url->id);
+            $redir_url->full_url($loc);
 
-                    } else {
-                        $logger->info("url: server returned 3XX but no 'Location' header for url $url_text");
-                    }
-                }
-            }
+            $logger->info("url: redirect found $url_text => $loc");
 
         } else {
-
-            # request timed out
-            $logger->info("url: request timed out for $url_text");
-
-            $vcation->res_code('997');
-            $vcation->res_text('Request Timeout');
+            $logger->info("url: server returned 3XX but no 'Location' header for url $url_text");
         }
-
-    } else {
-
-        # Error building connection.  Invalid hostname, etc.
-
-        $logger->info("url: error building connection: $@");
-        $vcation->res_code('999');
-        $vcation->res_text($@);
     }
 
     # Begin phase-II DB communication