URLVerify.pm; more testing tweaks
author Bill Erickson <berick@esilibrary.com>
Wed, 8 Aug 2012 19:11:19 +0000 (15:11 -0400)
committer Bill Erickson <berick@esilibrary.com>
Wed, 8 Aug 2012 19:11:19 +0000 (15:11 -0400)
Signed-off-by: Bill Erickson <berick@esilibrary.com>
Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm

index 997cfb3..973c522 100644 (file)
@@ -142,8 +142,7 @@ sub validate_session {
 
     # Now cycle through the URLs in batches.
 
-    my $batch_size = 5; # TODO: org setting
-    my $delay = 2; # TODO: org setting
+    my $batch_size = 10; # TODO: org setting
     my $num_processed = 0; # total number processed, including redirects
     my $resp_window = 1;
 
@@ -187,11 +186,6 @@ sub validate_session {
                     $resp_window *= 2 unless $resp_window == 256;
                 }
             }
-
-            # insert the per-thread delay, which keeps this thread
-            # active, which prevents a new thread from firing up to
-            # replace it. (thread = multisession session)
-            sleep $delay;
         },
 
         failure_handler => sub {
@@ -280,21 +274,22 @@ sub verify_url {
 
     return $e->event unless $e->allowed('VERIFY_URL');
 
-    my $depth = 0;
+    my $delay = 2; # TODO: org setting
     my $max_redirects = 20; # TODO: org setting
     my $timeout = 5; # TODO: org setting
 
+    my $depth = 0;
     my $cur_url = $url;
     my $loop_detected = 0;
 
     while ($depth++ < $max_redirects) {
 
-        if ($seen_urls{$cur_url->url}) {
+        if ($seen_urls{$cur_url->full_url}) {
             $loop_detected = 1;
             last;
         }
 
-        $seen_urls{$cur_url->url} = $cur_url;
+        $seen_urls{$cur_url->full_url} = $cur_url;
 
         my $url_resp = verify_one_url($e, $attempt, $cur_url, $timeout);
 
@@ -316,12 +311,12 @@ sub verify_url {
         $vcation->req_time('now');
 
         if ($loop_detected) {
-            $logger->info("url: redirect loop detected at " . $cur_url->url);
+            $logger->info("url: redirect loop detected at " . $cur_url->full_url);
             $vcation->res_code('996');
             $vcation->res_text('Redirect Loop');
 
         } else {
-            $logger->info("url: max redirects reached for " . $cur_url->url);
+            $logger->info("url: max redirects reached for " . $cur_url->full_url);
             $vcation->res_code('995');
             $vcation->res_text('Max Redirects');
         }
@@ -331,6 +326,12 @@ sub verify_url {
         $e->xact_commit;
     }
 
+    # The calling code is likely not multi-threaded, so a 
+    # per-URL (i.e. per-thread) delay would not be possible.  
+    # Applying the delay here allows the caller to process 
+    # batches of URLs without having to worry about the delay.
+    sleep $delay;
+
     return undef;
 }
 
@@ -346,7 +347,7 @@ sub verify_url {
 sub verify_one_url {
     my ($e, $attempt, $url, $timeout) = @_;
 
-    my $url_text = $url->url;
+    my $url_text = $url->full_url;
     my $redir_url;
 
     # first, create the verification object so we can a) indicate that
@@ -372,7 +373,7 @@ sub verify_one_url {
 
     if ($req) {
 
-        $req->write_request(HEAD => $url->url);
+        $req->write_request(HEAD => $url->full_url);
 
         my $sel = IO::Select->new($req);
 
@@ -405,7 +406,13 @@ sub verify_one_url {
                     if (my $loc = $headers{Location}) {
                         $redir_url = Fieldmapper::url_verify::url->new;
                         $redir_url->redirect_from($url->id);
-                        $redir_url->url($loc);
+                        $redir_url->full_url($loc);
+                        $redir_url->item($url->item);
+                        $redir_url->url_selector($url->url_selector);
+                        $redir_url->tag($url->tag);
+                        $redir_url->subfield($url->subfield);
+                        $redir_url->ord($url->ord);
+
                         $logger->info("url: redirect found $url_text => $loc");
 
                     } else {