URLVerify.pm; docs; url shuffling

author Bill Erickson <berick@esilibrary.com>

Thu, 9 Aug 2012 20:42:02 +0000 (16:42 -0400)

committer Bill Erickson <berick@esilibrary.com>

Thu, 9 Aug 2012 20:42:02 +0000 (16:42 -0400)
author Bill Erickson <berick@esilibrary.com>
Thu, 9 Aug 2012 20:42:02 +0000 (16:42 -0400)
committer Bill Erickson <berick@esilibrary.com>
Thu, 9 Aug 2012 20:42:02 +0000 (16:42 -0400)
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm

index 64ec64b..5a99a8f 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm
@@ -55,8 +55,6 @@ sub validate_session {
      my ($self, $client, $auth, $session_id, $url_ids, $options) = @_;
      $options ||= {};
  
-    # loop through list of URLs / session URLs
-    # add a sleep (org setting) to the multisession handler
      # TODO: Avoid testing URLs having the same domain sequentially
  
      my $e = new_editor(authtoken => $auth, xact => 1);
@@ -70,6 +68,8 @@ sub validate_session {
  
      if (!$url_ids) {
  
+        # No URLs provided, load all URLs for the requested session
+
          my $query = {
              select => {uvu => ['id']},
              from => {
@@ -86,13 +86,14 @@ sub validate_session {
          };
  
          if ($attempt_id) {
-            $logger->info("url: resuming attempt $attempt_id");
  
              # when resuming an existing attempt (that presumably failed
              # mid-processing), we only want to process URLs that either
              # have no linked url_verification or have an un-completed
              # url_verification.
  
+            $logger->info("url: resuming attempt $attempt_id");
+
              $query->{from}->{uvu}->{uvuv} = {
                  type => 'left',
                  filter => {attempt => $attempt_id}
@@ -109,7 +110,7 @@ sub validate_session {
  
          } else {
  
-            # this is a new run, so we only want to process URLs that
+            # this is a new attempt, so we only want to process URLs that
              # originated from the source records and not from redirects.
  
              $query->{where} = {
@@ -119,6 +120,17 @@ sub validate_session {
  
          my $ids = $e->json_query($query);
          $url_ids = [ map {$_->{id}} @$ids ];
+
+        # This is admittedly a bit of a hack and probably an abuse of
+        # order_by, but shuffling is good for spreading out domains, and
+        # letting the database do it avoids the necessity of loading all
+        # the URLs (there could be a lot of them) and shuffling them here.
+
+        $url_ids = $e->search_url_verify_url(
+            [   {id => $url_ids},
+                {order_by => {uvu => 'RANDOM()'}}
+            ],  {idlist => 1}
+        );
      }
  
      my $url_count = scalar(@$url_ids);
author	Bill Erickson <berick@esilibrary.com>
	Thu, 9 Aug 2012 20:42:02 +0000 (16:42 -0400)
committer	Bill Erickson <berick@esilibrary.com>
	Thu, 9 Aug 2012 20:42:02 +0000 (16:42 -0400)