From b6bc810c2760e601c4c4a6da02c8b3cc5009e901 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Thu, 9 Aug 2012 16:42:02 -0400 Subject: [PATCH] URLVerify.pm; docs; url shuffling Signed-off-by: Bill Erickson --- .../perlmods/lib/OpenILS/Application/URLVerify.pm | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm index 64ec64bae2..5a99a8fe40 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm @@ -55,8 +55,6 @@ sub validate_session { my ($self, $client, $auth, $session_id, $url_ids, $options) = @_; $options ||= {}; - # loop through list of URLs / session URLs - # add a sleep (org setting) to the multisession handler # TODO: Avoid testing URLs having the same domain sequentially my $e = new_editor(authtoken => $auth, xact => 1); @@ -70,6 +68,8 @@ sub validate_session { if (!$url_ids) { + # No URLs provided, load all URLs for the requested session + my $query = { select => {uvu => ['id']}, from => { @@ -86,13 +86,14 @@ sub validate_session { }; if ($attempt_id) { - $logger->info("url: resuming attempt $attempt_id"); # when resuming an existing attempt (that presumably failed # mid-processing), we only want to process URLs that either # have no linked url_verification or have an un-completed # url_verification. + $logger->info("url: resuming attempt $attempt_id"); + $query->{from}->{uvu}->{uvuv} = { type => 'left', filter => {attempt => $attempt_id} @@ -109,7 +110,7 @@ sub validate_session { } else { - # this is a new run, so we only want to process URLs that + # this is a new attempt, so we only want to process URLs that # originated from the source records and not from redirects. $query->{where} = { @@ -119,6 +120,17 @@ sub validate_session { my $ids = $e->json_query($query); $url_ids = [ map {$_->{id}} @$ids ]; + + # this is kinda hinky and probably an abuse of order_by, but + # shuffling is good for spreading out domains and this avoids + # the necessity of loading all the URLs (could be lots) and + # shuffling them here. + + $url_ids = $e->search_url_verify_url( + [ {id => $url_ids}, + {order_by => {uvu => 'RANDOM()'}} + ], {idlist => 1} + ); } my $url_count = scalar(@$url_ids); -- 2.11.0