my ($self, $client, $auth, $session_id, $url_ids, $options) = @_;
$options ||= {};
- # loop through list of URLs / session URLs
- # add a sleep (org setting) to the multisession handler
# TODO: Avoid testing URLs having the same domain sequentially
my $e = new_editor(authtoken => $auth, xact => 1);
if (!$url_ids) {
+ # No URLs provided, load all URLs for the requested session
+
my $query = {
select => {uvu => ['id']},
from => {
};
if ($attempt_id) {
- $logger->info("url: resuming attempt $attempt_id");
# when resuming an existing attempt (that presumably failed
# mid-processing), we only want to process URLs that either
# have no linked url_verification or have an un-completed
# url_verification.
+ $logger->info("url: resuming attempt $attempt_id");
+
$query->{from}->{uvu}->{uvuv} = {
type => 'left',
filter => {attempt => $attempt_id}
} else {
- # this is a new run, so we only want to process URLs that
+ # this is a new attempt, so we only want to process URLs that
# originated from the source records and not from redirects.
$query->{where} = {
my $ids = $e->json_query($query);
$url_ids = [ map {$_->{id}} @$ids ];
+
+ # This is somewhat hacky and arguably an abuse of order_by, but
+ # shuffling helps spread out URLs from the same domain, and doing
+ # it in the query avoids having to load all of the URLs (there
+ # could be many) and shuffle them here.
+
+ $url_ids = $e->search_url_verify_url(
+ [ {id => $url_ids},
+ {order_by => {uvu => 'RANDOM()'}}
+ ], {idlist => 1}
+ );
}
my $url_count = scalar(@$url_ids);