# Now cycle through the URLs in batches.
- my $batch_size = 5; # TODO: org setting
- my $delay = 2; # TODO: org setting
+ my $batch_size = 10; # TODO: org setting
my $num_processed = 0; # total number processed, including redirects
my $resp_window = 1;
$resp_window *= 2 unless $resp_window == 256;
}
}
-
- # insert the per-thread delay, which keeps this thread
- # active, which prevents a new thread from firing up to
- # replace it. (thread = multisession session)
- sleep $delay;
},
failure_handler => sub {
return $e->event unless $e->allowed('VERIFY_URL');
- my $depth = 0;
+ my $delay = 2; # TODO: org setting
my $max_redirects = 20; # TODO: org setting
my $timeout = 5; # TODO: org setting
+ my $depth = 0;
my $cur_url = $url;
my $loop_detected = 0;
while ($depth++ < $max_redirects) {
- if ($seen_urls{$cur_url->url}) {
+ if ($seen_urls{$cur_url->full_url}) {
$loop_detected = 1;
last;
}
- $seen_urls{$cur_url->url} = $cur_url;
+ $seen_urls{$cur_url->full_url} = $cur_url;
my $url_resp = verify_one_url($e, $attempt, $cur_url, $timeout);
$vcation->req_time('now');
if ($loop_detected) {
- $logger->info("url: redirect loop detected at " . $cur_url->url);
+ $logger->info("url: redirect loop detected at " . $cur_url->full_url);
$vcation->res_code('996');
$vcation->res_text('Redirect Loop');
} else {
- $logger->info("url: max redirects reached for " . $cur_url->url);
+ $logger->info("url: max redirects reached for " . $cur_url->full_url);
$vcation->res_code('995');
$vcation->res_text('Max Redirects');
}
$e->xact_commit;
}
+ # The calling code is likely not multi-threaded, so a
+ # per-URL (i.e. per-thread) delay would not be possible.
+ # Applying the delay here allows the caller to process
+ # batches of URLs without having to worry about the delay.
+ sleep $delay;
+
return undef;
}
sub verify_one_url {
my ($e, $attempt, $url, $timeout) = @_;
- my $url_text = $url->url;
+ my $url_text = $url->full_url;
my $redir_url;
# first, create the verification object so we can a) indicate that
if ($req) {
- $req->write_request(HEAD => $url->url);
+ $req->write_request(HEAD => $url->full_url);
my $sel = IO::Select->new($req);
if (my $loc = $headers{Location}) {
$redir_url = Fieldmapper::url_verify::url->new;
$redir_url->redirect_from($url->id);
- $redir_url->url($loc);
+ $redir_url->full_url($loc);
+ $redir_url->item($url->item);
+ $redir_url->url_selector($url->url_selector);
+ $redir_url->tag($url->tag);
+ $redir_url->subfield($url->subfield);
+ $redir_url->ord($url->ord);
+
$logger->info("url: redirect found $url_text => $loc");
} else {