--- /dev/null
+#!/usr/bin/perl
+use strict; use warnings;
+
+my $work_urls = {
+ 'dev198.esilibrary.com' => [
+ {path => '/eg/opac/record/61'}, # proto, etc.
+ {path => '/eg/opac/record/62'},
+ {path => '/eg/opac/record/63'},
+ {path => '/eg/opac/record/64'},
+ {path => '/eg/opac/record/65'},
+ {path => '/eg/opac/record/66'},
+ {path => '/eg/opac/record/67'},
+ {path => '/eg/opac/record/68'},
+ {path => '/eg/opac/record/69'},
+ {path => '/eg/opac/record/60'},
+ ]
+};
+
+package URLHandler;
+use Net::HTTP::NB;
+use IO::Select;
+
+use overload
+ '""' => sub {return $_[0]->{url}};
+
+sub new {
+ my ($class, $args) = @_;
+ my $self = bless($args, $class);
+ $self->url($self->{proto} . '://' . $self->{hostname} . $self->{path});
+ return $self;
+}
+
+# returns 1 to continue; 0 to skip this URL
+sub fire {
+ my $self = shift;
+ my $hostname = $self->{hostname};
+ my $path = $self->{path};
+ print "Firing " . $self->{url} . "\n";
+
+ my $req = Net::HTTP::NB->new(Host => $hostname);
+
+ if (!$req) {
+ warn "Unable to connect to $hostname\n";
+ return 0;
+ }
+
+ $req->write_request(HEAD => $self->{url});
+
+ $self->request($req);
+ $self->{fire_time} = time();
+ return 1;
+}
+
+sub request {
+ my ($self, $req) = @_;
+ $self->{request} = $req if $req;
+ return $self->{request};
+}
+
+sub url {
+ my ($self, $req) = @_;
+ $self->{url} = $req if $req;
+ return $self->{url};
+}
+
+sub process_response {
+ my $self = shift;
+ my ($code) = $self->{request}->read_response_headers;
+
+ print "code = $code; url = " . $self->{url} . "\n";
+
+ if ($code eq '200') {
+
+ } elsif ($code eq '302') {
+
+ } elsif ($code >= 400) {
+
+ }
+ # ...
+
+ return 1;
+}
+
+package URLChecker;
+use Net::HTTP::NB;
+use IO::Select;
+
+sub new {
+ my ($class, $args) = @_;
+ my $self = bless($args || {}, $class);
+ $self->{active} = [];
+ $self->{seen_urls} = {};
+ $self->{timeout} = 10;
+ $self->{delay} = 1;
+ $self->{max_threads} = 3;
+ return $self;
+}
+
+sub process_batch {
+ my $self = shift;
+
+ # loop through URLs
+ # for each URL, create a URL handler object
+ # launch max_threads handler objects
+ # select() on handler IO handles
+ # process each result
+ # -- if 302, append url to handler and start handler over
+ # -- otherwise clean up and push another handler into the mix
+
+ while (keys %$work_urls) {
+ $self->add_requests;
+ $self->process_responses;
+ sleep $self->{delay};
+ }
+}
+
+sub add_requests {
+ my $self = shift;
+
+ while (@{$self->{active}} < $self->{max_threads}) {
+ last unless keys %$work_urls;
+ my @hosts = keys %$work_urls;
+ my $host = $hosts[0];
+ my $paths = $work_urls->{$host};
+ my $path = pop(@$paths);
+
+ # no more paths for this host, remove the host
+ delete $work_urls->{$host} if @{$work_urls->{$host}} == 0;
+
+ my $handler = URLHandler->new({hostname => $host, path => $path->{path}, proto => 'http'});
+
+ if ($handler->fire) {
+ push(@{$self->{active}}, $handler);
+ } else {
+ # log that we were unable to connect to domain
+ # and update all paths for this domain
+ }
+ }
+}
+
+# XXX add a call to reap timed-out handlers
+sub process_responses {
+ my $self = shift;
+ my $sel = IO::Select->new;
+
+ $sel->add($_->request) for @{$self->{active}};
+
+ my @ready = $sel->can_read($self->{timeout});
+
+ for my $io_handle (@ready) {
+ warn "handle ready $io_handle\n";
+ my ($handler) = grep {$_->request eq $io_handle} @{$self->{active}};
+ warn "found handler $handler";
+ if ($handler->process_response) {
+ # remove the successful handler from the active list
+ $self->{active} = [ grep {$_->request ne $io_handle} @{$self->{active}} ];
+ # push another active URL onto the active list
+ } else {
+ # make handler process the redirect
+ }
+ }
+}
+
+
+package main;
+
+my $checker = URLChecker->new;
+$checker->process_batch;
+