From a182d4b900ca840cf7bd560905d825dcd2ac0a69 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Tue, 7 Aug 2012 10:49:23 -0400 Subject: [PATCH] test validation script Signed-off-by: Bill Erickson --- validate.pl | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 validate.pl diff --git a/validate.pl b/validate.pl new file mode 100644 index 0000000000..1ffe3bd8c4 --- /dev/null +++ b/validate.pl @@ -0,0 +1,169 @@ +#!/usr/bin/perl +use strict; use warnings; + +my $work_urls = { + 'dev198.esilibrary.com' => [ + {path => '/eg/opac/record/61'}, # proto, etc. + {path => '/eg/opac/record/62'}, + {path => '/eg/opac/record/63'}, + {path => '/eg/opac/record/64'}, + {path => '/eg/opac/record/65'}, + {path => '/eg/opac/record/66'}, + {path => '/eg/opac/record/67'}, + {path => '/eg/opac/record/68'}, + {path => '/eg/opac/record/69'}, + {path => '/eg/opac/record/60'}, + ] +}; + +package URLHandler; +use Net::HTTP::NB; +use IO::Select; + +use overload + '""' => sub {return $_[0]->{url}}; + +sub new { + my ($class, $args) = @_; + my $self = bless($args, $class); + $self->url($self->{proto} . '://' . $self->{hostname} . $self->{path}); + return $self; +} + +# returns 1 to continue; 0 to skip this URL +sub fire { + my $self = shift; + my $hostname = $self->{hostname}; + my $path = $self->{path}; + print "Firing " . $self->{url} . "\n"; + + my $req = Net::HTTP::NB->new(Host => $hostname); + + if (!$req) { + warn "Unable to connect to $hostname\n"; + return 0; + } + + $req->write_request(HEAD => $self->{url}); + + $self->request($req); + $self->{fire_time} = time(); + return 1; +} + +sub request { + my ($self, $req) = @_; + $self->{request} = $req if $req; + return $self->{request}; +} + +sub url { + my ($self, $req) = @_; + $self->{url} = $req if $req; + return $self->{url}; +} + +sub process_response { + my $self = shift; + my ($code) = $self->{request}->read_response_headers; + + print "code = $code; url = " . $self->{url} . "\n"; + + if ($code eq '200') { + + } elsif ($code eq '302') { + + } elsif ($code >= 400) { + + } + # ... + + return 1; +} + +package URLChecker; +use Net::HTTP::NB; +use IO::Select; + +sub new { + my ($class, $args) = @_; + my $self = bless($args || {}, $class); + $self->{active} = []; + $self->{seen_urls} = {}; + $self->{timeout} = 10; + $self->{delay} = 1; + $self->{max_threads} = 3; + return $self; +} + +sub process_batch { + my $self = shift; + + # loop through URLs + # for each URL, create a URL handler object + # launch max_threads handler objects + # select() on handler IO handles + # process each result + # -- if 302, append url to handler and start handler over + # -- otherwise clean up and push another handler into the mix + + while (keys %$work_urls) { + $self->add_requests; + $self->process_responses; + sleep $self->{delay}; + } +} + +sub add_requests { + my $self = shift; + + while (@{$self->{active}} < $self->{max_threads}) { + last unless keys %$work_urls; + my @hosts = keys %$work_urls; + my $host = $hosts[0]; + my $paths = $work_urls->{$host}; + my $path = pop(@$paths); + + # no more paths for this host, remove the host + delete $work_urls->{$host} if @{$work_urls->{$host}} == 0; + + my $handler = URLHandler->new({hostname => $host, path => $path->{path}, proto => 'http'}); + + if ($handler->fire) { + push(@{$self->{active}}, $handler); + } else { + # log that we were unable to connect to domain + # and update all paths for this domain + } + } +} + +# XXX add a call to reap timed-out handlers +sub process_responses { + my $self = shift; + my $sel = IO::Select->new; + + $sel->add($_->request) for @{$self->{active}}; + + my @ready = $sel->can_read($self->{timeout}); + + for my $io_handle (@ready) { + warn "handle ready $io_handle\n"; + my ($handler) = grep {$_->request eq $io_handle} @{$self->{active}}; + warn "found handler $handler"; + if ($handler->process_response) { + # remove the successful handler from the active list + $self->{active} = [ grep {$_->request ne $io_handle} @{$self->{active}} ]; + # push another active URL onto the active list + } else { + # make handler process the redirect + } + } +} + + +package main; + +my $checker = URLChecker->new; +$checker->process_batch; + -- 2.11.0