Link checker: Allow configurable User Agent string
author: Lebbeous Fogle-Weekley <lebbeous@esilibrary.com>
Wed, 26 Sep 2012 21:35:05 +0000 (17:35 -0400)
committer: Mike Rylander <mrylander@gmail.com>
Thu, 14 Feb 2013 19:19:17 +0000 (14:19 -0500)
Credit to Bill Erickson for noticing that tests were resulting in an
inordinate number of 403 Forbidden responses, which turned out to be
due to discrimination by sites against a libwww/* user agent string.

We now use "Evergreen <version> Link Checker" by default, and it's
configurable in opensrf.xml (grep for user_agent).

Signed-off-by: Lebbeous Fogle-Weekley <lebbeous@esilibrary.com>
Signed-off-by: Mike Rylander <mrylander@gmail.com>
Open-ILS/examples/opensrf.xml.example
Open-ILS/src/perlmods/lib/OpenILS/Application/URLVerify.pm

index 64527ef..ae7da96 100644 (file)
@@ -717,6 +717,7 @@ vim:et:ts=4:sw=4:
                     <max_spare_children>5</max_spare_children>
                 </unix_config>
                 <app_settings>
+                    <user_agent>Evergreen %s Link Checker</user_agent>
                 </app_settings>
             </open-ils.url_verify>
 
index ea1e4f7..cbda944 100644 (file)
@@ -6,6 +6,7 @@ use base qw/OpenILS::Application/;
 use strict; use warnings;
 use OpenSRF::Utils::Logger qw(:logger);
 use OpenSRF::MultiSession;
+use OpenSRF::Utils::SettingsClient;
 use OpenILS::Utils::Fieldmapper;
 use OpenILS::Utils::CStoreEditor q/:funcs/;
 use OpenILS::Application::AppUtils;
@@ -17,6 +18,18 @@ $Data::Dumper::Indent = 0;
 
 my $U = 'OpenILS::Application::AppUtils';
 
+# User Agent string handed to LWP::UserAgent; populated by initialize().
+my $user_agent_string;
+
+# Read the User Agent template from the service settings and fill in the version.
+sub initialize {
+    my @confpath = qw/apps open-ils.url_verify app_settings user_agent/;
+    my $conf     = OpenSRF::Utils::SettingsClient->new;
+    # An absent <user_agent> would make sprintf warn and yield an empty string,
+    # so fall back to the stock template shipped in opensrf.xml.example.
+    my $template = $conf->config_value(@confpath) || 'Evergreen %s Link Checker';
+    $user_agent_string = sprintf($template, __PACKAGE__->ils_version);
+    $logger->info("using '$user_agent_string' as User Agent string");
+}
 
 __PACKAGE__->register_method(
     method => 'verify_session',
@@ -562,7 +575,11 @@ sub verify_one_url {
 
     $ENV{FTP_PASSIVE} = 1; # TODO: setting?
 
-    my $ua = LWP::UserAgent->new(ssl_opts => {verify_hostname => 0}); # TODO: verify_hostname setting?
+    my $ua = LWP::UserAgent->new(
+        ssl_opts => {verify_hostname => 0}, # TODO: verify_hostname setting?
+        agent => $user_agent_string
+    );
+
     $ua->timeout($timeout);
 
     my $req = HTTP::Request->new(HEAD => $url->full_url);