--- /dev/null
+#!/usr/bin/perl
+# ----------------------------------------------------------------------
+# Find authority records newer than a specified age. Once found,
+# run each through the auth-to-auth linking process. Then locate
+# bib records that we might want to link to the new records and
+# pass them off to the bib-to-auth linker.
+# ----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use DateTime;
+use Pod::Usage qw/pod2usage/;
+use Time::HiRes qw/usleep/;
+
+# Script-wide state: the authority record IDs found by the "new since"
+# query, the candidate bib record IDs, and a progress counter that is
+# shared by (and reset between) the two linking loops.
+my @auth_ids;
+my @bib_ids;
+my $counter = 0;
+
+# Command-line options.
+my $help;
+my $new_since;
+my $print_auth_ids;
+my $print_bib_ids;
+my $link_auths;
+my $link_bibs;
+my $progress;
+
+# Connection defaults come from the standard libpq environment
+# variables and may be overridden with the --db-* options below.
+# The user name is read from PGUSER; PGDATABASE names a database, not
+# a login role, so it must not be used as the user default.
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_pass = $ENV{PGPASSWORD};
+
+# --new-since takes an integer number of days; the remaining
+# non-connection options are simple boolean flags.
+my $opt_result = GetOptions(
+    'new-since=i'       => \$new_since,
+    'print-bib-ids'     => \$print_bib_ids,
+    'print-auth-ids'    => \$print_auth_ids,
+    'link-bibs'         => \$link_bibs,
+    'link-auths'        => \$link_auths,
+    'progress'          => \$progress,
+    "db-host=s"         => \$db_host,
+    "db-user=s"         => \$db_user,
+    "db-pass=s"         => \$db_pass,
+    "db-port=s"         => \$db_port,
+    'help'              => \$help
+);
+
+# Print $msg to STDOUT on its own line, prefixed with the current
+# local date and time formatted as '%F %T'.
+sub announce {
+    my ($text) = @_;
+    my $stamp = DateTime->now(time_zone => 'local')->strftime('%F %T');
+    print "$stamp $text\n";
+}
+
+# Usage handling. A bad option or a missing --new-since is a usage
+# error and exits non-zero so callers (cron, wrappers) can detect it;
+# an explicit --help exits 0. --new-since is mandatory because the
+# day count is required by the authority record query and by the
+# messages printed later.
+pod2usage(2) if !$opt_result;
+pod2usage(0) if $help;
+pod2usage(-exitval => 2, -message => '--new-since <days> is required')
+    unless defined $new_since;
+
+# Connect to the 'evergreen' database on the configured host/port.
+my $dsn = "dbi:Pg:database=evergreen;host=$db_host;port=$db_port";
+my $dbh = DBI->connect($dsn, $db_user, $db_pass)
+    or die "Cannot connect to database: $dsn: "
+        . (defined $DBI::errstr ? $DBI::errstr : 'unknown error') . "\n";
+
+# A statement_timeout of 0 disables the timeout for this session; the
+# queries issued later may be long-running.
+$dbh->do('SET statement_timeout = 0');
+
+# ----------------------------------------------------------------------
+# Find the new authority record IDs
+
+# Select the IDs of authority records whose creation date falls within
+# the last $new_since days (compared by calendar date). The day count
+# is bound as a placeholder instead of being interpolated into the SQL.
+my $new_auth_sql = <<'SQL';
+ SELECT id FROM authority.record_entry
+ WHERE DATE(create_date) >= DATE(NOW() - (CAST(? AS INTEGER) * INTERVAL '1 day'))
+SQL
+
+# $sth stays a file-scoped lexical because the bib record query later
+# in this script assigns to it again.
+# Failures are fatal: an unchecked failed query would look exactly
+# like "no new records" and the script would exit successfully.
+my $sth = $dbh->prepare($new_auth_sql)
+    or die "Cannot prepare authority record query: " . $dbh->errstr . "\n";
+$sth->execute($new_since)
+    or die "Cannot execute authority record query: " . $sth->errstr . "\n";
+while (my ($auth_id) = $sth->fetchrow_array) {
+    push(@auth_ids, $auth_id);
+}
+$sth->finish;
+
+my $auth_rec_count = scalar(@auth_ids);
+announce("Auth IDs: @auth_ids") if $print_auth_ids;
+
+# Nothing was found, so there is nothing to link; stop here.
+if (!@auth_ids) {
+    announce("No authority records created in the last $new_since days");
+    exit 0;
+}
+
+# ----------------------------------------------------------------------
+# Auth-to-Auth linking
+
+if ($link_auths) {
+    # Pass each new authority record to the auth-to-auth linker, one
+    # child process per record. The linker is looked up relative to the
+    # current working directory.
+    # NOTE(review): --db-port is accepted by this script but is not
+    # forwarded here; confirm whether the linker supports it.
+    for my $rec_id (@auth_ids) {
+        my $status = system(
+            './authority_authority_linker.pl',
+            '--db-host', $db_host,
+            '--db-user', $db_user,
+            '--db-pass', ($db_pass || ''),
+            '--record', $rec_id
+        );
+
+        # Report a failed run but keep going with the remaining records.
+        if ($status == -1) {
+            warn "Could not run authority_authority_linker.pl "
+                . "for record $rec_id: $!\n";
+        }
+        elsif ($status != 0) {
+            warn sprintf(
+                "authority_authority_linker.pl failed for record %s "
+                    . "(exit code %d, signal %d)\n",
+                $rec_id, $status >> 8, $status & 127
+            );
+        }
+
+        usleep(250000); # 1/4 second; allow ctrl-c to penetrate
+        announce("Auth records processed: $counter/$auth_rec_count")
+            if $progress && ++$counter % 10 == 0;
+    }
+}
+
+# Reset the shared progress counter for the bib linking loop.
+$counter = 0;
+
+# Exit if there is nothing left to do.
+exit unless $print_bib_ids || $link_bibs;
+
+# ----------------------------------------------------------------------
+# Find bib records that we might want to link to the new authority
+# record.
+#
+# Query: give me bib records that link to browse entries that also
+# link to exactly one authority record, specifically the new authority
+# records we are processing via this script. Only include bib records
+# that are not already linked via bib_linking to said authority record.
+# This represents the set of bib records that might need to be linked
+# to our new authority records.
+# ----------------------------------------------------------------------
+my %bib_ids; # de-dupe by record ID.
+
+# The IN (...) list is built from the IDs read from
+# authority.record_entry by the earlier query, not from user input.
+my $auth_ids_param = join(',', @auth_ids);
+
+# Run the same query once per browse axis; only the table names differ.
+for my $axis (qw/author subject series title/) {
+    my $query = <<SQL;
+SELECT
+ entry.id,
+ are.id AS auth_record,
+ def.source AS bib_record
+FROM metabib.browse_${axis}_entry entry
+ JOIN metabib.browse_${axis}_entry_simple_heading_map map
+ ON (map.entry = entry.id)
+ JOIN authority.simple_heading ash ON (ash.id = map.simple_heading)
+ JOIN authority.record_entry are ON (are.id = ash.record)
+ JOIN metabib.browse_${axis}_entry_def_map def ON (def.entry = entry.id)
+ JOIN biblio.record_entry bre ON (bre.id = def.source)
+ JOIN (
+ -- we only care about browse entries that link to
+ -- exactly one auth record, the auth record in question.
+ SELECT entry.id, COUNT(are.id)
+ FROM metabib.browse_${axis}_entry entry
+ JOIN metabib.browse_${axis}_entry_simple_heading_map map
+ ON (map.entry = entry.id)
+ JOIN authority.simple_heading ash
+ ON (ash.id = map.simple_heading)
+ JOIN authority.record_entry are
+ ON (are.id = ash.record)
+ WHERE NOT are.deleted
+ GROUP BY 1
+ HAVING COUNT(are.id) = 1
+ ) singles ON (singles.id = entry.id)
+ LEFT JOIN authority.bib_linking link
+ ON (link.bib = def.source AND link.authority = are.id)
+WHERE
+ bre.deleted IS FALSE
+ AND link.authority IS NULL -- unlinked records
+ AND are.id IN ($auth_ids_param)
+SQL
+
+    # Fail loudly: a silently failed axis query would drop candidate
+    # bib records without any indication.
+    $sth = $dbh->prepare($query)
+        or die "Cannot prepare $axis bib query: " . $dbh->errstr . "\n";
+    $sth->execute
+        or die "Cannot execute $axis bib query: " . $sth->errstr . "\n";
+    while (my $ref = $sth->fetchrow_hashref()) {
+        $bib_ids{$ref->{bib_record}} = 1; # de-dupe
+    }
+    $sth->finish;
+}
+
+# Record IDs are numeric, so sort them numerically; a plain sort()
+# would order them as strings (e.g. 10 before 9).
+@bib_ids = sort { $a <=> $b } keys(%bib_ids);
+my $bib_rec_count = scalar(@bib_ids);
+
+if ($link_bibs) {
+    # Fire off the bib-to-auth linker for each of the records
+    # identified, one child process per record. The linker is looked up
+    # relative to the current working directory.
+    # NOTE(review): --db-port is accepted by this script but is not
+    # forwarded here; confirm whether the linker supports it.
+    for my $rec_id (@bib_ids) {
+        my $status = system('./authority_control_fields.pl',
+            '--db-host', $db_host,
+            '--db-user', $db_user,
+            '--db-pass', ($db_pass || ''),
+            '--record', $rec_id,
+            '--refresh'
+        );
+
+        # Report a failed run but keep going with the remaining records.
+        if ($status == -1) {
+            warn "Could not run authority_control_fields.pl "
+                . "for record $rec_id: $!\n";
+        }
+        elsif ($status != 0) {
+            warn sprintf(
+                "authority_control_fields.pl failed for record %s "
+                    . "(exit code %d, signal %d)\n",
+                $rec_id, $status >> 8, $status & 127
+            );
+        }
+
+        usleep(250000); # 1/4 second; allow ctrl-c to penetrate
+        announce("Bib records processed: $counter/$bib_rec_count")
+            if $progress && ++$counter % 10 == 0;
+    }
+}
+
+# The ID list is printed after linking (if any) has finished.
+announce("Bib IDs: @bib_ids") if $print_bib_ids;
+