From: Bill Erickson
Date: Thu, 12 May 2016 20:48:04 +0000 (-0400)
Subject: JBAS-1417 New authority record linking script
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=48122d12f4e5a2369656ee7d24d5ec2453051847;p=working%2FEvergreen.git

JBAS-1417 New authority record linking script

Runs auth-to-auth and bib-to-auth linking for authority records created since a specified date.

Signed-off-by: Bill Erickson
---

diff --git a/KCLS/linking/link-new-auth-records.pl b/KCLS/linking/link-new-auth-records.pl
new file mode 100755
index 0000000000..9a0f4a0f5c
--- /dev/null
+++ b/KCLS/linking/link-new-auth-records.pl
@@ -0,0 +1,185 @@
+#!/usr/bin/perl
+# ----------------------------------------------------------------------
+# Find authority records newer than a specified age. Once found,
+# run each through the auth-to-auth linking process. Then locate
+# bib records that we might want to link to the new records and
+# pass them off to the bib-to-auth linker.
+#
+# Flow, as written below:
+#   1. Parse options and connect to PostgreSQL.
+#   2. Collect the IDs of the new authority records; exit 0 if none.
+#   3. With --link-auths, run ./authority_authority_linker.pl once per
+#      authority record.
+#   4. With --print-bib-ids or --link-bibs, collect candidate bib record
+#      IDs for each browse axis (author, subject, series, title).
+#   5. With --link-bibs, run ./authority_control_fields.pl --refresh once
+#      per bib record.
+#
+# NOTE(review): this copy of the patch lost both SQL heredocs during
+# extraction (see the notes at each query). Restore them from the
+# original commit before trying to run the script.
+# ----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use DateTime;
+use Pod::Usage qw/pod2usage/;
+use Time::HiRes qw/usleep/;
+
+# IDs of the new authority records and of the candidate bib records.
+my @auth_ids;
+my @bib_ids;
+# Progress counter shared by both linking loops; reset between them.
+my $counter = 0;
+
+# options
+my $help;
+my $new_since;
+my $print_auth_ids;
+my $print_bib_ids;
+my $link_auths;
+my $link_bibs;
+my $progress;
+# Connection defaults come from the environment, then fall back to literals.
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+# NOTE(review): the default DB *user* is read from PGDATABASE, not PGUSER,
+# while the DSN below hardcodes the database name. This looks unintended;
+# confirm before relying on the environment defaults.
+my $db_user = $ENV{PGDATABASE} || 'evergreen';
+my $db_pass = $ENV{PGPASSWORD};
+
+# Command-line options:
+#   --new-since=N     integer; interpolated below as an "N day" interval
+#   --print-bib-ids   announce the candidate bib record IDs
+#   --print-auth-ids  announce the new authority record IDs
+#   --link-bibs       run the bib-to-auth linker on each candidate bib
+#   --link-auths      run the auth-to-auth linker on each new authority
+#   --progress        announce progress after every 10th record
+#   --db-host / --db-user / --db-pass / --db-port   connection settings
+#   --help            show usage and exit
+# NOTE(review): --new-since has no default and is never checked for
+# definedness before it is interpolated into the SQL and log messages.
+my $opt_result = GetOptions(
+    'new-since=i' => \$new_since,
+    'print-bib-ids' => \$print_bib_ids,
+    'print-auth-ids' => \$print_auth_ids,
+    'link-bibs' => \$link_bibs,
+    'link-auths' => \$link_auths,
+    'progress' => \$progress,
+    "db-host=s" => \$db_host,
+    "db-user=s" => \$db_user,
+    "db-pass=s" => \$db_pass,
+    "db-port=s" => \$db_port,
+    'help' => \$help
+);
+
+# Print $msg to STDOUT prefixed with the local date and time ('%F %T').
+sub announce {
+    my $msg = shift;
+    # NOTE(review): the line break inside the string literal below looks
+    # like an extraction artifact of this copy -- confirm against the
+    # original commit.
+    print DateTime->now(time_zone => 'local')->strftime('%F %T')." 
$msg\n";
+}
+
+# Show usage and exit with status 0 on a parse failure or --help.
+# NOTE(review): no POD is visible in this view of the file for pod2usage
+# to render -- confirm.
+pod2usage(0) if !$opt_result || $help;
+
+# The database name is fixed to 'evergreen'; only host and port vary.
+my $dsn = "dbi:Pg:database=evergreen;host=$db_host;port=$db_port";
+# The die message ends in "\n" (no file/line suffix) and does not include
+# $DBI::errstr, so the underlying connection error is not reported here.
+my $dbh = DBI->connect($dsn, $db_user, $db_pass)
+    or die "Cannot connect to database: $dsn\n";
+
+# A statement_timeout of 0 disables the per-statement timeout.
+$dbh->do('SET statement_timeout = 0');
+
+# ----------------------------------------------------------------------
+# Find the new authority record IDs
+
+# NOTE(review): the start of this statement (the "<<SQL" heredoc opener and
+# the beginning of the query) was lost in extraction; only the tail of the
+# WHERE clause survives. $new_since is interpolated directly into the SQL.
+my $sth = $dbh->prepare(<= DATE(NOW() - '$new_since day'::INTERVAL)
+SQL
+
+$sth->execute;
+# Collect the "id" column of every returned row.
+while (my $ref = $sth->fetchrow_hashref()) {
+    push(@auth_ids, $ref->{id});
+}
+$sth->finish;
+
+my $auth_rec_count = scalar(@auth_ids);
+announce("Auth IDs: @auth_ids") if $print_auth_ids;
+
+# Nothing to link: report and exit successfully.
+if (!@auth_ids) {
+    announce("No authority records created in the last $new_since days");
+    exit 0;
+}
+
+# ----------------------------------------------------------------------
+# Auth-to-Auth linking
+
+if ($link_auths) {
+    # Pass all new authority records to the auth-to-auth linker
+    for my $rec_id (@auth_ids) {
+        # List-form system() runs the linker without a shell. The path is
+        # relative, so it resolves against the current working directory.
+        # NOTE(review): the exit status is ignored, --db-port is not
+        # forwarded, and the password is passed in argv.
+        system(
+            './authority_authority_linker.pl',
+            '--db-host', $db_host,
+            '--db-user', $db_user,
+            '--db-pass', ($db_pass || ''),
+            '--record', $rec_id
+        );
+
+        usleep(250000); # 1/4 second; allow ctrl-c to penetrate
+        # $counter is only incremented when --progress is set, because
+        # && short-circuits.
+        announce("Auth records processed: $counter/$auth_rec_count")
+            if $progress && ++$counter % 10 == 0;
+    }
+}
+# Reset so the bib loop below starts counting from zero.
+$counter = 0;
+
+# Exit if there is nothing left to do.
+exit unless $print_bib_ids || $link_bibs;
+
+# ----------------------------------------------------------------------
+# Find bib records that we might want to link to the new authority
+# record.
+#
+# Query: give me bib records that link to browse entries that also
+# link to exactly one authority record, specifically the new authority
+# records we are processing via this script. Only include bib records
+# that are not already linked via bib_linking to said authority record.
+# This represents the set of bib records that might need to be linked
+# to our new authority records. 
+# ----------------------------------------------------------------------
+my %bib_ids; # de-dupe by record ID.
+# Comma-separated authority IDs; presumably interpolated into the lost
+# query below -- confirm against the original commit.
+my $auth_ids_param = join(',', @auth_ids);
+
+# One query per browse axis.
+for my $axis (qw/author subject series title/) {
+    # NOTE(review): the whole heredoc query and the "$sth = $dbh->" prefix
+    # of the prepare call were lost in extraction. The surviving text
+    # below is not valid Perl.
+    my $query = <prepare($query);
+    $sth->execute;
+    while (my $ref = $sth->fetchrow_hashref()) {
+        $bib_ids{$ref->{bib_record}} = 1; # de-dupe
+    }
+    $sth->finish;
+}
+
+# Default sort compares as strings, so the IDs come out in lexical order
+# rather than numeric order.
+@bib_ids = sort(keys(%bib_ids));
+my $bib_rec_count = scalar(@bib_ids);
+
+if ($link_bibs) {
+    for my $rec_id (@bib_ids) {
+        # fire off the linker for each of the records identified
+        # NOTE(review): as with the auth linker, the exit status is
+        # ignored and --db-port is not forwarded.
+        system('./authority_control_fields.pl',
+            '--db-host', $db_host,
+            '--db-user', $db_user,
+            '--db-pass', ($db_pass || ''),
+            '--record', $rec_id,
+            '--refresh'
+        );
+
+        usleep(250000); # 1/4 second; allow ctrl-c to penetrate
+        announce("Bib records processed: $counter/$bib_rec_count")
+            if $progress && ++$counter % 10 == 0;
+    }
+}
+
+# The bib ID list is announced after any linking has finished.
+announce("Bib IDs: @bib_ids") if $print_bib_ids;
+