--- /dev/null
+= Backstage Processes =
+
+Perform all steps as the 'opensrf' user.
+
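+For example, from a root shell:
+
+[source,sh]
+--------------------------------------------------------------------
+su - opensrf
+--------------------------------------------------------------------
+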
+== Quarterly Export + Import ==
+
+=== Setup ===
+
+[source,sh]
+--------------------------------------------------------------------
+export EXPORT_DATE=2016-10-01 # for example
+export WORKING_DIR=/openils/var/data/authority-control/backstage/quarterly/$EXPORT_DATE
+export PGHOST=foo
+export PGPASSWORD=foo
+export PGUSER=evergreen
+mkdir -p $WORKING_DIR
+--------------------------------------------------------------------
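+
+Optionally, sanity-check the connection settings before continuing
+(this assumes the database is named 'evergreen'):
+
+[source,sh]
+--------------------------------------------------------------------
+psql -d evergreen -c 'SELECT 1'
+--------------------------------------------------------------------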
+
+=== Exporting Bib Records ===
+
+Bibs are exported as MARC and uploaded to Backstage.
+
+==== Generate Export MARC File ====
+
+[source,sh]
+--------------------------------------------------------------------
+./export-bibs.pl \
+ --start-date 2010-01-01 \
+ --end-date 2016-06-01 \
+ --export-date $EXPORT_DATE \
+ --out-file $WORKING_DIR/exported-bibs.$EXPORT_DATE.mrc
+
+# Upload the file to the Backstage FTP server's "in" (inbound) directory.
+--------------------------------------------------------------------
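+
+For example, an upload via sftp might look like this (the user and
+host are placeholders):
+
+[source,sh]
+--------------------------------------------------------------------
+echo "put $WORKING_DIR/exported-bibs.$EXPORT_DATE.mrc" \
+    | sftp <backstage-user>@<backstage-ftp-host>:in
+--------------------------------------------------------------------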
+
+=== Process Results ===
+
+==== Fetch Results ====
+
+[source,sh]
+--------------------------------------------------------------------
+cd $WORKING_DIR
+wget <backstage-results-file>
+--------------------------------------------------------------------
+
+==== Process Results Files ====
+
+* Import new and modified authority records.
+* Import modified bib records.
+
+[source,sh]
+--------------------------------------------------------------------
+cd /home/opensrf/Evergreen/KCLS/backstage/
+
+./process-backstage-files.pl \
+ --verbose \
+ --export-date $EXPORT_DATE \
+ --file $WORKING_DIR/<results-file> \
+ --working-dir $WORKING_DIR \
+ --bib-collision-file bib-collisions.mrc \
+ > $WORKING_DIR/process.log
+--------------------------------------------------------------------
+
+==== Process Bib Collisions ====
+
+Bib records that were locally modified during Backstage processing are
+re-imported without clobbering the modifications.
+
+1. Create a new queue for this batch (rename to suit).
+
+[source,sql]
+--------------------------------------------------------------------
+INSERT INTO vandelay.bib_queue (owner, name)
+ VALUES (1, 'Backstage Q3 2016');
+SELECT id FROM vandelay.bib_queue WHERE name = 'Backstage Q3 2016';
+--------------------------------------------------------------------
+
+2. Import bib collisions via stream importer.
+
+[source,sh]
+--------------------------------------------------------------------
+# Make a copy of the collisions file for safe keeping,
+# since open-ils.vandelay deletes spool files.
+cp $WORKING_DIR/bib-collisions.mrc $WORKING_DIR/bib-collisions.bak.mrc
+
+cd /openils/bin
+./marc_stream_importer.pl \
+ --spoolfile $WORKING_DIR/bib-collisions.mrc \
+ --user admin \
+ --password XXX \
+ --bib-auto-overlay-exact \
+ --queue <new-queue-id> \
+ --merge-profile 104 # "Backstage Field Protection"
+--------------------------------------------------------------------
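+
+As a spot check (for example), count the queued records and how many
+were imported:
+
+[source,sql]
+--------------------------------------------------------------------
+SELECT COUNT(*) AS total, COUNT(import_time) AS imported
+FROM vandelay.queued_bib_record
+WHERE queue = <new-queue-id>;
+--------------------------------------------------------------------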
+
+==== Get auth/bib IDs that require re-linking ====
+
+[source,sh]
+--------------------------------------------------------------------
+cd /home/opensrf/Evergreen/KCLS/linking/
+
+./link-new-auth-records.pl --modified-since <days-since-reimport> \
+ --progress --print-auth-ids \
+ > $WORKING_DIR/auths-to-link.txt
+
+./link-new-auth-records.pl --modified-since <days-since-reimport> \
+ --progress --print-bib-ids \
+ > $WORKING_DIR/bibs-to-link.txt
+--------------------------------------------------------------------
+
+==== Re-Link Modified Auths and Bibs ====
+
+[source,sh]
+--------------------------------------------------------------------
+cd /home/opensrf/Evergreen/KCLS/authority-control/linking/
+
+./authority_authority_linker.pl --verbose \
+ --file $WORKING_DIR/auths-to-link.txt \
+ | tee -a $WORKING_DIR/auth2auth-linking.log
+
+# Bib linking takes many hours, sometimes days.
+
+./authority_control_fields.pl --verbose --refresh \
+ --file $WORKING_DIR/bibs-to-link.txt \
+ | tee -a $WORKING_DIR/bib-linking.log
+--------------------------------------------------------------------
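+
+Because bib linking can run for days, consider running it so it
+survives a disconnected session, for example under nohup:
+
+[source,sh]
+--------------------------------------------------------------------
+nohup ./authority_control_fields.pl --verbose --refresh \
+    --file $WORKING_DIR/bibs-to-link.txt \
+    >> $WORKING_DIR/bib-linking.log 2>&1 &
+--------------------------------------------------------------------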
+
--- /dev/null
+#!/usr/bin/env perl
+# -----------------------------------------------------------------------
+# Export bib records for Backstage processing.
+#
+# The UTF-8 encoded USMARC string for each record is printed to STDOUT.
+# Each exported bib has its export_date value updated to NOW().
+#
+# Exported bibs meet the following criteria:
+#
+# 1. Delete flag must be false.
+# 2. Record cannot contain any 086, 092, or 099 tags containing the phrase 'on order'.
+# 3. Boolean filter:
+# [ (001_test OR 035_test) AND has_holdings AND cat_date_in_range ]
+# OR
+# [ 998_test AND create_date_in_range ]
+# -----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+
+my $db_handle;
+
+my $start_date;
+my $end_date;
+my $export_date;
+my $ids_only;
+my $count_only;
+my $out_file;
+my $limit;
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_name = $ENV{PGDATABASE} || 'evergreen';
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+my $db_pass = $ENV{PGPASSWORD};
+my $help;
+
+GetOptions(
+ 'start-date=s' => \$start_date,
+ 'end-date=s' => \$end_date,
+ 'export-date=s' => \$export_date,
+ 'ids-only' => \$ids_only,
+ 'count-only' => \$count_only,
+ 'out-file=s' => \$out_file,
+    'limit=i' => \$limit,
+ 'db-user=s' => \$db_user,
+ 'db-host=s' => \$db_host,
+ 'db-name=s' => \$db_name,
+ 'db-port=i' => \$db_port,
+ 'db-pass=s' => \$db_pass,
+ 'help' => \$help
+);
+
+sub help {
+ print <<HELP;
+
+Export bib records for uploading to Backstage for processing.
+MARC data is sent to STDOUT. Redirect as needed.
+
+$0 --start-date 2015-06-01 \
+ --end-date 2016-06-01 \
+ --export-date 2016-06-01
+
+Options
+
+ --start-date <YYYY-MM-DD>
+ --end-date <YYYY-MM-DD>
+ Export bib records whose cataloging_date (for physical records) or
+ create_date (for electronic records) value is between the provided
+ start and end dates.
+
+ --export-date <YYYY-MM-DD>
+ Sets the export date to the provided value. If no --export-date
+ value is set, no export date value will be applied in the database.
+
+ --out-file </path/to/file>
+ Write MARC records (or IDs) to this file.
+
+ --ids-only
+ Write bib record IDs to the output file instead of the full MARC
+ record.
+
+ --count-only
+ Only print the number of bibs that would be exported to STDOUT.
+
+    --limit <n>
+        Export at most this many records.
+
+HELP
+ exit;
+}
+
+die "--start-date and --end-date required\n"
+ unless $start_date && $end_date;
+
+die "Invalid date format\n" unless
+ $start_date =~ /^\d{4}-\d{2}-\d{2}$/ &&
+ $end_date =~ /^\d{4}-\d{2}-\d{2}$/ &&
+ (!$export_date || $export_date =~ /^\d{4}-\d{2}-\d{2}$/);
+
+die "--out-file <filename> required\n" unless $out_file || $count_only;
+
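+# Builds the SQL that selects export candidates, implementing the
+# criteria summarized at the top of this script.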
+sub bib_query {
+ my $sql = <<SQL;
+
+-- viable_records applies the filters common to all records:
+--   not deleted
+--   not on order (086/092/099 test)
+WITH viable_records AS (
+
+ SELECT bre.id, bre.cataloging_date, bre.create_date
+ FROM biblio.record_entry bre
+ WHERE NOT deleted
+ AND NOT EXISTS (
+ SELECT 1 FROM metabib.real_full_rec
+ WHERE record = bre.id
+ AND tag IN ('086', '092', '099')
+ AND value ILIKE '%on order%'
+ )
+
+-- electronic_records have a 998$d value indicating an electronic format
+), electronic_records AS (
+
+ SELECT vr.id, create_date AS filter_date
+ FROM viable_records vr
+ JOIN metabib.real_full_rec mrfr ON (vr.id = mrfr.record)
+ WHERE
+ mrfr.tag = '998' AND
+ mrfr.subfield = 'd' AND
+ mrfr.value IN ('d','t','v','w','x','y','1','6')
+
+-- physical records are non-electronic, have at least one viable
+-- linked copy, have a valid 001 OR 035 field, and have a
+-- non-null cataloging date.
+), physical_records AS (
+
+ SELECT vr.id, vr.cataloging_date AS filter_date
+ FROM viable_records vr
+ JOIN metabib.real_full_rec mfr ON (
+ mfr.record = vr.id AND (
+ (
+ mfr.tag = '001' AND (
+ mfr.value ILIKE 'oc%' OR
+ mfr.value ILIKE 'on%' OR
+ mfr.value ILIKE 'wln%'
+ )
+ ) OR (
+ mfr.tag = '035' AND
+ mfr.subfield = 'a' AND
+ mfr.value ILIKE '%WaOLN%'
+ )
+ )
+ )
+ WHERE vr.cataloging_date IS NOT NULL AND EXISTS (
+ -- bib has at least one non-deleted copy
+ SELECT acp.id
+ FROM asset.copy acp
+ WHERE call_number IN (
+ SELECT id FROM asset.call_number
+ WHERE record = vr.id AND NOT deleted
+ )
+ AND NOT deleted
+ LIMIT 1
+ )
+
+), combined_records AS (
+ SELECT id, filter_date
+ FROM electronic_records
+ UNION
+ SELECT id, filter_date
+ FROM physical_records
+)
+
+SELECT DISTINCT(cr.id)
+FROM combined_records cr
+WHERE cr.filter_date BETWEEN '$start_date' AND '$end_date'
+SQL
+
+ $sql .= " LIMIT $limit" if $limit;
+ return $sql;
+}
+
+
+# Finds bibs to export and prints their MARC to STDOUT
+sub export_marc {
+
+ if ($out_file) {
+ open(MARCFILE, ">$out_file")
+ or die "Cannot open file for writing: $out_file\n";
+ binmode(MARCFILE, ':utf8');
+ }
+
+ my $sth = $db_handle->prepare(bib_query());
+ my $edate_sth = $db_handle->prepare(
+ 'SELECT * FROM metabib.set_export_date(?, ?)');
+
+ $sth->execute;
+ my $count = 0;
+ while (my $bib = $sth->fetchrow_hashref) {
+ $count++;
+ next if $count_only;
+
+ my $bib_id = $bib->{id};
+
+ if ($ids_only) {
+ print MARCFILE "$bib_id\n";
+ print "$count records written...\n" if ($count % 1000) == 0;
+ next;
+ }
+
+ my $rec = $db_handle->selectall_arrayref(
+ "SELECT marc FROM biblio.record_entry WHERE id = $bib_id");
+
+ my $marc = $rec->[0]->[0];
+ my $marcdoc = MARC::Record->new_from_xml($marc, 'UTF-8', 'USMARC');
+
+ print MARCFILE $marcdoc->as_usmarc;
+
+ print "$count records written...\n" if ($count % 1000) == 0;
+
+ next unless $export_date;
+
+ # Update the bib record's metabib.bib_export_data entry.
+ eval { $edate_sth->execute($bib_id, $export_date) };
+ die "Error setting export date for bib ".
+ "$bib_id to $export_date : $@\n" if $@;
+ }
+
+ close(MARCFILE) if $out_file;
+
+ print "$count total bib records\n";
+
+ $sth->finish;
+ $edate_sth->finish;
+}
+
+sub connect_db {
+ $db_handle = DBI->connect(
+ "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'",
+ $db_user, $db_pass, {
+ RaiseError => 1,
+ PrintError => 0,
+ AutoCommit => 1,
+ pg_expand_array => 0,
+ pg_enable_utf8 => 1
+ }
+ ) or die "Connection to database failed: $DBI::err : $DBI::errstr";
+}
+
+connect_db();
+export_marc();
+
+$db_handle->disconnect;
+
--- /dev/null
+#!/usr/bin/env perl
+# -----------------------------------------------------------------------
+# Process a Backstage results ZIP file: import new, modified, and
+# deleted authority records, and import modified bib records. Bibs
+# that were also modified locally since the export are written to a
+# collision file for separate re-import.
+#
+# TODO:
+# Disable auth record change propagation during auth record updates.
+# -----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use DateTime;
+use Getopt::Long;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::File::USMARC;
+use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
+use File::Basename;
+use Sys::Syslog qw(syslog openlog);
+use OpenILS::Utils::Normalize qw(clean_marc);
+binmode(STDOUT, ':utf8');
+
+my $db_handle;
+my $log_mod = 500; # log every 500th of each type of event (see verbose)
+
+my $file;
+my $export_date;
+my $working_dir = '.';
+my $bib_collision_file;
+my $verbose;
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_name = $ENV{PGDATABASE} || 'evergreen';
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+my $db_pass = $ENV{PGPASSWORD};
+
+my $syslog_facility = 'LOCAL6'; # matches Evergreen gateway
+my $syslog_ops = 'pid';
+my $syslog_ident = 'BACKSTAGE';
+
+my $new_auth_sth;
+my $mod_auth_sth;
+my $del_auth_sth;
+my $delmod_auth_sth;
+my $mod_bibs_sth;
+my $match_auth_sth;
+my $match_auth_001_sth;
+my $new_auth_ctr = 0;
+my $mod_auth_ctr = 0;
+my $del_auth_ctr = 0;
+my $mod_bibs_ctr = 0;
+my $col_bibs_ctr = 0;
+
+my $help;
+
+GetOptions(
+ 'file=s' => \$file,
+ 'export-date=s' => \$export_date,
+ 'working-dir=s' => \$working_dir,
+ 'bib-collision-file=s' => \$bib_collision_file,
+ 'verbose' => \$verbose,
+ 'db-user=s' => \$db_user,
+ 'db-host=s' => \$db_host,
+ 'db-name=s' => \$db_name,
+ 'db-port=i' => \$db_port,
+ 'db-pass=s' => \$db_pass,
+ 'help' => \$help
+);
+
+sub help {
+ print <<HELP;
+
+Process a Backstage results ZIP file, importing the authority and bib
+records it contains into Evergreen.
+
+$0 --export-date <YYYY-MM-DD> --file </path/to/results.zip>
+
+Options
+
+ --export-date
+ Bib records modified within EG since this time will be treated
+ specially when ingesting bib records produced by Backstage to
+        avoid losing changes made by staff since the export.
+
+ --file
+ Full path to ZIP file to process.
+
+ --working-dir
+ Directory where constituent files are extracted.
+ Defaults to the CWD of this script.
+
+ --bib-collision-file
+ File created in the working directory containing MARC data of
+ bib records that were modified by staff after export and
+ modified by Backstage as part of the export. These are
+ re-imported via external Vandelay process.
+
+ --db-host
+ --db-name
+ --db-port
+ --db-pass
+        Database connection parameters.
+
+HELP
+ exit;
+}
+
+die "required: --export-date YYYY-MM-DD\n" unless
+ $export_date && $export_date =~ /^\d{4}-\d{2}-\d{2}$/;
+
+die "--file required\n" unless $file;
+
+# Log every occurrence of each event type.
+$log_mod = 1 if $verbose;
+
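+# Log a message to syslog and echo it locally: errors and warnings go
+# to STDERR, everything else to STDOUT. Dies after logging when $die
+# is set.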
+sub announce {
+ my ($level, $msg, $die) = @_;
+ syslog("LOG_$level", $msg);
+
+ my $date_str = DateTime->now(time_zone => 'local')->strftime('%F %T');
+ my $msg_str = "$date_str [$$] $level $msg\n";
+
+ if ($die) {
+ die $msg_str;
+
+ } else {
+        if ($level eq 'ERR' or $level eq 'WARNING') {
+            # copy problem messages to STDERR so they remain visible
+            # (and are not duplicated) when STDOUT goes to a log file
+            warn $msg_str;
+ } else {
+ print $msg_str;
+ }
+ }
+}
+
+
+sub connect_db {
+ $db_handle = DBI->connect(
+ "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'",
+ $db_user, $db_pass, {
+ RaiseError => 1,
+ PrintError => 0,
+ AutoCommit => 1,
+ pg_expand_array => 0,
+ pg_enable_utf8 => 1
+ }
+ ) or die "Connection to database failed: $DBI::err : $DBI::errstr";
+}
+
+sub process_zip_file {
+
+ my $zip = Archive::Zip->new();
+
+ announce('ERR', "Failed to read $file", 1)
+ unless $zip->read($file) == AZ_OK;
+
+ # Avoid processing XLS and HTM files.
+ # All of the MARC files end in .UTF8.
+ for my $member ($zip->membersMatching('.*(\.UTF8|\.MRC)')) {
+
+ my $basename = basename($member->fileName());
+
+ announce('INFO', "Extracting file $basename");
+
+ my $local_file = "$working_dir/$basename";
+
+ announce('ERR', "Unable to extract to file: $local_file", 1)
+ unless $member->extractToFileNamed($local_file) == AZ_OK;
+
+ my $marc_batch = MARC::File::USMARC->in($local_file, 'UTF8')
+ or announce('ERR', "Unable to read $local_file as MARC", 1);
+
+ if ($basename =~ /BIB/) {
+
+ handle_modified_bibs($marc_batch);
+
+ } elsif ($basename =~ /DEL/) {
+
+ handle_deleted_auths($marc_batch);
+
+ } elsif ($basename =~ /CHG|NEW|AUTH/) {
+
+ handle_modified_auths($marc_batch);
+
+ } else {
+ announce('WARNING', "Un-handled file type: $basename");
+ }
+ }
+}
+
+# Returns IDs of bib records that have been modified since the export date.
+my @modified_bibs;
+my $mod_searched = 0;
+sub find_modified_bibs {
+
+ return if $mod_searched;
+ $mod_searched = 1;
+
+ my $id_arrays = $db_handle->selectall_arrayref(<<" SQL");
+ SELECT id
+ FROM biblio.record_entry
+ WHERE NOT deleted AND edit_date >= '$export_date'
+ SQL
+
+ @modified_bibs = map {$_->[0]} @$id_arrays;
+
+ announce('INFO', scalar(@modified_bibs)." bibs modified since export");
+}
+
+
+
+# 1. Bibs that have been modified by Backstage and locally are written
+# to the --bib-collision-file as MARC for later processing.
+# 2. Bibs that have only been modified by Backstage are updated
+# directly in the database.
+sub handle_modified_bibs {
+ my $marc_batch = shift;
+
+ find_modified_bibs();
+
+ while (my $record = $marc_batch->next()) {
+ my $bib_id = $record->subfield('901', 'c');
+
+ if (!$bib_id) {
+ announce('ERR', "Bib record has no 901c (ID) value. Skipping");
+ next;
+ }
+
+ if (grep {$bib_id eq $_} @modified_bibs) {
+ # Bib was edited by both parties. Save to external file
+ # for later processing.
+
+ write_bib_collision($record);
+
+ } else {
+ # Update our copy of the record.
+
+ my $marcxml = clean_marc($record->as_xml_record());
+ update_bib($marcxml, $bib_id);
+ }
+ }
+}
+
+sub update_bib {
+ my $marcxml = shift;
+ my $bib_id = shift;
+
+ eval { $mod_bibs_sth->execute($marcxml, $bib_id) };
+
+ if ($@) {
+ announce('ERR', "Error updating biblio record: $@ : $marcxml");
+ return;
+ }
+
+ $mod_bibs_ctr++;
+
+ announce('INFO', "Updated $mod_bibs_ctr bib records")
+ if $mod_bibs_ctr % $log_mod == 0;
+}
+
+sub write_bib_collision {
+ my $record = shift;
+
+ my $filename = "$working_dir/$bib_collision_file";
+
+ open(BIBS_FILE, ">>$filename") or
+ announce('ERR', "Cannot open bib collision file: $filename : $!", 1);
+
+ binmode(BIBS_FILE, ":utf8");
+
+ print BIBS_FILE $record->as_usmarc();
+
+ close BIBS_FILE or
+ announce('WARNING', "Error closing bib collision file: $filename : $!");
+
+ $col_bibs_ctr++;
+
+ announce('INFO', "Dumped $col_bibs_ctr bib collisions to file")
+ if $col_bibs_ctr % $log_mod == 0;
+}
+
+sub handle_deleted_auths {
+ my $marc_batch = shift;
+
+ while (my $record = $marc_batch->next()) {
+ my @matches = find_matching_auths($record);
+
+ for my $auth_id (@matches) {
+
+ eval {
+ # 2 mods.. wrap in transaction? (see autocommit)
+ $del_auth_sth->execute($auth_id);
+ $delmod_auth_sth->execute($auth_id);
+ };
+
+ if ($@) {
+ announce(
+ 'ERR', "Error deleting authority record: $@ : $auth_id");
+ next;
+ }
+
+ $del_auth_ctr++;
+
+ announce('INFO', "Deleted $del_auth_ctr authority records")
+ if $del_auth_ctr % $log_mod == 0;
+ }
+ }
+}
+
+sub handle_modified_auths {
+ my $marc_batch = shift;
+
+ while (my $record = $marc_batch->next()) {
+
+ modify_auth_005($record);
+
+ my @matches = find_matching_auths($record);
+ push(@matches, find_replaced_auths($record));
+
+ my $marcxml = clean_marc($record->as_xml_record());
+
+ if (@matches) {
+ update_auth($marcxml, $_) for @matches;
+ } else {
+ insert_auth($marcxml);
+ }
+ }
+}
+
+# Update the 005 field to the current date
+sub modify_auth_005 {
+ my $record = shift;
+ my $field_005 = $record->field('005');
+
+ # MARC 005-formatted date value
+ my $now_date = DateTime->now(
+ time_zone => 'local')->strftime('%Y%m%d%H%M%S.0');
+
+ if ($field_005) {
+ $field_005->update($now_date);
+
+ } else {
+ $field_005 = MARC::Field->new('005', $now_date);
+ $record->insert_fields_ordered($field_005);
+ }
+}
+
+
+sub update_auth {
+ my $marcxml = shift;
+ my $auth_id = shift;
+
+ eval { $mod_auth_sth->execute($marcxml, $auth_id) };
+
+ if ($@) {
+ announce('ERR', "Error updating authority record: $@ : $marcxml");
+ return;
+ }
+
+ $mod_auth_ctr++;
+
+ announce('INFO', "Updated $mod_auth_ctr authority records")
+ if $mod_auth_ctr % $log_mod == 0;
+}
+
+sub insert_auth {
+ my $marcxml = shift;
+
+ eval { $new_auth_sth->execute($marcxml, "IMPORT-" . time) };
+
+ if ($@) {
+ announce('ERR',
+ "Error creating new authority record: $@ : $marcxml");
+ return;
+ }
+
+ $new_auth_ctr++;
+
+ announce('INFO', "Created $new_auth_ctr authority records")
+ if $new_auth_ctr % $log_mod == 0;
+}
+
+# Return IDs of authority records that should be replaced by the
+# current record. Checks for records whose 010$a equals the 010$z of
+# the current record.
+# 010$z == Canceled/invalid LC control number
+sub find_replaced_auths {
+ my $record = shift;
+
+ my $subfield = $record->subfield('010', 'z');
+ return () unless $subfield;
+
+ $match_auth_sth->execute('010', $subfield);
+ my $matches = $match_auth_sth->fetchall_arrayref;
+ my @ids = map {$_->[0]} @$matches;
+
+ announce('INFO', "Auth 010z=$subfield matched records: @ids") if @ids;
+
+ return @ids;
+}
+
+# Return IDs of matching authority records. Matching tries:
+# 001 -> 010a -> 035a.
+sub find_matching_auths {
+ my $record = shift;
+
+ my $tag = '001';
+ my $subfield;
+
+ # 001 test requires its own SQL query
+ if (my $field = $record->field($tag)) {
+ if ($subfield = $field->data) {
+
+ $match_auth_001_sth->execute($subfield);
+ my $matches = $match_auth_001_sth->fetchall_arrayref;
+ my @ids = map {$_->[0]} @$matches;
+ announce('INFO',
+ "Auth 001=$subfield matched records: @ids") if @ids;
+ return @ids;
+ }
+ }
+
+ $tag = '010';
+ $subfield = $record->subfield($tag, 'a');
+
+ if (!$subfield) {
+ $tag = '035';
+ $subfield = $record->subfield($tag, 'a');
+ }
+
+ return () unless $subfield;
+
+ $match_auth_sth->execute($tag, $subfield);
+ my $matches = $match_auth_sth->fetchall_arrayref;
+
+ my @ids = map {$_->[0]} @$matches;
+ announce('INFO', "Auth ${tag}a=$subfield matched records: @ids") if @ids;
+
+ return @ids;
+}
+
+sub prepare_statements {
+
+ $del_auth_sth = $db_handle->prepare(<<" SQL");
+ DELETE FROM authority.record_entry WHERE id = ?
+ SQL
+
+ $delmod_auth_sth = $db_handle->prepare(<<" SQL");
+ UPDATE authority.record_entry
+ SET edit_date = NOW() WHERE id = ?
+ SQL
+
+ $mod_bibs_sth = $db_handle->prepare(<<" SQL");
+ UPDATE biblio.record_entry
+ SET marc = ?, edit_date = NOW()
+ WHERE id = ?
+ SQL
+
+ $mod_auth_sth = $db_handle->prepare(<<" SQL");
+ UPDATE authority.record_entry
+ SET marc = ?, edit_date = NOW()
+ WHERE id = ?
+ SQL
+
+ $new_auth_sth = $db_handle->prepare(<<" SQL");
+ INSERT INTO authority.record_entry (marc, last_xact_id)
+ VALUES (?, ?)
+ SQL
+
+ $match_auth_sth = $db_handle->prepare(<<" SQL");
+ SELECT DISTINCT(rec.id)
+ FROM authority.record_entry rec
+ JOIN authority.full_rec frec ON (frec.record = rec.id)
+ WHERE
+ NOT rec.deleted
+ AND frec.tag = ?
+ AND frec.subfield = 'a'
+ AND frec.value = NACO_NORMALIZE(?, 'a')
+ SQL
+
+ $match_auth_001_sth = $db_handle->prepare(<<" SQL");
+ SELECT DISTINCT(rec.id)
+ FROM authority.record_entry rec
+ JOIN authority.full_rec frec ON (frec.record = rec.id)
+ WHERE
+ NOT rec.deleted
+ AND frec.tag = '001'
+ AND frec.value = ?
+ SQL
+}
+
+openlog($syslog_ident, $syslog_ops, $syslog_facility);
+connect_db();
+prepare_statements();
+process_zip_file();
+
+$new_auth_sth->finish;
+$mod_auth_sth->finish;
+$del_auth_sth->finish;
+$delmod_auth_sth->finish;
+$match_auth_sth->finish;
+$match_auth_001_sth->finish;
+$mod_bibs_sth->finish;
+
+$db_handle->disconnect;
+
--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::Charset;
+use OpenSRF::System;
+use OpenILS::Utils::Fieldmapper;
+use OpenSRF::Utils::SettingsClient;
+use OpenSRF::EX qw/:try/;
+use Encode;
+use Unicode::Normalize;
+use OpenILS::Utils::Normalize;
+use Data::Dumper;
+use Pod::Usage qw/ pod2usage /;
+use DateTime;
+
+$ENV{OSRF_LOG_CLIENT} = 1;
+
+$Data::Dumper::Indent = 0;
+MARC::Charset->assume_unicode(1);
+
+my $acsaf_cache = {};
+
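+# Retrieve an authority control set field definition with its
+# main_entry fleshed, memoized in $acsaf_cache.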
+sub get_acsaf {
+ my ($e, $id) = @_;
+
+ $acsaf_cache->{$id} ||=
+ $e->retrieve_authority_control_set_authority_field([
+ $id,
+ {flesh => 1, flesh_fields => {acsaf => ["main_entry"]}}
+ ]);
+ return $acsaf_cache->{$id};
+}
+
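+# Concatenate the values of the listed subfields (e.g. 'abcd') into a
+# single string used for heading comparison.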
+sub matchable_string {
+ my ($field, $sf_list, $joiner) = @_;
+ $joiner ||= ' ';
+
+ return join($joiner, map { $field->subfield($_) } split "", $sf_list);
+}
+
+# ########### main
+my ($start_id, $end_id);
+my $bootstrap = '/openils/conf/opensrf_core.xml';
+my @records;
+my $verbose;
+my $input_file = '';
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_pass = $ENV{PGPASSWORD};
+my $links_removed = 0;
+my $links_added = 0;
+my $CNI = 'KCLS';
+
+my %options;
+my $result = GetOptions(
+ \%options,
+ 'configuration=s' => \$bootstrap,
+ 'record=i' => \@records,
+ 'all', 'help',
+ 'start_id=i' => \$start_id,
+ 'end_id=i' => \$end_id,
+ 'file=s' => \$input_file,
+ 'verbose' => \$verbose,
+ "db-host=s" => \$db_host,
+ "db-user=s" => \$db_user,
+ "db-pass=s" => \$db_pass,
+ "db-port=s" => \$db_port
+);
+
+sub announce {
+ my $msg = shift;
+ return unless $verbose;
+ print DateTime->now(time_zone => 'local')->strftime('%F %T') . " $msg\n";
+}
+
+pod2usage(0) if not $result or $options{help};
+
+OpenSRF::System->bootstrap_client(config_file => $bootstrap);
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+# must be loaded and initialized after the IDL is parsed
+
+use OpenILS::Utils::CStoreEditor;
+OpenILS::Utils::CStoreEditor::init();
+
+my $e = OpenILS::Utils::CStoreEditor->new;
+
+my $query = q{
+ SELECT
+ source,
+ ARRAY_TO_STRING(ARRAY_AGG(target || ',' || field), ';') AS links
+ FROM (
+ SELECT sh1.record AS target,
+ sh2.record AS source,
+ sh2.atag AS field
+ FROM authority.simple_heading sh1
+ JOIN authority.simple_heading sh2 USING (sort_value)
+ JOIN authority.control_set_authority_field af1 ON
+ (sh1.atag = af1.id AND af1.main_entry IS NULL)
+ JOIN authority.control_set_authority_field af2 ON
+ (sh2.atag = af2.id AND af2.main_entry IS NOT NULL
+ AND af2.linking_subfield IS NOT NULL)
+ %s -- where clause here
+ -- Ignore authority.authority_linking rows since we want to
+ -- rebuild all links, which may mean deleting bogus links.
+ -- EXCEPT SELECT target, source, field FROM authority.authority_linking
+ -- order by source for consistent testing
+ ) x GROUP BY 1 ORDER BY source
+};
+
+my @bind_params;
+if (@records) {
+ $query = sprintf($query, "WHERE sh2.record = ?");
+ @bind_params = @records; # should be just one scalar in this array.
+} elsif ($options{all}) {
+ $query = sprintf($query, ""); # no where clause
+} elsif ($start_id and $end_id) {
+ $query = sprintf($query, "WHERE sh2.record BETWEEN ? AND ?");
+ @bind_params = ($start_id, $end_id);
+
+} elsif ($input_file) {
+ # Load authority record IDs from a file.
+ announce("Reading authority record IDs from $input_file");
+
+ open FILE, "<", $input_file or die "Can't open file $input_file\n";
+ while(<FILE>) {
+ chomp;
+ push(@records, $_) if $_;
+ }
+ close FILE;
+
+ announce("Read ".scalar(@records)." from $input_file");
+
+ # No bind_params needed.
+ my $recstr = join(',', @records);
+ $query = sprintf($query, "WHERE sh2.record IN ($recstr)");
+
+} else {
+ pod2usage(0);
+}
+
+announce("SQL, params: ", Dumper($query, \@bind_params));
+
+my $dsn = "dbi:Pg:database=evergreen;host=$db_host;port=$db_port";
+my $dbh = DBI->connect($dsn, $db_user, $db_pass);
+$dbh->do('SET statement_timeout = 0');
+
+my $sth = $dbh->prepare($query);
+
+announce("Executing query ...");
+$sth->execute(@bind_params);
+
+my $problems = 0;
+
+if (@records) {
+ announce("Processing ".scalar(@records)." records");
+} elsif ($start_id) {
+ announce("Processing records $start_id => $end_id");
+} else {
+ announce("Processing all records");
+}
+
+my $total_records = 0;
+while (my ($src, $links) = $sth->fetchrow_array) {
+ announce("--------------------");
+ announce("Processing authority source record $src");
+ $total_records++;
+
+ try {
+
+ my $src_rec = $e->retrieve_authority_record_entry($src);
+        if (!$src_rec) {
+            warn "Unable to load source authority record $src\n";
+            # return exits the try block cleanly; 'next' here would
+            # exit the try subroutine via loop control.
+            return;
+        }
+
+ my $src_marc = MARC::Record->new_from_xml($src_rec->marc);
+ my $auth_src_thesaurus = substr($src_marc->field('008')->data(), 11, 1);
+ announce("Source record thesaurus value=$auth_src_thesaurus");
+
+ my $changed = 0;
+ my %tags_seen;
+ for my $link (split ';', $links) {
+ my ($target, $field_id) = split ',', $link;
+
+ next if $target eq $src_rec->id;
+
+ announce("Target: $target, field_id: $field_id");
+
+ my $target_rec = $e->retrieve_authority_record_entry($target);
+ if (!$target_rec) {
+ warn "Unable to load authority record $target. Skipping\n";
+ next;
+ }
+
+ my $target_marc = MARC::Record->new_from_xml($target_rec->marc);
+
+ my $auth_target_thesaurus =
+ substr($target_marc->field('008')->data(), 11, 1);
+
+ announce("Target record thesaurus value=$auth_target_thesaurus");
+
+ if ($auth_src_thesaurus ne $auth_target_thesaurus) {
+ announce("Thesauri for source/target records do not match. Skipping..");
+ next;
+ }
+
+ my $acsaf = get_acsaf($e, $field_id);
+ if (!$acsaf) {
+ warn "No authority control set field found for $field_id. Skipping\n";
+ next;
+ }
+
+ if (!$tags_seen{$acsaf->tag}) {
+ # the first time we process each tag for a given record,
+ # remove all existing auth-to-auth link subfields
+ # so they can be completely rebuilt.
+ for my $field ($src_marc->field($acsaf->tag)) {
+ if (my $val = $field->subfield('0')) {
+ announce("Removing existing subfield 0 : $val");
+ $field->delete_subfield(code => '0');
+ $changed = 1;
+ $links_removed++;
+ }
+ }
+ $tags_seen{$acsaf->tag} = 1;
+ }
+
+ # rebuild the links for the current tag
+ for my $field ($src_marc->field($acsaf->tag)) {
+
+ my $src_string = matchable_string(
+ $field, $acsaf->main_entry->display_sf_list,
+ $acsaf->main_entry->joiner
+ );
+
+ announce(sprintf(
+ "At field id=%s (tag=%s) / trying to match '%s'",
+ $acsaf->id, $acsaf->tag, $src_string));
+
+ my ($tfield) = $target_marc->field($acsaf->main_entry->tag);
+
+ if(defined $tfield) {
+
+ my $target_string = matchable_string(
+ $tfield, $acsaf->main_entry->display_sf_list,
+ $acsaf->main_entry->joiner
+ );
+
+ if ($target_string eq $src_string) {
+ announce("Got a match");
+                            # add_subfields() rather than update():
+                            # update() cannot create a subfield that
+                            # is absent, and any existing $0 was
+                            # deleted above.
+                            $field->add_subfields('0' => "($CNI)$target");
+ $changed = 1;
+ $links_added++;
+ }
+ }
+ }
+ }
+
+ if ($changed) {
+ announce("Updating authority record ".$src_rec->id);
+ $src_rec->marc(OpenILS::Utils::Normalize::clean_marc($src_marc));
+ $e->xact_begin;
+ $e->update_authority_record_entry($src_rec) or die $e->die_event;
+ $e->xact_commit;
+ }
+
+ } otherwise {
+ my $err = shift;
+ print STDERR "\nRecord # $src : ",
+ (ref $err eq "HASH" ? Dumper($err) : $err), "\n";
+
+ # Reset SAX parser so that one bad record doesn't
+ # kill the entire process.
+
+ import MARC::File::XML;
+ $problems++;
+ }
+}
+
+if (@records) {
+    announce("Processed ".scalar(@records).
+        " requested records; processed=$total_records problems=$problems");
+} elsif ($start_id) {
+ announce("Processed records $start_id => $end_id; ".
+ "processed=$total_records; problems=$problems");
+} else {
+ announce("Processed all records; processed=$total_records; problems=$problems");
+}
+
+announce("links removed: $links_removed");
+announce("links added: $links_added");
+announce("delta added: ".($links_added - $links_removed));
+
+exit ($problems > 0);
+
+__END__
+
+=head1 NAME
+
+authority_authority_linker.pl - Link reference headings in authority records to main entry headings in other authority records
+
+=head1 SYNOPSIS
+
+authority_authority_linker.pl [B<--configuration>=I<opensrf_core.conf>]
+[[B<--record>=I<record>[ B<--record>=I<record>]]] | [B<--all>] | [B<--start_id>=I<start-ID> B<--end_id>=I<end-ID>]
+
+=head1 DESCRIPTION
+
+For a given set of records, find authority reference headings that also
+appear as main entry headings in any other authority record. In the
+specific MARC field of the authority record (source) containing the reference
+heading with such a match in another authority record (target), add a subfield
+0 (zero) referring to the target record by ID.
+
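+For example (the heading and record IDs here are illustrative), a
+source record's see-from heading
+
+    400 1# $a Twain, Mark, $d 1835-1910
+
+that matches the 100 main entry of authority record 12345 gains a
+subfield 0 link:
+
+    400 1# $a Twain, Mark, $d 1835-1910 $0 (KCLS)12345
+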
+=head1 OPTIONS
+
+=over
+
+=item * B<-c> I<config-file>, B<--configuration>=I<config-file>
+
+Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
+Defaults to F</openils/conf/opensrf_core.xml>
+
+=item * B<-r> I<record-ID>, B<--record>=I<record-ID>
+
+Specifies the authority record ID (found in the C<authority.record_entry.id>
+column) of the B<source> record to process. This option may be specified more
+than once to process multiple records in a single run.
+
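+=item * B<--file>=I<file>
+
+Specifies a file containing authority record IDs, one per line, of the
+B<source> records to process.
+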
+=item * B<-a>, B<--all>
+
+Specifies that all authority records should be processed. For large
+databases, this may take an extraordinarily long amount of time.
+
+=item * B<-s> I<start-ID>, B<--start_id>=I<start-ID>
+
+Specifies the starting ID of the range of authority records to process.
+This option is ignored unless it is accompanied by the B<-e> or B<--end_id>
+option.
+
+=item * B<-e> I<end-ID>, B<--end_id>=I<end-ID>
+
+Specifies the ending ID of the range of authority records to process.
+This option is ignored unless it is accompanied by the B<-s> or B<--start_id>
+option.
+
+=back
+
+=head1 EXAMPLES
+
+ authority_authority_linker.pl --start_id 1 --end_id 50000
+
+Processes the authority records with IDs between 1 and 50,000 using the
+default OpenSRF configuration file for connection information.
+
+=head1 AUTHOR
+
+Lebbeous Fogle-Weekley <lebbeous@esilibrary.com>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2013 Equinox Software, Inc.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+
+=cut
--- /dev/null
+#!/usr/bin/perl
+# Copyright (C) 2010-2011 Laurentian University
+# Author: Dan Scott <dscott@laurentian.ca>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# ---------------------------------------------------------------
+
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::Charset;
+use OpenSRF::System;
+use OpenILS::Utils::Fieldmapper;
+use OpenSRF::Utils::SettingsClient;
+use Encode;
+use Unicode::Normalize;
+use OpenILS::Application::AppUtils;
+use Data::Dumper;
+use Pod::Usage qw/ pod2usage /;
+use DateTime;
+
+$ENV{OSRF_LOG_CLIENT} = 1;
+
+$Data::Dumper::Indent = 0;
+MARC::Charset->assume_unicode(1);
+
+my $start_id;
+my $end_id;
+my $refresh;
+my $days_back; # days; 0 means today only
+my $modified_since; # days; 0 means today only
+my $input_file ='';
+my $bootstrap = '/openils/conf/opensrf_core.xml';
+my @records;
+my $verbose = 0;
+my $sort_desc = 0;
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_pass = $ENV{PGPASSWORD};
+my $CNI = 'KCLS';
+
+my %options;
+my $result = GetOptions(
+ \%options,
+ 'configuration=s' => \$bootstrap,
+ 'record=i' => \@records,
+ 'refresh' => \$refresh,
+ 'start-id=i' => \$start_id,
+ 'end-id=i' => \$end_id,
+ 'days-back=i' => \$days_back,
+ 'modified-since=i' => \$modified_since,
+ 'sort-desc' => \$sort_desc,
+ 'file=s' => \$input_file,
+ 'verbose' => \$verbose,
+ "db-host=s" => \$db_host,
+ "db-user=s" => \$db_user,
+ "db-pass=s" => \$db_pass,
+ "db-port=s" => \$db_port,
+ 'all', # now assumed to be true when --file is unset
+ 'help'
+);
+
+sub announce {
+ my $msg = shift;
+ my $force = shift;
+ return unless $force || $verbose;
+ print DateTime->now->strftime('%F %T') . " [$$] $msg\n";
+}
+
+if (!$result or $options{help}) {
+ pod2usage(0);
+}
+
+OpenSRF::System->bootstrap_client(config_file => $bootstrap);
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+# must be loaded and initialized after the IDL is parsed
+use OpenILS::Utils::CStoreEditor;
+OpenILS::Utils::CStoreEditor::init();
+
+my $e = OpenILS::Utils::CStoreEditor->new;
+
+if ($input_file) {
+    open FILE, "<", $input_file or die "Can't open file $input_file\n";
+ while(<FILE>) {
+ chomp;
+ if($_) {
+ push(@records, $_);
+ }
+ }
+ close FILE;
+
+} else {
+
+
+ my $where = "WHERE not deleted";
+ $where .= " AND id >= $start_id" if $start_id;
+ $where .= " AND id <= $end_id" if $end_id;
+
+ my $order = "ORDER BY id";
+ $order .= " DESC" if $sort_desc;
+
+ if (defined $days_back or defined $modified_since) {
+ my $date_field = defined $days_back ? 'create_date' : 'edit_date';
+ my $duration = defined $days_back ? $days_back : $modified_since;
+ $where .= " AND DATE($date_field) >= DATE(NOW() - '$duration day'::INTERVAL)";
+ }
+
+ my $sql =
+ "SELECT DISTINCT(id) AS id FROM biblio.record_entry $where $order";
+
+ announce("Loading record ID's with query:\n$sql");
+
+ my $dsn = "dbi:Pg:database=evergreen;host=$db_host;port=$db_port";
+ my $dbh = DBI->connect($dsn, $db_user, $db_pass);
+
+ my $sth = $dbh->prepare($sql);
+ $sth->execute;
+
+ while (my $ref = $sth->fetchrow_hashref()) {
+ push(@records, $ref->{id});
+ }
+
+ $sth->finish();
+ $dbh->disconnect();
+}
+
+announce("Processing ".scalar(@records)." records");
+
+# Hash of controlled fields & subfields in bibliographic records, and their
+# corresponding controlling fields & subfields in the authority record
+#
+# So, if the bib 650$a can be controlled by an auth 150$a, that maps to:
+# 650 => { a => { 150 => 'a'}}
+my %controllees = (
+ 100 => { a => { 100 => 'a' },
+ b => { 100 => 'b' },
+ c => { 100 => 'c' },
+ d => { 100 => 'd' },
+ #f => { 100 => 'f' },
+ g => { 100 => 'g' },
+ h => { 100 => 'h' },
+ k => { 100 => 'k' },
+ l => { 100 => 'l' },
+ m => { 100 => 'm' },
+ n => { 100 => 'n' },
+ o => { 100 => 'o' },
+ p => { 100 => 'p' },
+ q => { 100 => 'q' },
+ r => { 100 => 'r' },
+ s => { 100 => 's' },
+ t => { 100 => 't' },
+ y => { 100 => 'y' },
+ z => { 100 => 'z' },
+ },
+ 110 => { a => { 110 => 'a' },
+ b => { 110 => 'b' },
+ c => { 110 => 'c' },
+ d => { 110 => 'd' },
+ #f => { 110 => 'f' },
+ g => { 110 => 'g' },
+ h => { 110 => 'h' },
+ k => { 110 => 'k' },
+ l => { 110 => 'l' },
+ m => { 110 => 'm' },
+ n => { 110 => 'n' },
+ o => { 110 => 'o' },
+ p => { 110 => 'p' },
+ r => { 110 => 'r' },
+ s => { 110 => 's' },
+ t => { 110 => 't' },
+ y => { 110 => 'y' },
+ z => { 110 => 'z' },
+ },
+ 111 => { a => { 111 => 'a' },
+ b => { 111 => 'b' },
+ c => { 111 => 'c' },
+ d => { 111 => 'd' },
+ e => { 111 => 'e' },
+ #f => { 111 => 'f' },
+ g => { 111 => 'g' },
+ h => { 111 => 'h' },
+ k => { 111 => 'k' },
+ l => { 111 => 'l' },
+ m => { 111 => 'm' },
+ n => { 111 => 'n' },
+ o => { 111 => 'o' },
+ p => { 111 => 'p' },
+ q => { 111 => 'q' },
+ r => { 111 => 'r' },
+ s => { 111 => 's' },
+ t => { 111 => 't' },
+ u => { 111 => 'u' },
+ y => { 111 => 'y' },
+ z => { 111 => 'z' },
+ },
+ 130 => { a => { 130 => 'a' },
+ d => { 130 => 'd' },
+ #f => { 130 => 'f' },
+ g => { 130 => 'g' },
+ h => { 130 => 'h' },
+ k => { 130 => 'k' },
+ l => { 130 => 'l' },
+ m => { 130 => 'm' },
+ n => { 130 => 'n' },
+ o => { 130 => 'o' },
+ p => { 130 => 'p' },
+ r => { 130 => 'r' },
+ s => { 130 => 's' },
+ t => { 130 => 't' },
+ x => { 130 => 'x' },
+ y => { 130 => 'y' },
+ z => { 130 => 'z' },
+ },
+ 400 => { a => { 100 => 'a' },
+ b => { 100 => 'b' },
+ c => { 100 => 'c' },
+ d => { 100 => 'd' },
+ #f => { 100 => 'f' },
+ g => { 100 => 'g' },
+ h => { 100 => 'h' },
+ k => { 100 => 'k' },
+ l => { 100 => 'l' },
+ m => { 100 => 'm' },
+ n => { 100 => 'n' },
+ o => { 100 => 'o' },
+ p => { 100 => 'p' },
+ q => { 100 => 'q' },
+ r => { 100 => 'r' },
+ s => { 100 => 's' },
+ t => { 100 => 't' },
+ y => { 100 => 'y' },
+ z => { 100 => 'z' },
+ },
+ 410 => { a => { 110 => 'a' },
+ b => { 110 => 'b' },
+ c => { 110 => 'c' },
+ d => { 110 => 'd' },
+ #f => { 110 => 'f' },
+ g => { 110 => 'g' },
+ h => { 110 => 'h' },
+ k => { 110 => 'k' },
+ l => { 110 => 'l' },
+ m => { 110 => 'm' },
+ n => { 110 => 'n' },
+ o => { 110 => 'o' },
+ p => { 110 => 'p' },
+ r => { 110 => 'r' },
+ s => { 110 => 's' },
+ t => { 110 => 't' },
+ y => { 110 => 'y' },
+ z => { 110 => 'z' },
+ },
+ 411 => { a => { 111 => 'a' },
+ b => { 111 => 'b' },
+ c => { 111 => 'c' },
+ d => { 111 => 'd' },
+ e => { 111 => 'e' },
+ #f => { 111 => 'f' },
+ g => { 111 => 'g' },
+ h => { 111 => 'h' },
+ k => { 111 => 'k' },
+ l => { 111 => 'l' },
+ m => { 111 => 'm' },
+ n => { 111 => 'n' },
+ o => { 111 => 'o' },
+ p => { 111 => 'p' },
+ q => { 111 => 'q' },
+ r => { 111 => 'r' },
+ s => { 111 => 's' },
+ t => { 111 => 't' },
+ u => { 111 => 'u' },
+ y => { 111 => 'y' },
+ z => { 111 => 'z' },
+ },
+ 600 => { a => { 100 => 'a' },
+ b => { 100 => 'b' },
+ c => { 100 => 'c' },
+ d => { 100 => 'd' },
+ #f => { 100 => 'f' },
+ g => { 100 => 'g' },
+ h => { 100 => 'h' },
+ k => { 100 => 'k' },
+ l => { 100 => 'l' },
+ m => { 100 => 'm' },
+ n => { 100 => 'n' },
+ o => { 100 => 'o' },
+ p => { 100 => 'p' },
+ q => { 100 => 'q' },
+ r => { 100 => 'r' },
+ s => { 100 => 's' },
+ t => { 100 => 't' },
+ v => { 100 => 'v' },
+ x => { 100 => 'x' },
+ y => { 100 => 'y' },
+ z => { 100 => 'z' },
+ },
+ 610 => { a => { 110 => 'a' },
+ b => { 110 => 'b' },
+ c => { 110 => 'c' },
+ d => { 110 => 'd' },
+ #f => { 110 => 'f' },
+ g => { 110 => 'g' },
+ h => { 110 => 'h' },
+ k => { 110 => 'k' },
+ l => { 110 => 'l' },
+ m => { 110 => 'm' },
+ n => { 110 => 'n' },
+ o => { 110 => 'o' },
+ p => { 110 => 'p' },
+ r => { 110 => 'r' },
+ s => { 110 => 's' },
+ t => { 110 => 't' },
+ v => { 110 => 'v' },
+ x => { 110 => 'x' },
+ y => { 110 => 'y' },
+ z => { 110 => 'z' },
+ },
+ 611 => { a => { 111 => 'a' },
+ b => { 111 => 'b' },
+ c => { 111 => 'c' },
+ d => { 111 => 'd' },
+ e => { 111 => 'e' },
+ #f => { 111 => 'f' },
+ g => { 111 => 'g' },
+ h => { 111 => 'h' },
+ k => { 111 => 'k' },
+ l => { 111 => 'l' },
+ m => { 111 => 'm' },
+ n => { 111 => 'n' },
+ o => { 111 => 'o' },
+ p => { 111 => 'p' },
+ q => { 111 => 'q' },
+ r => { 111 => 'r' },
+ s => { 111 => 's' },
+ t => { 111 => 't' },
+ u => { 111 => 'u' },
+ v => { 111 => 'v' },
+ x => { 111 => 'x' },
+ y => { 111 => 'y' },
+ z => { 111 => 'z' },
+ },
+ 630 => { a => { 130 => 'a' },
+ d => { 130 => 'd' },
+ #f => { 130 => 'f' },
+ g => { 130 => 'g' },
+ h => { 130 => 'h' },
+ k => { 130 => 'k' },
+ l => { 130 => 'l' },
+ m => { 130 => 'm' },
+ n => { 130 => 'n' },
+ o => { 130 => 'o' },
+ p => { 130 => 'p' },
+ r => { 130 => 'r' },
+ s => { 130 => 's' },
+ t => { 130 => 't' },
+ v => { 130 => 'v' },
+ x => { 130 => 'x' },
+ y => { 130 => 'y' },
+ z => { 130 => 'z' },
+ },
+ 650 => { a => { 150 => 'a' },
+ b => { 150 => 'b' },
+ c => { 150 => 'c' },
+ d => { 150 => 'd' },
+ v => { 150 => 'v' },
+ x => { 150 => 'x' },
+ y => { 150 => 'y' },
+ z => { 150 => 'z' },
+ },
+ 651 => { a => { 151 => 'a' },
+ b => { 151 => 'b' },
+ v => { 151 => 'v' },
+ x => { 151 => 'x' },
+ y => { 151 => 'y' },
+ z => { 151 => 'z' },
+ },
+ 655 => { a => { 155 => 'a' },
+ b => { 155 => 'b' },
+ c => { 155 => 'c' },
+ v => { 155 => 'v' },
+ x => { 155 => 'x' },
+ y => { 155 => 'y' },
+ z => { 155 => 'z' },
+ },
+ 700 => { a => { 100 => 'a' },
+ b => { 100 => 'b' },
+ c => { 100 => 'c' },
+ d => { 100 => 'd' },
+ #f => { 100 => 'f' },
+ g => { 100 => 'g' },
+ h => { 100 => 'h' },
+ k => { 100 => 'k' },
+ l => { 100 => 'l' },
+ m => { 100 => 'm' },
+ n => { 100 => 'n' },
+ o => { 100 => 'o' },
+ p => { 100 => 'p' },
+ q => { 100 => 'q' },
+ r => { 100 => 'r' },
+ s => { 100 => 's' },
+ t => { 100 => 't' },
+ y => { 100 => 'y' },
+ z => { 100 => 'z' },
+ },
+ 710 => { a => { 110 => 'a' },
+ b => { 110 => 'b' },
+ c => { 110 => 'c' },
+ d => { 110 => 'd' },
+ #f => { 110 => 'f' },
+ g => { 110 => 'g' },
+ h => { 110 => 'h' },
+ k => { 110 => 'k' },
+ l => { 110 => 'l' },
+ m => { 110 => 'm' },
+ n => { 110 => 'n' },
+ o => { 110 => 'o' },
+ p => { 110 => 'p' },
+ r => { 110 => 'r' },
+ s => { 110 => 's' },
+ t => { 110 => 't' },
+ y => { 110 => 'y' },
+ z => { 110 => 'z' },
+ },
+ 711 => { a => { 111 => 'a' },
+ b => { 111 => 'b' },
+ c => { 111 => 'c' },
+ d => { 111 => 'd' },
+ e => { 111 => 'e' },
+ #f => { 111 => 'f' },
+ g => { 111 => 'g' },
+ h => { 111 => 'h' },
+ k => { 111 => 'k' },
+ l => { 111 => 'l' },
+ m => { 111 => 'm' },
+ n => { 111 => 'n' },
+ o => { 111 => 'o' },
+ p => { 111 => 'p' },
+ q => { 111 => 'q' },
+ r => { 111 => 'r' },
+ s => { 111 => 's' },
+ t => { 111 => 't' },
+ u => { 111 => 'u' },
+ y => { 111 => 'y' },
+ z => { 111 => 'z' },
+ },
+ 730 => { a => { 130 => 'a' },
+ d => { 130 => 'd' },
+ #f => { 130 => 'f' },
+ g => { 130 => 'g' },
+ h => { 130 => 'h' },
+ k => { 130 => 'k' },
+ l => { 130 => 'l' },
+ m => { 130 => 'm' },
+ n => { 130 => 'n' },
+ o => { 130 => 'o' },
+ p => { 130 => 'p' },
+ r => { 130 => 'r' },
+ s => { 130 => 's' },
+ t => { 130 => 't' },
+ y => { 130 => 'y' },
+ z => { 130 => 'z' },
+ },
+ 800 => { a => { 100 => 'a' },
+ b => { 100 => 'b' },
+ c => { 100 => 'c' },
+ d => { 100 => 'd' },
+ #f => { 100 => 'f' },
+ g => { 100 => 'g' },
+ h => { 100 => 'h' },
+ k => { 100 => 'k' },
+ l => { 100 => 'l' },
+ m => { 100 => 'm' },
+ n => { 100 => 'n' },
+ o => { 100 => 'o' },
+ p => { 100 => 'p' },
+ q => { 100 => 'q' },
+ r => { 100 => 'r' },
+ s => { 100 => 's' },
+ t => { 100 => 't' },
+ y => { 100 => 'y' },
+ z => { 100 => 'z' },
+ },
+ 810 => { a => { 110 => 'a' },
+ b => { 110 => 'b' },
+ c => { 110 => 'c' },
+ d => { 110 => 'd' },
+ #f => { 110 => 'f' },
+ g => { 110 => 'g' },
+ h => { 110 => 'h' },
+ k => { 110 => 'k' },
+ l => { 110 => 'l' },
+ m => { 110 => 'm' },
+ n => { 110 => 'n' },
+ o => { 110 => 'o' },
+ p => { 110 => 'p' },
+ r => { 110 => 'r' },
+ s => { 110 => 's' },
+ t => { 110 => 't' },
+ y => { 110 => 'y' },
+ z => { 110 => 'z' },
+ },
+ 811 => { a => { 111 => 'a' },
+ b => { 111 => 'b' },
+ c => { 111 => 'c' },
+ d => { 111 => 'd' },
+ e => { 111 => 'e' },
+ #f => { 111 => 'f' },
+ g => { 111 => 'g' },
+ h => { 111 => 'h' },
+ k => { 111 => 'k' },
+ l => { 111 => 'l' },
+ m => { 111 => 'm' },
+ n => { 111 => 'n' },
+ o => { 111 => 'o' },
+ p => { 111 => 'p' },
+ q => { 111 => 'q' },
+ r => { 111 => 'r' },
+ s => { 111 => 's' },
+ t => { 111 => 't' },
+ u => { 111 => 'u' },
+ y => { 111 => 'y' },
+ z => { 111 => 'z' },
+ },
+ 830 => { a => { 130 => 'a' },
+ d => { 130 => 'd' },
+ #f => { 130 => 'f' },
+ g => { 130 => 'g' },
+ h => { 130 => 'h' },
+ k => { 130 => 'k' },
+ l => { 130 => 'l' },
+ m => { 130 => 'm' },
+ n => { 130 => 'n' },
+ o => { 130 => 'o' },
+ p => { 130 => 'p' },
+ r => { 130 => 'r' },
+ s => { 130 => 's' },
+ t => { 130 => 't' },
+ x => { 130 => 'x' },
+ y => { 130 => 'y' },
+ z => { 130 => 'z' },
+ },
+);
+
+# mapping of authority 008/11 "Subject heading system/thesaurus"
+# to the matching bib record 6XX second indicator
+my %AUTH_TO_BIB_IND2 = (
+ 'a' => '0', # Library of Congress Subject Headings (ADULT)
+ 'b' => '1', # Library of Congress Subject Headings (JUVENILE)
+ 'c' => '2', # Medical Subject Headings
+ 'd' => '3', # National Agricultural Library Subject Authority File
+ 'n' => '4', # Source not specified
+ 'k' => '5', # Canadian Subject Headings
+ 'v' => '6', # Répertoire de vedettes-matière
+ 'z' => '7' # Source specified in subfield $2 / Other
+);
+
+# Produces a new 6XX ind2 value for values found in subfield $2 when the
+# original ind2 value is 7 ("Source specified in subfield $2").
+my %REMAP_BIB_SF2_TO_IND2 = (
+ lcsh => '0',
+ mesh => '2',
+ nal => '3',
+ rvm => '6'
+);
+
+my $start_time = localtime();
+
+if($input_file) {
+ announce("Start $start_time for ".scalar(@records)." records");
+} elsif($start_id) {
+ announce("Start $start_time for record range: $start_id => $end_id");
+} else {
+ announce("Start $start_time for all records");
+}
+
+# Fetch leader/008 values for authority records. Filter out any whose
+# 008 14 or 15 field are not appropriate for the requested bib tag.
+# https://www.loc.gov/marc/authority/ad008.html
+sub authority_leaders_008_14_15 {
+ my ($e, $bib_tag, $auth_ids) = @_;
+
+ my $auth_leaders = $e->json_query({
+ select => {afr => ['record', 'value']},
+ from => 'afr',
+ where => {'+afr' => {tag => '008', record => $auth_ids}}
+ });
+
+ my $index;
+ $index = 14 if $bib_tag =~ /^[17]/; # author/name record
+ $index = 15 if $bib_tag =~ /^6/; # subject record
+
+ # avoid checking any other types of authority records.
+ return $auth_leaders unless $index;
+
+ my @keepers;
+ for my $leader (@$auth_leaders) {
+ my $value = $leader->{value} || '';
+ if (substr($value, $index, 1) eq 'a') {
+ push(@keepers, $leader);
+ } else {
+ announce("Skipping authority record ".$leader->{record}.
+ " on bib $bib_tag match; 008/#14|#15 not appropriate");
+ }
+ }
+
+ return \@keepers;
+}
+
+# given a set of authority record IDs and a controlled bib field,
+# returns the ID of the first authority record in the set that
+# matches the thesaurus spec of the bib record.
+sub find_matching_auth_for_thesaurus {
+ my ($e, $bib_field, $auth_leaders) = @_;
+
+ # bib field thesaurus spec
+ my $cfield_ind2 = $bib_field->indicator(2);
+
+ announce("6XX indicator 2 value = $cfield_ind2");
+
+ my $is_local = 0;
+ if ($cfield_ind2 eq '7') {
+ # subject thesaurus code is embedded in the bib field subfield 2
+
+ $is_local = 1;
+
+ my $thesaurus = $bib_field->subfield('2') || '';
+ announce("Found local thesaurus value $thesaurus");
+
+ # if we have no special remapping value for the found thesaurus,
+ # fall back to ind2 => 7=Other.
+ $cfield_ind2 = $REMAP_BIB_SF2_TO_IND2{$thesaurus} || '7';
+
+ announce("Local thesaurus '$thesaurus' ".
+ "remapped to ind2 value '$cfield_ind2'");
+ }
+
+ my $authz_found = undef;
+ for my $leader (@$auth_leaders) {
+ my $value = $leader->{value};
+ next unless $value;
+
+        my $thesaurus = substr($value, 11, 1); # 008/11 -- zero-based.
+
+        # Note for later that we encountered an authority record
+        # whose thesaurus value is z=Other.
+        $authz_found = $leader->{record} if $thesaurus eq 'z';
+
+        # Default to '' for thesaurus codes with no ind2 mapping.
+        if (($AUTH_TO_BIB_IND2{$thesaurus} || '') eq $cfield_ind2) {
+ announce("Found a match on thesaurus ".
+ "'$thesaurus' for " . $leader->{record});
+ return $leader->{record};
+ }
+ }
+
+ # If the bib field in question has a locally encoded thesaurus
+ # (ind2=7) and no auth record was found above via remapped
+ # thesaurus value, use the authority record with thesaurus z=Other.
+ return $authz_found if $is_local;
+
+ return undef;
+}
+
+# Returns true if the thesaurus controlling the bib field is "fast".
+sub is_fast_heading {
+ my $bib_field = shift;
+
+ if ($bib_field->tag() =~ /^65[015]/) {
+ my $ind2 = $bib_field->indicator(2) || '';
+
+ if ($ind2 eq '7') { # field controlled by "other"
+ my $thesaurus = $bib_field->subfield('2') || '';
+ return $thesaurus eq 'fast';
+ }
+ }
+
+ return 0;
+}
+
+sub update_record {
+    my ($record, $marc) = @_;
+
+    # Flatten the MARCXML for storage: strip newlines, the XML
+    # declaration, inter-element whitespace, and control characters,
+    # then entityize non-ASCII characters.
+    my $xml = $marc->as_xml_record();
+    $xml =~ s/\n//sgo;
+    $xml =~ s/^<\?xml.+\?\s*>//go;
+    $xml =~ s/>\s+</></go;
+    $xml =~ s/\p{Cc}//go;
+    $xml = OpenILS::Application::AppUtils->entityize($xml);
+
+ $record->marc($xml);
+
+ my $editor = OpenILS::Utils::CStoreEditor->new(xact=>1);
+ if ($editor->update_biblio_record_entry($record)) {
+ $editor->commit();
+ } else {
+ $editor->rollback();
+ }
+}
+
+my $count = 0;
+my $total = scalar(@records);
+announce("processing $total bib records", 1);
+
+foreach my $rec_id (@records) {
+ $count++;
+
+ announce("processing $count of $total", 1) if ($count % 1000) == 0;
+
+ announce("processing bib record $rec_id [$count of $total]");
+
+ # State variable; was the record changed?
+ my $changed = 0;
+
+ # get the record
+ my $record = $e->retrieve_biblio_record_entry($rec_id);
+ next unless $record && $record->deleted eq 'f';
+
+ eval {
+ my $marc = MARC::Record->new_from_xml($record->marc());
+
+ # get the list of controlled fields
+ my @c_fields = keys %controllees;
+
+ foreach my $c_tag (@c_fields) {
+ my @c_subfields = keys %{$controllees{"$c_tag"}};
+
+ # Get the MARCXML from the record and check for controlled fields/subfields
+ my @bib_fields = ($marc->field($c_tag));
+ foreach my $bib_field (@bib_fields) {
+
+ my $sf0 = $bib_field->subfield('0') || '';
+ my $is_fast_heading = is_fast_heading($bib_field);
+
+ if ($is_fast_heading && $sf0 =~ /\)fst/) {
+ # fast heading looks OK. ignore it.
+ announce("Ignoring FAST heading field on ".
+ "rec=$rec_id and tag=$c_tag \$0 $sf0");
+ next;
+ }
+
+ if ($sf0 && $refresh) {
+ announce("Removing \$0 $sf0 for rec=$rec_id and tag=$c_tag");
+ $bib_field->delete_subfield(code => '0');
+ $changed = 1;
+ }
+
+ if ($is_fast_heading) {
+ # Update record after potentially removing a bogus
+ # fast heading above.
+ update_record($record, $marc) if $changed;
+
+ # We don't control fast headings, so there's nothing
+ # left to do. Move on to the next field...
+ announce("No linking performed on FAST heading ".
+ "field on rec=$rec_id and tag=$c_tag");
+ next;
+ }
+
+ my %match_subfields;
+ my $match_tag;
+ my @searches;
+ foreach my $c_subfield (@c_subfields) {
+ my @sf_values = $bib_field->subfield($c_subfield);
+ if (@sf_values) {
+ # Give me the first element of the list of authority controlling tags for this subfield
+ # XXX Will we need to support more than one controlling tag per subfield? Probably. That
+ # will suck. Oh well, leave that up to Ole to implement.
+ $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0];
+ $match_tag = $match_subfields{$c_subfield};
+ push @searches, map {{term => $_, subfield => $c_subfield}} @sf_values;
+ }
+ }
+ next if !$match_tag;
+
+ announce("Searching for matches on controlled field $c_tag ".
+ "(auth tag=$match_tag): \n - ".Dumper(\@searches));
+
+ my @tags = ($match_tag);
+
+ # Now we've built up a complete set of matching controlled
+ # subfields for this particular field; let's check to see if
+ # we have a matching authority record
+ my $session = OpenSRF::AppSession->create("open-ils.search");
+ my $validates = $session->request("open-ils.search.authority.validate.tag.id_list",
+ "tags", \@tags, "searches", \@searches
+ )->gather();
+ $session->disconnect();
+
+
+                # Protect against a failed (error condition) search
+                # request: skip this field rather than fall through
+                # and dereference an undefined result.
+                if (!$validates) {
+                    print STDERR "Search for matching authority failed; record # $rec_id\n";
+                    next;
+                }
+
+ announce("Match query returned @$validates");
+
+ # No matches found. Nothing left to do for this field.
+ next if scalar(@$validates) == 0;
+
+ # Iterate through the returned authority record IDs to delete any
+ # matching $0 subfields already in the bib record
+ foreach my $auth_zero (@$validates) {
+ $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/);
+ }
+
+ # Find the best authority record to use for linking.
+
+ my $auth_leaders;
+ if ($bib_field->tag =~ /^[167]/) {
+ # For 1XX, 6XX, and 7XX bib fields, only link to
+ # authority records whose leader/008 positions
+ # 14 and 15 are coded to allow use as a name/author
+ # or subject record, depending.
+
+ $auth_leaders = authority_leaders_008_14_15(
+ $e, $bib_field->tag, $validates);
+
+ $validates = [map {$_->{record}} @$auth_leaders];
+ }
+
+ my $auth_id;
+
+ if ($bib_field->tag() =~ /^65[015]/) {
+ # Using the indicator-2 value from the controlled bib
+ # field, find the first authority in the list of matches
+ # that uses the same thesaurus. If no such authority
+ # is found, no matching occurs.
+ # TODO: perhaps this step should be part of the
+ # validation API search call above.
+
+ $auth_id = find_matching_auth_for_thesaurus(
+ $e, $bib_field, $auth_leaders) || '';
+
+ } else {
+
+ # For all other controlled fields, use the first
+ # authority record in the result set.
+ $auth_id = $validates->[0];
+ }
+
+ # Don't exit here just because we have no $auth_id. The
+ # bib field could have been changed above in the cleanup /
+ # delete phase.
+
+ if ($auth_id) {
+ # Add the auth ID and control number agency info from the
+ # matching authority record to the controlled bib field.
+ $changed = 1;
+ $bib_field->add_subfields('0' => "($CNI)$auth_id");
+ announce("auth=$auth_id cni=$CNI. It's a match!");
+ }
+ }
+ }
+
+ update_record($record, $marc) if $changed;
+
+ };
+
+ if ($@) {
+ print STDERR "\nRecord # $rec_id : $@\n";
+ import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire process
+ }
+}
+
+my $end_time = localtime();
+
+if($input_file) {
+ announce("Stop $end_time for ".scalar(@records)." records");
+} elsif($start_id) {
+    announce("Stop $end_time for record range: $start_id => $end_id");
+} else {
+    announce("Stop $end_time for all records");
+}
+
+__END__
+
+=head1 NAME
+
+authority_control_fields.pl - Controls fields in bibliographic records with authorities in Evergreen
+
+=head1 SYNOPSIS
+
+C<authority_control_fields.pl> [B<--configuration>=I<opensrf_core.conf>] [B<--refresh>]
+[[B<--record>=I<record>[ B<--record>=I<record>]]] | [B<--all>] | [B<--start-id>=I<start-ID> B<--end-id>=I<end-ID>]
+
+=head1 DESCRIPTION
+
+For a given set of records:
+
+=over
+
+=item * Iterate through the list of fields that are controlled fields
+
+=item * Iterate through the list of subfields that are controlled for
+that given field
+
+=item * Search for a matching authority record for that combination of
+field + subfield(s)
+
+=over
+
+=item * If we find a match, then add a $0 subfield to that field identifying
+the controlling authority record
+
+=item * If we do not find a match, then insert a row into an "uncontrolled"
+table identifying the record ID, field, and subfield(s) that were not controlled
+
+=back
+
+=item * Iterate through the list of floating subdivisions
+
+=over
+
+=item * If we find a match, then add a $0 subfield to that field identifying
+the controlling authority record
+
+=item * If we do not find a match, then insert a row into an "uncontrolled"
+table identifying the record ID, field, and subfield(s) that were not controlled
+
+=back
+
+=item * If we changed the record, update it in the database
+
+=back
+
+=head1 OPTIONS
+
+=over
+
+=item * B<-f>, B<--file>
+
+Specifies a file of bib record IDs to link, one ID per line.
+
+=item * B<-c> I<config-file>, B<--configuration>=I<config-file>
+
+Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
+Defaults to F</openils/conf/opensrf_core.xml>
+
+=item * B<-r> I<record-ID>, B<--record>=I<record-ID>
+
+Specifies the bibliographic record ID (found in the C<biblio.record_entry.id>
+column) of the record to process. This option may be specified more than once
+to process multiple records in a single run.
+
+=item * B<-a>, B<--all>
+
+Specifies that all bibliographic records should be processed. For large
+databases, this may take an extraordinarily long amount of time.
+
+=item * B<--refresh>
+
+Specifies that all authority links should be removed from the target
+bibliographic record(s). This will effectively rewrite all authority
+linking anew.
+
+=item * B<-s> I<start-ID>, B<--start_id>=I<start-ID>
+
+Specifies the starting ID of the range of bibliographic records to process.
+This option is ignored unless it is accompanied by the B<-e> or B<--end_id>
+option.
+
+=item * B<-e> I<end-ID>, B<--end_id>=I<end-ID>
+
+Specifies the ending ID of the range of bibliographic records to process.
+This option is ignored unless it is accompanied by the B<-s> or B<--start_id>
+option.
+
+=back
+
+=head1 EXAMPLES
+
+ authority_control_fields.pl --start_id 1 --end_id 50000
+
+Processes the bibliographic records with IDs between 1 and 50,000 using the
+default OpenSRF configuration file for connection information.
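+
+ authority_control_fields.pl --refresh --file /tmp/bib-ids.txt
+
+Rebuilds authority links for the bib records whose IDs are listed, one per
+line, in the example file F</tmp/bib-ids.txt>.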
+
+=head1 AUTHOR
+
+Dan Scott <dscott@laurentian.ca>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2010-2011 by Dan Scott
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+=cut
+
--- /dev/null
+#!/usr/bin/perl
+# ----------------------------------------------------------------------
+# Find bib records matching the requested criteria for linking.
+# Bib IDs are exported to one or more batch files for future processing.
+# ----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use DateTime;
+
+my $db_handle;
+my $counter = 0;
+
+# options
+my $help;
+my $modified_since;
+my $exported_since;
+my $batch_size = 10000;
+my $start_id;
+my $end_id;
+my $count_only;
+my $out_dir = '/tmp';
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_name = $ENV{PGDATABASE} || 'evergreen';
+my $db_pass = $ENV{PGPASSWORD};
+
+my $opt_result = GetOptions(
+ 'modified-since=s' => \$modified_since,
+ 'exported-since=s' => \$exported_since,
+ 'start-id=i' => \$start_id,
+ 'end-id=i' => \$end_id,
+ 'batch-size=i' => \$batch_size,
+ 'count-only' => \$count_only,
+ 'out-dir=s' => \$out_dir,
+ "db-host=s" => \$db_host,
+ "db-user=s" => \$db_user,
+ "db-pass=s" => \$db_pass,
+ "db-port=s" => \$db_port,
+ 'help' => \$help
+);
+
+sub announce {
+ my $msg = shift;
+ print DateTime->now(time_zone => 'local')->strftime('%F %T')." $msg\n";
+}
+
+sub help {
+ print <<HELP;
+ Find IDs for bib records based on various criteria. Write bib
+ IDs to batch files. Batch files are placed into --out-dir and
+ named bib-ids.000, bib-ids.001, etc.
+
+ Usage:
+
+ $0 --modified-since 2016-12-01 --batch-size 100 \
+ --out-dir /openils/var/data/linkbibs/2016-12-01
+
+ Options:
+
+ --modified-since <YYYY-MM-DD>
+ Limit bibs to those modified since the specified date.
+
+ --exported-since <YYYY-MM-DD>
+ Limit bibs to those exported since the specified date.
+ Export date is based on data found in the
+ metabib.bib_export_data table.
+
+ --start-id <id>
+ Limit bibs to those whose ID is no less than <id>
+
+ --end-id <id>
+ Limit bibs to those whose ID is no greater than <id>
+
+ --out-dir [/tmp]
+ Output directory.
+
+ --batch-size
+ Number of bib IDs to write to each batch file.
+
+ --count-only
+ Print the total number of records that would be added
+ to batch files without adding to any batch files.
+
+ --db-host
+ --db-user
+ --db-pass
+ --db-port
+ Database connection params. PG environment variables are
+ also inspected for values. When all else fails, try to
+ connect to database evergreen\@localhost
+HELP
+ exit 0;
+}
+
+help() if $help || !$opt_result;
+
+sub connect_db {
+ $db_handle = DBI->connect(
+ "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'",
+ $db_user, $db_pass, {
+ RaiseError => 1,
+ PrintError => 0,
+ AutoCommit => 1,
+ pg_expand_array => 0,
+ pg_enable_utf8 => 1
+ }
+ ) or die "Connection to database failed: $DBI::err : $DBI::errstr";
+}
+
+connect_db();
+
+# ----------------------------------------------------------------------
+my $from = 'FROM biblio.record_entry bre';
+
+my $where = 'WHERE NOT bre.deleted';
+$where .= " AND bre.id >= $start_id" if $start_id;
+$where .= " AND bre.id <= $end_id" if $end_id;
+
+# Limit to bibs edited since the --modified-since date.
+$where .= " AND bre.edit_date >= '$modified_since'" if $modified_since;
+
+if ($exported_since) {
+ $where .= " AND bed.export_date > '$exported_since'";
+ $from .= " JOIN metabib.bib_export_data bed ON (bed.bib = bre.id)";
+}
+
+my $sql = <<SQL;
+ SELECT bre.id
+ $from
+ $where
+ ORDER BY bre.id DESC;
+SQL
+
+my $sth = $db_handle->prepare($sql);
+$sth->execute;
+
+my $batch_file;
+sub open_batch_file {
+ my $path = shift;
+ announce("Starting new batch file: $path");
+
+ close $batch_file if $batch_file;
+
+ open $batch_file, '>', $path or
+ die "Cannot open batch file for writing: $!\n";
+}
+
+my $ctr = 0;
+my $batch = 0;
+while (my $ref = $sth->fetchrow_hashref()) {
+ $ctr++;
+ next if $count_only;
+
+ if (( ($ctr - 1) % $batch_size) == 0) {
+ my $path = sprintf("$out_dir/bib-ids.%0.3d", $batch);
+ open_batch_file($path);
+ $batch++;
+ }
+
+ print $batch_file $ref->{id} . "\n";
+}
+
+close $batch_file if $batch_file;
+$sth->finish;
+
+announce("Found $ctr bib records");
+
--- /dev/null
+#!/bin/bash
+set -eu
+PROC_COUNT=3
+BATCHES_PER_PROC=3
+BATCH_DIR="/openils/var/data/linkbibs"
+
+DOW=$(date +%u);
+[ $DOW == 7 ] && DOW=0; # make Sunday = 0
+
+BATCH=$(( PROC_COUNT * BATCHES_PER_PROC * DOW ));
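+# e.g. on Wednesday (DOW=3) with 3 procs x 3 batches each, BATCH=27
+# and files bib-ids.027 through bib-ids.035 are processed.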
+
+echo "Starting at batch number $BATCH";
+
+function execute_background_proc {
+ START=$1
+ for batch in $(seq 1 $BATCHES_PER_PROC); do
+ # Batch files are numbered from zero (bib-ids.000 is first),
+ # so subtract one from the 1-based loop counter.
+ FILE=$(printf "$BATCH_DIR/bib-ids.%0.3d" $(($START + $batch - 1)));
+ echo "Linking bib file $FILE"
+ perl ./authority_control_fields.pl --refresh --file "$FILE"
+ done;
+}
+
+for PROC in $(seq 1 $PROC_COUNT); do
+ (execute_background_proc $BATCH) &
+ BATCH=$((($BATCH + BATCHES_PER_PROC)));
+done
+
+wait;
+
+echo "Done processing all batches"
+
--- /dev/null
+#!/usr/bin/perl
+# ----------------------------------------------------------------------
+# Find authority records newer than a specified age. Once found,
+# run each through the auth-to-auth linking process. Then locate
+# bib records that we might want to link to the new records and
+# pass them off to the bib-to-auth linker.
+# ----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use DateTime;
+use Pod::Usage qw/pod2usage/;
+use Time::HiRes qw/usleep/;
+
+my @auth_ids;
+my @bib_ids;
+my $counter = 0;
+
+# options
+my $help;
+my $modified_since;
+my $max_auth_count;
+my $start_auth_id;
+my $print_auth_ids;
+my $print_bib_ids;
+my $link_auths;
+my $link_bibs;
+my $progress;
+my $db_host = $ENV{PGHOST} || 'localhost';
+my $db_port = $ENV{PGPORT} || '5432';
+my $db_user = $ENV{PGUSER} || 'evergreen';
+my $db_pass = $ENV{PGPASSWORD};
+
+my $opt_result = GetOptions(
+ 'modified-since=i' => \$modified_since,
+ 'max-auth-count=i' => \$max_auth_count,
+ 'start-auth-id=i' => \$start_auth_id,
+ 'print-bib-ids' => \$print_bib_ids,
+ 'print-auth-ids' => \$print_auth_ids,
+ 'link-bibs' => \$link_bibs,
+ 'link-auths' => \$link_auths,
+ 'progress' => \$progress,
+ "db-host=s" => \$db_host,
+ "db-user=s" => \$db_user,
+ "db-pass=s" => \$db_pass,
+ "db-port=s" => \$db_port,
+ 'help' => \$help
+);
+
+sub announce {
+ my $msg = shift;
+ print DateTime->now(time_zone => 'local')->strftime('%F %T')." $msg\n";
+}
+
+sub help {
+ print <<HELP;
+
+ Update or create bib-to-auth and auth-to-auth links for authority
+ records that were created or modified within the last X days.
+
+ Usage:
+
+ $0 --modified-since 1 --link-auths --link-bibs
+
+ Options:
+
+ --modified-since <days>
+ Process authority records created or modified within the
+ last <days> days.
+
+ --max-auth-count <count>
+ Process <count> authority records in total. Use with
+ --start-auth-id to process batches of records across
+ multiple instances of the script.
+
+ --start-auth-id <id>
+ Process authority records whose ID is equal to or greater
+ than <id>. Use with --max-auth-count to process batches
+ of records across multiple runs of the script.
+
+ --print-auth-ids
+ Print authority record IDs to process to STDOUT
+
+ --print-bib-ids
+ Print bib record IDs to process to STDOUT
+
+ --link-auths
+ Run identified authority records through authority_authority_linker.pl
+
+ --link-bibs
+ Run identified bib records through authority_control_fields.pl
+
+ --progress
+ Log linking progress to STDOUT
+
+ --db-host
+ --db-user
+ --db-pass
+ --db-port
+
+ Database connection params. PG environment variables are
+ also inspected for values. When all else fails, try to
+ connect to database evergreen\@localhost
+HELP
+ exit 0;
+}
+
+help() if $help || !$opt_result;
+
+# The SQL below interpolates this value directly, so require it.
+die "--modified-since <days> is required\n" unless defined $modified_since;
+
+my $dsn = "dbi:Pg:database=evergreen;host=$db_host;port=$db_port";
+my $dbh = DBI->connect($dsn, $db_user, $db_pass)
+ or die "Cannot connect to database: $dsn\n";
+
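+# Record scans can be slow on large tables; disable the statement timeout.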
+$dbh->do('SET statement_timeout = 0');
+
+# ----------------------------------------------------------------------
+# Load the authority record IDs
+my $where2 = $start_auth_id ? "AND id >= $start_auth_id" : '';
+my $limit = $max_auth_count ? "LIMIT $max_auth_count" : '';
+
+my $sth = $dbh->prepare(<<SQL);
+ SELECT id FROM authority.record_entry
+ WHERE DATE(edit_date) >= DATE(NOW() - '$modified_since day'::INTERVAL)
+ $where2
+ ORDER BY id
+ $limit
+SQL
+
+$sth->execute;
+
+while (my $ref = $sth->fetchrow_hashref()) {
+ push(@auth_ids, $ref->{id});
+}
+$sth->finish;
+
+my $auth_rec_count = scalar(@auth_ids);
+
+if (!@auth_ids) {
+ announce("No authority records edited in the last $modified_since days");
+ exit 0;
+}
+
+print join("\n", @auth_ids) . "\n" if $print_auth_ids;
+
+# Let the caller know what the last record processed will be,
+# so the next iteration of the script can start there.
+announce("Final auth ID will be: " . $auth_ids[-1]) if $max_auth_count;
+
+# ----------------------------------------------------------------------
+# Auth-to-Auth linking
+
+if ($link_auths) {
+ # Pass all new authority records to the auth-to-auth linker
+ for my $rec_id (@auth_ids) {
+
+ system(
+ './authority_authority_linker.pl',
+ '--db-host', $db_host,
+ '--db-user', $db_user,
+ '--db-pass', ($db_pass || ''),
+ '--record', $rec_id
+ );
+
+ usleep(250000); # 1/4 second; allow ctrl-c to penetrate
+ announce("Auth records processed: $counter/$auth_rec_count")
+ if $progress && ++$counter % 10 == 0;
+ }
+}
+$counter = 0;
+
+# Exit if there is nothing left to do.
+exit unless $print_bib_ids || $link_bibs;
+
+# ----------------------------------------------------------------------
+# Find bib records that we might want to link to the new authority
+# record.
+#
+# Query: give me bib records that link to browse entries that also
+# link to exactly one authority record, specifically the new authority
+# records we are processing via this script. Only include bib records
+# that are not already linked via bib_linking to said authority record.
+# This represents the set of bib records that might need to be linked
+# to our new authority records.
+# ----------------------------------------------------------------------
+my %bib_ids; # de-dupe by record ID.
+my $auth_ids_param = join(',', @auth_ids);
+
+for my $axis (qw/author subject series title/) {
+ my $query = <<SQL;
+SELECT
+ entry.id,
+ are.id AS auth_record,
+ def.source AS bib_record
+FROM metabib.browse_${axis}_entry entry
+ JOIN metabib.browse_${axis}_entry_simple_heading_map map
+ ON (map.entry = entry.id)
+ JOIN authority.simple_heading ash ON (ash.id = map.simple_heading)
+ JOIN authority.record_entry are ON (are.id = ash.record)
+ JOIN metabib.browse_${axis}_entry_def_map def ON (def.entry = entry.id)
+ JOIN biblio.record_entry bre ON (bre.id = def.source)
+ JOIN (
+ -- we only care about browse entries that link to
+ -- exactly one auth record, the auth record in question.
+ SELECT entry.id, COUNT(are.id)
+ FROM metabib.browse_${axis}_entry entry
+ JOIN metabib.browse_${axis}_entry_simple_heading_map map
+ ON (map.entry = entry.id)
+ JOIN authority.simple_heading ash
+ ON (ash.id = map.simple_heading)
+ JOIN authority.record_entry are
+ ON (are.id = ash.record)
+ WHERE NOT are.deleted
+ GROUP BY 1
+ HAVING COUNT(are.id) = 1
+ ) singles ON (singles.id = entry.id)
+ LEFT JOIN authority.bib_linking link
+ ON (link.bib = def.source AND link.authority = are.id)
+WHERE
+ bre.deleted IS FALSE
+ AND link.authority IS NULL -- unlinked records
+ AND are.id IN ($auth_ids_param)
+SQL
+
+ $sth = $dbh->prepare($query);
+ $sth->execute;
+ while (my $ref = $sth->fetchrow_hashref()) {
+ $bib_ids{$ref->{bib_record}} = 1; # de-dupe
+ }
+ $sth->finish;
+}
+
+@bib_ids = sort { $a <=> $b } keys %bib_ids; # numeric sort of record IDs
+my $bib_rec_count = scalar(@bib_ids);
+
+if ($link_bibs) {
+ for my $rec_id (@bib_ids) {
+ # fire off the linker for each of the records identified
+ system('./authority_control_fields.pl',
+ '--db-host', $db_host,
+ '--db-user', $db_user,
+ '--db-pass', ($db_pass || ''),
+ '--record', $rec_id,
+ '--refresh'
+ );
+
+ usleep(250000); # 1/4 second; allow ctrl-c to penetrate
+ announce("Bib records processed: $counter/$bib_rec_count")
+ if $progress && ++$counter % 10 == 0;
+ }
+}
+
+print join("\n", @bib_ids) . "\n" if $print_bib_ids;
+
+++ /dev/null
-= Backstage Processes =
-
-Perform steps as 'opensrf'
-
-== Quarterly Export + Import ==
-
-=== Setup ===
-
-[source,sh]
---------------------------------------------------------------------
-export EXPORT_DATE=2016-10-01 # for example
-export WORKING_DIR=/openils/var/data/authority-control/backstage/quarterly/$EXPORT_DATE
-export PGHOST=foo
-export PGPASSWORD=foo
-export PGUSER=evergreen
-mkdir -p $WORKING_DIR
---------------------------------------------------------------------
-
-=== Exporting Bib Records ===
-
-Bibs are exported as MARC and uploaded to Backstage
-
-==== Generate Export MARC File ====
-
-[source,sh]
---------------------------------------------------------------------
-./export-bibs.pl \
- --start-date 2010-01-01 \
- --end-date 2016-06-01 \
- --export-date $EXPORT_DATE \
- --out-file $WORKING_DIR/exported-bibs.$EXPORT_DATE.mrc
-
-# Send file to BS FTP server in(bound) directory.
---------------------------------------------------------------------
-
-=== Process Results ===
-
-==== Fetch Results ====
-
-[source,sh]
---------------------------------------------------------------------
-cd $WORKING_DIR
-wget <backstage-results-file>
---------------------------------------------------------------------
-
-==== Process Results Files ====
-
-* Import new and modified authority records.
-* Import modified bib records.
-
-[source,sh]
---------------------------------------------------------------------
-cd /home/opensrf/Evergreen/KCLS/backstage/
-
-./process-backstage-files.pl \
- --verbose \
- --export-date $EXPORT_DATE \
- --file $WORKING_DIR/<results-file> \
- --working-dir $WORKING_DIR \
- --bib-collision-file bib-collisions.mrc \
- > $WORKING_DIR/process.log
---------------------------------------------------------------------
-
-==== Process Bib Collisions ====
-
-Bib records that were locally modified during Backstage processing are
-re-imported without clobbering the modifications.
-
-1. Create a new queue for this batch (rename to suit).
-
-[source,sh]
---------------------------------------------------------------------
-INSERT INTO vandelay.bib_queue (owner, name)
- VALUES (1, 'Backstage Q3 2016');
-SELECT id FROM vandelay.bib_queue WHERE name = 'Backstage Q3 2016';
---------------------------------------------------------------------
-
-2. Import bib collisions via stream importer.
-
-[source,sh]
---------------------------------------------------------------------
-# Make a copy of the collisions file for safe keeping,
-# since open-ils.vandelay deletes spool files.
-cp $WORKING_DIR/bib-collisions.mrc $WORKING_DIR/bib-collisions.bak.mrc
-
-cd /openils/bin
-./marc_stream_importer.pl \
- --spoolfile $WORKING_DIR/bib-collisions.mrc \
- --user admin \
- --password XXX \
- --bib-auto-overlay-exact \
- --queue <new-queue-id> \
- --merge-profile 104 # "Backstage Field Protection"
---------------------------------------------------------------------
-
-==== Get auth/bib IDs that require re-linking ====
-
-[source,sh]
---------------------------------------------------------------------
-cd /home/opensrf/Evergreen/KCLS/linking/
-
-./link-new-auth-records.pl --modified-since <days-since-reimport> \
- --progress --print-auth-ids \
- > $WORKING_DIR/auths-to-link.txt
-
-./link-new-auth-records.pl --modified-since <days-since-reimport> \
- --progress --print-bib-ids \
- > $WORKING_DIR/bibs-to-link.txt
---------------------------------------------------------------------
-
-==== Re-Link Modified Auths and Bibs ====
-
-[source,sh]
---------------------------------------------------------------------
-cd /home/opensrf/Evergreen/KCLS/authority-control/linking/
-
-./authority_authority_linker.pl --verbose \
- --file $WORKING_DIR/auths-to-link.txt \
- | tee -a $WORKING_DIR/auth2auth-linking.log
-
-# Bib linking takes many hours, sometimes days.
-
-./authority_control_fields.pl --verbose --refresh \
- --file $WORKING_DIR/bibs-to-link.txt \
- | tee -a $WORKING_DIR/bib-linking.log
---------------------------------------------------------------------
-
+++ /dev/null
-#!/usr/bin/env perl
-# -----------------------------------------------------------------------
-# Export bib records for Backstage processing.
-#
-# The UTF-8 encoded USMARC string for each record is written to the
-# --out-file. Each exported bib has its export_date value set to the
-# --export-date value, when one is supplied.
-#
-# Exported bibs meet the following criteria:
-#
-# 1. Delete flag must be false.
-# 2. Record cannot contain any 086, 092, or 099 tags containing the phrase 'on order'
-# 3. Boolean filter:
-# [ (001_test OR 035_test) AND has_holdings AND cat_date_in_range ]
-# OR
-# [ 998_test AND create_date_in_range ]
-# -----------------------------------------------------------------------
-use strict;
-use warnings;
-use DBI;
-use Getopt::Long;
-use MARC::Record;
-use MARC::File::XML (BinaryEncoding => 'UTF-8');
-
-my $db_handle;
-
-my $start_date;
-my $end_date;
-my $export_date;
-my $ids_only;
-my $count_only;
-my $out_file;
-my $limit;
-my $db_user = $ENV{PGUSER} || 'evergreen';
-my $db_name = $ENV{PGDATABASE} || 'evergreen';
-my $db_host = $ENV{PGHOST} || 'localhost';
-my $db_port = $ENV{PGPORT} || '5432';
-my $db_pass = $ENV{PGPASSWORD};
-my $help;
-
-GetOptions(
- 'start-date=s' => \$start_date,
- 'end-date=s' => \$end_date,
- 'export-date=s' => \$export_date,
- 'ids-only' => \$ids_only,
- 'count-only' => \$count_only,
- 'out-file=s' => \$out_file,
- 'limit=i' => \$limit,
- 'db-user=s' => \$db_user,
- 'db-host=s' => \$db_host,
- 'db-name=s' => \$db_name,
- 'db-port=i' => \$db_port,
- 'db-pass=s' => \$db_pass,
- 'help' => \$help
-);
-
-sub help {
- print <<HELP;
-
-Export bib records for uploading to Backstage for processing.
-MARC data is written to the file named by --out-file.
-
-$0 --start-date 2015-06-01 \
- --end-date 2016-06-01 \
- --export-date 2016-06-01
-
-Options
-
- --start-date <YYYY-MM-DD>
- --end-date <YYYY-MM-DD>
- Export bib records whose cataloging_date (for physical records) or
- create_date (for electronic records) value is between the provided
- start and end dates.
-
- --export-date <YYYY-MM-DD>
- Sets the export date to the provided value. If no --export-date
- value is set, no export date value will be applied in the database.
-
- --out-file </path/to/file>
- Write MARC records (or IDs) to this file.
-
- --ids-only
- Write bib record IDs to the output file instead of the full MARC
- record.
-
- --count-only
- Only print the number of bibs that would be exported to STDOUT.
-
- --limit
- Export at most this many records.
-
-HELP
- exit;
-}
-
-die "--start-date and --end-date required\n"
- unless $start_date && $end_date;
-
-die "Invalid date format\n" unless
- $start_date =~ /^\d{4}-\d{2}-\d{2}$/ &&
- $end_date =~ /^\d{4}-\d{2}-\d{2}$/ &&
- (!$export_date || $export_date =~ /^\d{4}-\d{2}-\d{2}$/);
-
-die "--out-file <filename> required\n" unless $out_file || $count_only;
-
-sub bib_query {
- my $sql = <<SQL;
-
--- viable_records include filters applied to all records.
--- not deleted
--- is not on order (086/092/099 test)
-WITH viable_records AS (
-
- SELECT bre.id, bre.cataloging_date, bre.create_date
- FROM biblio.record_entry bre
- WHERE NOT deleted
- AND NOT EXISTS (
- SELECT 1 FROM metabib.real_full_rec
- WHERE record = bre.id
- AND tag IN ('086', '092', '099')
- AND value ILIKE '%on order%'
- )
-
--- electronic_records have a valid 998d value for electronic records
-), electronic_records AS (
-
- SELECT vr.id, create_date AS filter_date
- FROM viable_records vr
- JOIN metabib.real_full_rec mrfr ON (vr.id = mrfr.record)
- WHERE
- mrfr.tag = '998' AND
- mrfr.subfield = 'd' AND
- mrfr.value IN ('d','t','v','w','x','y','1','6')
-
--- physical records are non-electronic, have at least one viable
--- linked copy, have a valid 001 OR 035 field, and have a
--- non-null cataloging date.
-), physical_records AS (
-
- SELECT vr.id, vr.cataloging_date AS filter_date
- FROM viable_records vr
- JOIN metabib.real_full_rec mfr ON (
- mfr.record = vr.id AND (
- (
- mfr.tag = '001' AND (
- mfr.value ILIKE 'oc%' OR
- mfr.value ILIKE 'on%' OR
- mfr.value ILIKE 'wln%'
- )
- ) OR (
- mfr.tag = '035' AND
- mfr.subfield = 'a' AND
- mfr.value ILIKE '%WaOLN%'
- )
- )
- )
- WHERE vr.cataloging_date IS NOT NULL AND EXISTS (
- -- bib has at least one non-deleted copy
- SELECT acp.id
- FROM asset.copy acp
- WHERE call_number IN (
- SELECT id FROM asset.call_number
- WHERE record = vr.id AND NOT deleted
- )
- AND NOT deleted
- LIMIT 1
- )
-
-), combined_records AS (
- SELECT id, filter_date
- FROM electronic_records
- UNION
- SELECT id, filter_date
- FROM physical_records
-)
-
-SELECT DISTINCT(cr.id)
-FROM combined_records cr
-WHERE cr.filter_date BETWEEN '$start_date' AND '$end_date'
-SQL
-
- $sql .= " LIMIT $limit" if $limit;
- return $sql;
-}
-
-
-# Finds bibs to export and prints their MARC to STDOUT
-sub export_marc {
-
- if ($out_file) {
- open(MARCFILE, ">$out_file")
- or die "Cannot open file for writing: $out_file\n";
- binmode(MARCFILE, ':utf8');
- }
-
- my $sth = $db_handle->prepare(bib_query());
- my $edate_sth = $db_handle->prepare(
- 'SELECT * FROM metabib.set_export_date(?, ?)');
-
- $sth->execute;
- my $count = 0;
- while (my $bib = $sth->fetchrow_hashref) {
- $count++;
- next if $count_only;
-
- my $bib_id = $bib->{id};
-
- if ($ids_only) {
- print MARCFILE "$bib_id\n";
- print "$count records written...\n" if ($count % 1000) == 0;
- next;
- }
-
- my $rec = $db_handle->selectall_arrayref(
- "SELECT marc FROM biblio.record_entry WHERE id = $bib_id");
-
- my $marc = $rec->[0]->[0];
- my $marcdoc = MARC::Record->new_from_xml($marc, 'UTF-8', 'USMARC');
-
- print MARCFILE $marcdoc->as_usmarc;
-
- print "$count records written...\n" if ($count % 1000) == 0;
-
- next unless $export_date;
-
- # Update the bib record's metabib.bib_export_data entry.
- eval { $edate_sth->execute($bib_id, $export_date) };
- die "Error setting export date for bib ".
- "$bib_id to $export_date : $@\n" if $@;
- }
-
- close(MARCFILE) if $out_file;
-
- print "$count total bib records\n";
-
- $sth->finish;
- $edate_sth->finish;
-}
-
-sub connect_db {
- $db_handle = DBI->connect(
- "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'",
- $db_user, $db_pass, {
- RaiseError => 1,
- PrintError => 0,
- AutoCommit => 1,
- pg_expand_array => 0,
- pg_enable_utf8 => 1
- }
- ) or die "Connection to database failed: $DBI::err : $DBI::errstr";
-}
-
-connect_db();
-export_marc();
-
-$db_handle->disconnect;
-
+++ /dev/null
-#!/usr/bin/env perl
-# -----------------------------------------------------------------------
-# Process a Backstage results ZIP file: create, update, and delete
-# authority records, update bibs modified only by Backstage, and write
-# bibs that were also modified locally to a collision file for
-# later re-import.
-#
-# TODO:
-# Disable auth record change propagation during auth record updates.
-# -----------------------------------------------------------------------
-use strict;
-use warnings;
-use DBI;
-use DateTime;
-use Getopt::Long;
-use MARC::Record;
-use MARC::File::XML (BinaryEncoding => 'UTF-8');
-use MARC::File::USMARC;
-use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
-use File::Basename;
-use Sys::Syslog qw(syslog openlog);
-use OpenILS::Utils::Normalize qw(clean_marc);
-binmode(STDOUT, ':utf8');
-
-my $db_handle;
-my $log_mod = 500; # log every 500th of each type of event (see verbose)
-
-my $file;
-my $export_date;
-my $working_dir = '.';
-my $bib_collision_file;
-my $verbose;
-my $db_user = $ENV{PGUSER} || 'evergreen';
-my $db_name = $ENV{PGDATABASE} || 'evergreen';
-my $db_host = $ENV{PGHOST} || 'localhost';
-my $db_port = $ENV{PGPORT} || '5432';
-my $db_pass = $ENV{PGPASSWORD};
-
-my $syslog_facility = 'LOCAL6'; # matches Evergreen gateway
-my $syslog_ops = 'pid';
-my $syslog_ident = 'BACKSTAGE';
-
-my $new_auth_sth;
-my $mod_auth_sth;
-my $del_auth_sth;
-my $delmod_auth_sth;
-my $mod_bibs_sth;
-my $match_auth_sth;
-my $match_auth_001_sth;
-my $new_auth_ctr = 0;
-my $mod_auth_ctr = 0;
-my $del_auth_ctr = 0;
-my $mod_bibs_ctr = 0;
-my $col_bibs_ctr = 0;
-
-my $help;
-
-GetOptions(
- 'file=s' => \$file,
- 'export-date=s' => \$export_date,
- 'working-dir=s' => \$working_dir,
- 'bib-collision-file=s' => \$bib_collision_file,
- 'verbose' => \$verbose,
- 'db-user=s' => \$db_user,
- 'db-host=s' => \$db_host,
- 'db-name=s' => \$db_name,
- 'db-port=i' => \$db_port,
- 'db-pass=s' => \$db_pass,
- 'help' => \$help
-);
-
-sub help {
- print <<HELP;
-
- Process a Backstage results ZIP file: import new, changed, and
- deleted authority records, and re-import modified bib records.
-
-$0 --export-date <YYYY-MM-DD> --file <results.zip>
-
-Options
-
- --export-date
- Bib records modified within EG since this time will be treated
- specially when ingesting bib records produced by Backstage to
- avoid losing changes made by staff since the export.
-
- --file
- Full path to ZIP file to process.
-
- --working-dir
- Directory where constituent files are extracted.
- Defaults to the CWD of this script.
-
- --bib-collision-file
- File created in the working directory containing MARC data of
- bib records that were modified by staff after export and
- modified by Backstage as part of the export. These are
- re-imported via an external Vandelay process.
-
- --db-host
- --db-user
- --db-name
- --db-port
- --db-pass
- Database connection parameters.
-
-HELP
- exit;
-}
-
-die "required: --export-date YYYY-MM-DD\n" unless
- $export_date && $export_date =~ /^\d{4}-\d{2}-\d{2}$/;
-
-die "--file required\n" unless $file;
-
-# Log every occurrence of each event type.
-$log_mod = 1 if $verbose;
-
-sub announce {
- my ($level, $msg, $die) = @_;
- syslog("LOG_$level", $msg);
-
- my $date_str = DateTime->now(time_zone => 'local')->strftime('%F %T');
- my $msg_str = "$date_str [$$] $level $msg\n";
-
- if ($die) {
- die $msg_str;
-
- } else {
- if ($level eq 'ERR' or $level eq 'WARNING') {
- # always copy problem messages to stdout
- warn $msg_str; # avoid dupe messages
- } else {
- print $msg_str;
- }
- }
-}
-
-
-sub connect_db {
- $db_handle = DBI->connect(
- "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'",
- $db_user, $db_pass, {
- RaiseError => 1,
- PrintError => 0,
- AutoCommit => 1,
- pg_expand_array => 0,
- pg_enable_utf8 => 1
- }
- ) or die "Connection to database failed: $DBI::err : $DBI::errstr";
-}
-
-sub process_zip_file {
-
- my $zip = Archive::Zip->new();
-
- announce('ERR', "Failed to read $file", 1)
- unless $zip->read($file) == AZ_OK;
-
- # Avoid processing XLS and HTM files.
- # All of the MARC files end in .UTF8.
- for my $member ($zip->membersMatching('.*(\.UTF8|\.MRC)')) {
-
- my $basename = basename($member->fileName());
-
- announce('INFO', "Extracting file $basename");
-
- my $local_file = "$working_dir/$basename";
-
- announce('ERR', "Unable to extract to file: $local_file", 1)
- unless $member->extractToFileNamed($local_file) == AZ_OK;
-
- my $marc_batch = MARC::File::USMARC->in($local_file, 'UTF8')
- or announce('ERR', "Unable to read $local_file as MARC", 1);
-
- if ($basename =~ /BIB/) {
-
- handle_modified_bibs($marc_batch);
-
- } elsif ($basename =~ /DEL/) {
-
- handle_deleted_auths($marc_batch);
-
- } elsif ($basename =~ /CHG|NEW|AUTH/) {
-
- handle_modified_auths($marc_batch);
-
- } else {
- announce('WARNING', "Un-handled file type: $basename");
- }
- }
-}
-
-# Returns ID's of bib records that have been modified since the export date.
-my @modified_bibs;
-my $mod_searched = 0;
-sub find_modified_bibs {
-
- return if $mod_searched;
- $mod_searched = 1;
-
- my $id_arrays = $db_handle->selectall_arrayref(<<" SQL");
- SELECT id
- FROM biblio.record_entry
- WHERE NOT deleted AND edit_date >= '$export_date'
- SQL
-
- @modified_bibs = map {$_->[0]} @$id_arrays;
-
- announce('INFO', scalar(@modified_bibs)." bibs modified since export");
-}
-
-
-
-# 1. Bibs that have been modified by Backstage and locally are written
-# to the --bib-collision-file as MARC for later processing.
-# 2. Bibs that have only been modified by Backstage are updated
-# directly in the database.
-sub handle_modified_bibs {
- my $marc_batch = shift;
-
- find_modified_bibs();
-
- while (my $record = $marc_batch->next()) {
- my $bib_id = $record->subfield('901', 'c');
-
- if (!$bib_id) {
- announce('ERR', "Bib record has no 901c (ID) value. Skipping");
- next;
- }
-
- if (grep {$bib_id eq $_} @modified_bibs) {
- # Bib was edited by both parties. Save to external file
- # for later processing.
-
- write_bib_collision($record);
-
- } else {
- # Update our copy of the record.
-
- my $marcxml = clean_marc($record->as_xml_record());
- update_bib($marcxml, $bib_id);
- }
- }
-}
-
-sub update_bib {
- my $marcxml = shift;
- my $bib_id = shift;
-
- eval { $mod_bibs_sth->execute($marcxml, $bib_id) };
-
- if ($@) {
- announce('ERR', "Error updating biblio record: $@ : $marcxml");
- return;
- }
-
- $mod_bibs_ctr++;
-
- announce('INFO', "Updated $mod_bibs_ctr bib records")
- if $mod_bibs_ctr % $log_mod == 0;
-}
-
-sub write_bib_collision {
- my $record = shift;
-
- my $filename = "$working_dir/$bib_collision_file";
-
- open(BIBS_FILE, ">>$filename") or
- announce('ERR', "Cannot open bib collision file: $filename : $!", 1);
-
- binmode(BIBS_FILE, ":utf8");
-
- print BIBS_FILE $record->as_usmarc();
-
- close BIBS_FILE or
- announce('WARNING', "Error closing bib collision file: $filename : $!");
-
- $col_bibs_ctr++;
-
- announce('INFO', "Dumped $col_bibs_ctr bib collisions to file")
- if $col_bibs_ctr % $log_mod == 0;
-}
-
-sub handle_deleted_auths {
- my $marc_batch = shift;
-
- while (my $record = $marc_batch->next()) {
- my @matches = find_matching_auths($record);
-
- for my $auth_id (@matches) {
-
- eval {
- # 2 mods.. wrap in transaction? (see autocommit)
- $del_auth_sth->execute($auth_id);
- $delmod_auth_sth->execute($auth_id);
- };
-
- if ($@) {
- announce(
- 'ERR', "Error deleting authority record: $@ : $auth_id");
- next;
- }
-
- $del_auth_ctr++;
-
- announce('INFO', "Deleted $del_auth_ctr authority records")
- if $del_auth_ctr % $log_mod == 0;
- }
- }
-}
-
-sub handle_modified_auths {
- my $marc_batch = shift;
-
- while (my $record = $marc_batch->next()) {
-
- modify_auth_005($record);
-
- my @matches = find_matching_auths($record);
- push(@matches, find_replaced_auths($record));
-
- my $marcxml = clean_marc($record->as_xml_record());
-
- if (@matches) {
- update_auth($marcxml, $_) for @matches;
- } else {
- insert_auth($marcxml);
- }
- }
-}
-
-# Update the 005 field to the current date
-sub modify_auth_005 {
- my $record = shift;
- my $field_005 = $record->field('005');
-
- # MARC 005-formatted date value
- my $now_date = DateTime->now(
- time_zone => 'local')->strftime('%Y%m%d%H%M%S.0');
-
- if ($field_005) {
- $field_005->update($now_date);
-
- } else {
- $field_005 = MARC::Field->new('005', $now_date);
- $record->insert_fields_ordered($field_005);
- }
-}
-
-
-sub update_auth {
- my $marcxml = shift;
- my $auth_id = shift;
-
- eval { $mod_auth_sth->execute($marcxml, $auth_id) };
-
- if ($@) {
- announce('ERR', "Error updating authority record: $@ : $marcxml");
- return;
- }
-
- $mod_auth_ctr++;
-
- announce('INFO', "Updated $mod_auth_ctr authority records")
- if $mod_auth_ctr % $log_mod == 0;
-}
-
-sub insert_auth {
- my $marcxml = shift;
-
- eval { $new_auth_sth->execute($marcxml, "IMPORT-" . time) };
-
- if ($@) {
- announce('ERR',
- "Error creating new authority record: $@ : $marcxml");
- return;
- }
-
- $new_auth_ctr++;
-
- announce('INFO', "Created $new_auth_ctr authority records")
- if $new_auth_ctr % $log_mod == 0;
-}
-
-# Return ID's of authority records that should be replaced by the
-# current record. Checks for records whose 010$a equals the 010$z of
-# the current record.
-# 010$z == Canceled/invalid LC control number
-sub find_replaced_auths {
- my $record = shift;
-
- my $subfield = $record->subfield('010', 'z');
- return () unless $subfield;
-
- $match_auth_sth->execute('010', $subfield);
- my $matches = $match_auth_sth->fetchall_arrayref;
- my @ids = map {$_->[0]} @$matches;
-
- announce('INFO', "Auth 010z=$subfield matched records: @ids") if @ids;
-
- return @ids;
-}
-
-# Return ID's of matching authority records. Matching tries:
-# 001 -> 010a -> 035a.
-sub find_matching_auths {
- my $record = shift;
-
- my $tag = '001';
- my $subfield;
-
- # 001 test requires its own SQL query
- if (my $field = $record->field($tag)) {
- if ($subfield = $field->data) {
-
- $match_auth_001_sth->execute($subfield);
- my $matches = $match_auth_001_sth->fetchall_arrayref;
- my @ids = map {$_->[0]} @$matches;
- announce('INFO',
- "Auth 001=$subfield matched records: @ids") if @ids;
- return @ids;
- }
- }
-
- $tag = '010';
- $subfield = $record->subfield($tag, 'a');
-
- if (!$subfield) {
- $tag = '035';
- $subfield = $record->subfield($tag, 'a');
- }
-
- return () unless $subfield;
-
- $match_auth_sth->execute($tag, $subfield);
- my $matches = $match_auth_sth->fetchall_arrayref;
-
- my @ids = map {$_->[0]} @$matches;
- announce('INFO', "Auth ${tag}a=$subfield matched records: @ids") if @ids;
-
- return @ids;
-}
-
-sub prepare_statements {
-
- $del_auth_sth = $db_handle->prepare(<<" SQL");
- DELETE FROM authority.record_entry WHERE id = ?
- SQL
-
- $delmod_auth_sth = $db_handle->prepare(<<" SQL");
- UPDATE authority.record_entry
- SET edit_date = NOW() WHERE id = ?
- SQL
-
- $mod_bibs_sth = $db_handle->prepare(<<" SQL");
- UPDATE biblio.record_entry
- SET marc = ?, edit_date = NOW()
- WHERE id = ?
- SQL
-
- $mod_auth_sth = $db_handle->prepare(<<" SQL");
- UPDATE authority.record_entry
- SET marc = ?, edit_date = NOW()
- WHERE id = ?
- SQL
-
- $new_auth_sth = $db_handle->prepare(<<" SQL");
- INSERT INTO authority.record_entry (marc, last_xact_id)
- VALUES (?, ?)
- SQL
-
- $match_auth_sth = $db_handle->prepare(<<" SQL");
- SELECT DISTINCT(rec.id)
- FROM authority.record_entry rec
- JOIN authority.full_rec frec ON (frec.record = rec.id)
- WHERE
- NOT rec.deleted
- AND frec.tag = ?
- AND frec.subfield = 'a'
- AND frec.value = NACO_NORMALIZE(?, 'a')
- SQL
-
- $match_auth_001_sth = $db_handle->prepare(<<" SQL");
- SELECT DISTINCT(rec.id)
- FROM authority.record_entry rec
- JOIN authority.full_rec frec ON (frec.record = rec.id)
- WHERE
- NOT rec.deleted
- AND frec.tag = '001'
- AND frec.value = ?
- SQL
-}
-
-openlog($syslog_ident, $syslog_ops, $syslog_facility);
-connect_db();
-prepare_statements();
-process_zip_file();
-
-$new_auth_sth->finish;
-$mod_auth_sth->finish;
-$del_auth_sth->finish;
-$delmod_auth_sth->finish;
-$match_auth_sth->finish;
-$match_auth_001_sth->finish;
-$mod_bibs_sth->finish;
-
-$db_handle->disconnect;
-
+++ /dev/null
-#!/usr/bin/perl
-
-use strict;
-use warnings;
-use DBI;
-use Getopt::Long;
-use MARC::Record;
-use MARC::File::XML (BinaryEncoding => 'UTF-8');
-use MARC::Charset;
-use OpenSRF::System;
-use OpenILS::Utils::Fieldmapper;
-use OpenSRF::Utils::SettingsClient;
-use OpenSRF::EX qw/:try/;
-use Encode;
-use Unicode::Normalize;
-use OpenILS::Utils::Normalize;
-use Data::Dumper;
-use Pod::Usage qw/ pod2usage /;
-
-$ENV{OSRF_LOG_CLIENT} = 1;
-
-$Data::Dumper::Indent = 0;
-MARC::Charset->assume_unicode(1);
-
-my $acsaf_cache = {};
-
-sub get_acsaf {
- my ($e, $id) = @_;
-
- $acsaf_cache->{$id} ||=
- $e->retrieve_authority_control_set_authority_field([
- $id,
- {flesh => 1, flesh_fields => {acsaf => ["main_entry"]}}
- ]);
- return $acsaf_cache->{$id};
-}
-
-sub matchable_string {
- my ($field, $sf_list, $joiner) = @_;
- $joiner ||= ' ';
-
- return join($joiner, grep { defined } map { $field->subfield($_) } split "", $sf_list);
-}
-
-# ########### main
-my ($start_id, $end_id);
-my $bootstrap = '/openils/conf/opensrf_core.xml';
-my @records;
-my $verbose;
-my $input_file ='';
-my $db_host = $ENV{PGHOST} || 'localhost';
-my $db_port = $ENV{PGPORT} || '5432';
-my $db_user = $ENV{PGUSER} || 'evergreen';
-my $db_pass = $ENV{PGPASSWORD};
-my $links_removed = 0;
-my $links_added = 0;
-my $CNI = 'KCLS';
-
-my %options;
-my $result = GetOptions(
- \%options,
- 'configuration=s' => \$bootstrap,
- 'record=i' => \@records,
- 'all', 'help',
- 'start_id=i' => \$start_id,
- 'end_id=i' => \$end_id,
- 'file=s' => \$input_file,
- 'verbose' => \$verbose,
- "db-host=s" => \$db_host,
- "db-user=s" => \$db_user,
- "db-pass=s" => \$db_pass,
- "db-port=s" => \$db_port
-);
-
-sub announce {
- my $msg = shift;
- return unless $verbose;
- print DateTime->now(time_zone => 'local')->strftime('%F %T') . " $msg\n";
-}
-
-pod2usage(0) if not $result or $options{help};
-
-OpenSRF::System->bootstrap_client(config_file => $bootstrap);
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-# must be loaded and initialized after the IDL is parsed
-
-use OpenILS::Utils::CStoreEditor;
-OpenILS::Utils::CStoreEditor::init();
-
-my $e = OpenILS::Utils::CStoreEditor->new;
-
-my $query = q{
- SELECT
- source,
- ARRAY_TO_STRING(ARRAY_AGG(target || ',' || field), ';') AS links
- FROM (
- SELECT sh1.record AS target,
- sh2.record AS source,
- sh2.atag AS field
- FROM authority.simple_heading sh1
- JOIN authority.simple_heading sh2 USING (sort_value)
- JOIN authority.control_set_authority_field af1 ON
- (sh1.atag = af1.id AND af1.main_entry IS NULL)
- JOIN authority.control_set_authority_field af2 ON
- (sh2.atag = af2.id AND af2.main_entry IS NOT NULL
- AND af2.linking_subfield IS NOT NULL)
- %s -- where clause here
- -- Ignore authority.authority_linking rows since we want to
- -- rebuild all links, which may mean deleting bogus links.
- -- EXCEPT SELECT target, source, field FROM authority.authority_linking
- -- order by source for consistent testing
- ) x GROUP BY 1 ORDER BY source
-};
-
-my @bind_params;
-if (@records) {
- $query = sprintf($query, "WHERE sh2.record = ?");
- @bind_params = @records; # should be just one scalar in this array.
-} elsif ($options{all}) {
- $query = sprintf($query, ""); # no where clause
-} elsif ($start_id and $end_id) {
- $query = sprintf($query, "WHERE sh2.record BETWEEN ? AND ?");
- @bind_params = ($start_id, $end_id);
-
-} elsif ($input_file) {
- # Load authority record IDs from a file.
- announce("Reading authority record IDs from $input_file");
-
- open FILE, "<", $input_file or die "Can't open file $input_file\n";
- while(<FILE>) {
- chomp;
- push(@records, $_) if $_;
- }
- close FILE;
-
- announce("Read ".scalar(@records)." from $input_file");
-
- # No bind_params needed.
- my $recstr = join(',', @records);
- $query = sprintf($query, "WHERE sh2.record IN ($recstr)");
-
-} else {
- pod2usage(0);
-}
-
-announce("SQL, params: ", Dumper($query, \@bind_params));
-
-my $dsn = "dbi:Pg:database=evergreen;host=$db_host;port=$db_port";
-my $dbh = DBI->connect($dsn, $db_user, $db_pass);
-$dbh->do('SET statement_timeout = 0');
-
-my $sth = $dbh->prepare($query);
-
-announce("Executing query ...");
-$sth->execute(@bind_params);
-
-my $problems = 0;
-
-if (@records) {
- announce("Processing ".scalar(@records)." records");
-} elsif ($start_id) {
- announce("Processing records $start_id => $end_id");
-} else {
- announce("Processing all records");
-}
-
-my $total_records = 0;
-while (my ($src, $links) = $sth->fetchrow_array) {
- announce("--------------------");
- announce("Processing authority source record $src");
- $total_records++;
-
- try {
-
- my $src_rec = $e->retrieve_authority_record_entry($src);
- if (!$src_rec) {
- warn "Unable to load source authority record $src\n";
- next;
- }
-
- my $src_marc = MARC::Record->new_from_xml($src_rec->marc);
- my $auth_src_thesaurus = substr($src_marc->field('008')->data(), 11, 1);
- announce("Source record thesaurus value=$auth_src_thesaurus");
-
- my $changed = 0;
- my %tags_seen;
- for my $link (split ';', $links) {
- my ($target, $field_id) = split ',', $link;
-
- next if $target eq $src_rec->id;
-
- announce("Target: $target, field_id: $field_id");
-
- my $target_rec = $e->retrieve_authority_record_entry($target);
- if (!$target_rec) {
- warn "Unable to load authority record $target. Skipping\n";
- next;
- }
-
- my $target_marc = MARC::Record->new_from_xml($target_rec->marc);
-
- my $auth_target_thesaurus =
- substr($target_marc->field('008')->data(), 11, 1);
-
- announce("Target record thesaurus value=$auth_target_thesaurus");
-
- if ($auth_src_thesaurus ne $auth_target_thesaurus) {
- announce("Thesauri for source/target records do not match. Skipping..");
- next;
- }
-
- my $acsaf = get_acsaf($e, $field_id);
- if (!$acsaf) {
- warn "No authority control set field found for $field_id. Skipping\n";
- next;
- }
-
- if (!$tags_seen{$acsaf->tag}) {
- # the first time we process each tag for a given record,
- # remove all existing auth-to-auth link subfields
- # so they can be completely rebuilt.
- for my $field ($src_marc->field($acsaf->tag)) {
- if (my $val = $field->subfield('0')) {
- announce("Removing existing subfield 0 : $val");
- $field->delete_subfield(code => '0');
- $changed = 1;
- $links_removed++;
- }
- }
- $tags_seen{$acsaf->tag} = 1;
- }
-
- # rebuild the links for the current tag
- for my $field ($src_marc->field($acsaf->tag)) {
-
- my $src_string = matchable_string(
- $field, $acsaf->main_entry->display_sf_list,
- $acsaf->main_entry->joiner
- );
-
- announce(sprintf(
- "At field id=%s (tag=%s) / trying to match '%s'",
- $acsaf->id, $acsaf->tag, $src_string));
-
- my ($tfield) = $target_marc->field($acsaf->main_entry->tag);
-
- if(defined $tfield) {
-
- my $target_string = matchable_string(
- $tfield, $acsaf->main_entry->display_sf_list,
- $acsaf->main_entry->joiner
- );
-
- if ($target_string eq $src_string) {
- announce("Got a match");
- $field->update('0' => "($CNI)$target");
- $changed = 1;
- $links_added++;
- }
- }
- }
- }
-
- if ($changed) {
- announce("Updating authority record ".$src_rec->id);
- $src_rec->marc(OpenILS::Utils::Normalize::clean_marc($src_marc));
- $e->xact_begin;
- $e->update_authority_record_entry($src_rec) or die $e->die_event;
- $e->xact_commit;
- }
-
- } otherwise {
- my $err = shift;
- print STDERR "\nRecord # $src : ",
- (ref $err eq "HASH" ? Dumper($err) : $err), "\n";
-
- # Reset SAX parser so that one bad record doesn't
- # kill the entire process.
-
- import MARC::File::XML;
- $problems++;
- }
-}
-
-if (@records) {
- announce("Processed records ". scalar(@records).
- " records; processed=$total_records problems=$problems");
-} elsif ($start_id) {
- announce("Processed records $start_id => $end_id; ".
- "processed=$total_records; problems=$problems");
-} else {
- announce("Processed all records; processed=$total_records; problems=$problems");
-}
-
-announce("links removed: $links_removed");
-announce("links added: $links_added");
-announce("delta added: ".($links_added - $links_removed));
-
-exit ($problems > 0);
-
-__END__
-
-=head1 NAME
-
-authority_authority_linker.pl - Link reference headings in authority records to main entry headings in other authority records
-
-=head1 SYNOPSIS
-
-authority_authority_linker.pl [B<--configuration>=I<opensrf_core.conf>]
-[[B<--record>=I<record>[ B<--record>=I<record>]]] | [B<--all>] | [B<--start_id>=I<start-ID> B<--end_id>=I<end-ID>]
-
-=head1 DESCRIPTION
-
-For a given set of records, find authority reference headings that also
-appear as main entry headings in any other authority record. In the
-specific MARC field of the authority record (source) containing the reference
-heading with such a match in another authority record (target), add a subfield
-0 (zero) referring to the target record by ID.
-
-=head1 OPTIONS
-
-=over
-
-=item * B<-c> I<config-file>, B<--configuration>=I<config-file>
-
-Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
-Defaults to F</openils/conf/opensrf_core.xml>
-
-=item * B<-r> I<record-ID>, B<--record>=I<record-ID>
-
-Specifies the authority record ID (found in the C<authority.record_entry.id>
-column) of the B<source> record to process. This option may be specified more
-than once to process multiple records in a single run.
-
-=item * B<-a>, B<--all>
-
-Specifies that all authority records should be processed. For large
-databases, this may take an extraordinarily long amount of time.
-
-=item * B<-s> I<start-ID>, B<--start_id>=I<start-ID>
-
-Specifies the starting ID of the range of authority records to process.
-This option is ignored unless it is accompanied by the B<-e> or B<--end_id>
-option.
-
-=item * B<-e> I<end-ID>, B<--end_id>=I<end-ID>
-
-Specifies the ending ID of the range of authority records to process.
-This option is ignored unless it is accompanied by the B<-s> or B<--start_id>
-option.
-
-=back
-
-=head1 EXAMPLES
-
- authority_authority_linker.pl --start_id 1 --end_id 50000
-
-Processes the authority records with IDs between 1 and 50,000 using the
-default OpenSRF configuration file for connection information.
-
-=head1 AUTHOR
-
-Lebbeous Fogle-Weekley <lebbeous@esilibrary.com>
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright (C) 2013 Equinox Software, Inc.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA.
-
-=cut
+++ /dev/null
-#!/usr/bin/perl
-# Copyright (C) 2010-2011 Laurentian University
-# Author: Dan Scott <dscott@laurentian.ca>
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-# ---------------------------------------------------------------
-
-use strict;
-use warnings;
-use DBI;
-use Getopt::Long;
-use MARC::Record;
-use MARC::File::XML (BinaryEncoding => 'UTF-8');
-use MARC::Charset;
-use OpenSRF::System;
-use OpenILS::Utils::Fieldmapper;
-use OpenSRF::Utils::SettingsClient;
-use Encode;
-use Unicode::Normalize;
-use OpenILS::Application::AppUtils;
-use Data::Dumper;
-use Pod::Usage qw/ pod2usage /;
-use DateTime;
-
-$ENV{OSRF_LOG_CLIENT} = 1;
-
-$Data::Dumper::Indent = 0;
-MARC::Charset->assume_unicode(1);
-
-my $start_id;
-my $end_id;
-my $refresh;
-my $days_back; # days; 0 means today only
-my $modified_since; # days; 0 means today only
-my $input_file ='';
-my $bootstrap = '/openils/conf/opensrf_core.xml';
-my @records;
-my $verbose = 0;
-my $sort_desc = 0;
-my $db_host = $ENV{PGHOST} || 'localhost';
-my $db_port = $ENV{PGPORT} || '5432';
-my $db_user = $ENV{PGUSER} || 'evergreen';
-my $db_pass = $ENV{PGPASSWORD};
-my $CNI = 'KCLS';
-
-my %options;
-my $result = GetOptions(
- \%options,
- 'configuration=s' => \$bootstrap,
- 'record=i' => \@records,
- 'refresh' => \$refresh,
- 'start-id=i' => \$start_id,
- 'end-id=i' => \$end_id,
- 'days-back=i' => \$days_back,
- 'modified-since=i' => \$modified_since,
- 'sort-desc' => \$sort_desc,
- 'file=s' => \$input_file,
- 'verbose' => \$verbose,
- "db-host=s" => \$db_host,
- "db-user=s" => \$db_user,
- "db-pass=s" => \$db_pass,
- "db-port=s" => \$db_port,
- 'all', # now assumed to be true when --file is unset
- 'help'
-);
-
-sub announce {
- my $msg = shift;
- my $force = shift;
- return unless $force || $verbose;
- print DateTime->now->strftime('%F %T') . " [$$] $msg\n";
-}
-
-if (!$result or $options{help}) {
- pod2usage(0);
-}
-
-OpenSRF::System->bootstrap_client(config_file => $bootstrap);
-Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
-
-# must be loaded and initialized after the IDL is parsed
-use OpenILS::Utils::CStoreEditor;
-OpenILS::Utils::CStoreEditor::init();
-
-my $e = OpenILS::Utils::CStoreEditor->new;
-
-if ($input_file) {
- open FILE, "<", $input_file or die "Can't open file " . $input_file;
- while(<FILE>) {
- chomp;
- if($_) {
- push(@records, $_);
- }
- }
- close FILE;
-
-} else {
-
-
- my $where = "WHERE not deleted";
- $where .= " AND id >= $start_id" if $start_id;
- $where .= " AND id <= $end_id" if $end_id;
-
- my $order = "ORDER BY id";
- $order .= " DESC" if $sort_desc;
-
- if (defined $days_back or defined $modified_since) {
- my $date_field = defined $days_back ? 'create_date' : 'edit_date';
- my $duration = defined $days_back ? $days_back : $modified_since;
- $where .= " AND DATE($date_field) >= DATE(NOW() - '$duration day'::INTERVAL)";
- }
-
- my $sql =
- "SELECT DISTINCT(id) AS id FROM biblio.record_entry $where $order";
-
- announce("Loading record ID's with query:\n$sql");
-
- my $dsn = "dbi:Pg:database=evergreen;host=$db_host;port=$db_port";
- my $dbh = DBI->connect($dsn, $db_user, $db_pass);
-
- my $sth = $dbh->prepare($sql);
- $sth->execute;
-
- while (my $ref = $sth->fetchrow_hashref()) {
- push(@records, $ref->{id});
- }
-
- $sth->finish();
- $dbh->disconnect();
-}
-
-announce("Processing ".scalar(@records)." records");
-
-# Hash of controlled fields & subfields in bibliographic records, and their
-# corresponding controlling fields & subfields in the authority record
-#
-# So, if the bib 650$a can be controlled by an auth 150$a, that maps to:
-# 650 => { a => { 150 => 'a'}}
-my %controllees = (
- 100 => { a => { 100 => 'a' },
- b => { 100 => 'b' },
- c => { 100 => 'c' },
- d => { 100 => 'd' },
- #f => { 100 => 'f' },
- g => { 100 => 'g' },
- h => { 100 => 'h' },
- k => { 100 => 'k' },
- l => { 100 => 'l' },
- m => { 100 => 'm' },
- n => { 100 => 'n' },
- o => { 100 => 'o' },
- p => { 100 => 'p' },
- q => { 100 => 'q' },
- r => { 100 => 'r' },
- s => { 100 => 's' },
- t => { 100 => 't' },
- y => { 100 => 'y' },
- z => { 100 => 'z' },
- },
- 110 => { a => { 110 => 'a' },
- b => { 110 => 'b' },
- c => { 110 => 'c' },
- d => { 110 => 'd' },
- #f => { 110 => 'f' },
- g => { 110 => 'g' },
- h => { 110 => 'h' },
- k => { 110 => 'k' },
- l => { 110 => 'l' },
- m => { 110 => 'm' },
- n => { 110 => 'n' },
- o => { 110 => 'o' },
- p => { 110 => 'p' },
- r => { 110 => 'r' },
- s => { 110 => 's' },
- t => { 110 => 't' },
- y => { 110 => 'y' },
- z => { 110 => 'z' },
- },
- 111 => { a => { 111 => 'a' },
- b => { 111 => 'b' },
- c => { 111 => 'c' },
- d => { 111 => 'd' },
- e => { 111 => 'e' },
- #f => { 111 => 'f' },
- g => { 111 => 'g' },
- h => { 111 => 'h' },
- k => { 111 => 'k' },
- l => { 111 => 'l' },
- m => { 111 => 'm' },
- n => { 111 => 'n' },
- o => { 111 => 'o' },
- p => { 111 => 'p' },
- q => { 111 => 'q' },
- r => { 111 => 'r' },
- s => { 111 => 's' },
- t => { 111 => 't' },
- u => { 111 => 'u' },
- y => { 111 => 'y' },
- z => { 111 => 'z' },
- },
- 130 => { a => { 130 => 'a' },
- d => { 130 => 'd' },
- #f => { 130 => 'f' },
- g => { 130 => 'g' },
- h => { 130 => 'h' },
- k => { 130 => 'k' },
- l => { 130 => 'l' },
- m => { 130 => 'm' },
- n => { 130 => 'n' },
- o => { 130 => 'o' },
- p => { 130 => 'p' },
- r => { 130 => 'r' },
- s => { 130 => 's' },
- t => { 130 => 't' },
- x => { 130 => 'x' },
- y => { 130 => 'y' },
- z => { 130 => 'z' },
- },
- 400 => { a => { 100 => 'a' },
- b => { 100 => 'b' },
- c => { 100 => 'c' },
- d => { 100 => 'd' },
- #f => { 100 => 'f' },
- g => { 100 => 'g' },
- h => { 100 => 'h' },
- k => { 100 => 'k' },
- l => { 100 => 'l' },
- m => { 100 => 'm' },
- n => { 100 => 'n' },
- o => { 100 => 'o' },
- p => { 100 => 'p' },
- q => { 100 => 'q' },
- r => { 100 => 'r' },
- s => { 100 => 's' },
- t => { 100 => 't' },
- y => { 100 => 'y' },
- z => { 100 => 'z' },
- },
- 410 => { a => { 110 => 'a' },
- b => { 110 => 'b' },
- c => { 110 => 'c' },
- d => { 110 => 'd' },
- #f => { 110 => 'f' },
- g => { 110 => 'g' },
- h => { 110 => 'h' },
- k => { 110 => 'k' },
- l => { 110 => 'l' },
- m => { 110 => 'm' },
- n => { 110 => 'n' },
- o => { 110 => 'o' },
- p => { 110 => 'p' },
- r => { 110 => 'r' },
- s => { 110 => 's' },
- t => { 110 => 't' },
- y => { 110 => 'y' },
- z => { 110 => 'z' },
- },
- 411 => { a => { 111 => 'a' },
- b => { 111 => 'b' },
- c => { 111 => 'c' },
- d => { 111 => 'd' },
- e => { 111 => 'e' },
- #f => { 111 => 'f' },
- g => { 111 => 'g' },
- h => { 111 => 'h' },
- k => { 111 => 'k' },
- l => { 111 => 'l' },
- m => { 111 => 'm' },
- n => { 111 => 'n' },
- o => { 111 => 'o' },
- p => { 111 => 'p' },
- q => { 111 => 'q' },
- r => { 111 => 'r' },
- s => { 111 => 's' },
- t => { 111 => 't' },
- u => { 111 => 'u' },
- y => { 111 => 'y' },
- z => { 111 => 'z' },
- },
- 600 => { a => { 100 => 'a' },
- b => { 100 => 'b' },
- c => { 100 => 'c' },
- d => { 100 => 'd' },
- #f => { 100 => 'f' },
- g => { 100 => 'g' },
- h => { 100 => 'h' },
- k => { 100 => 'k' },
- l => { 100 => 'l' },
- m => { 100 => 'm' },
- n => { 100 => 'n' },
- o => { 100 => 'o' },
- p => { 100 => 'p' },
- q => { 100 => 'q' },
- r => { 100 => 'r' },
- s => { 100 => 's' },
- t => { 100 => 't' },
- v => { 100 => 'v' },
- x => { 100 => 'x' },
- y => { 100 => 'y' },
- z => { 100 => 'z' },
- },
- 610 => { a => { 110 => 'a' },
- b => { 110 => 'b' },
- c => { 110 => 'c' },
- d => { 110 => 'd' },
- #f => { 110 => 'f' },
- g => { 110 => 'g' },
- h => { 110 => 'h' },
- k => { 110 => 'k' },
- l => { 110 => 'l' },
- m => { 110 => 'm' },
- n => { 110 => 'n' },
- o => { 110 => 'o' },
- p => { 110 => 'p' },
- r => { 110 => 'r' },
- s => { 110 => 's' },
- t => { 110 => 't' },
- v => { 110 => 'v' },
- x => { 110 => 'x' },
- y => { 110 => 'y' },
- z => { 110 => 'z' },
- },
- 611 => { a => { 111 => 'a' },
- b => { 111 => 'b' },
- c => { 111 => 'c' },
- d => { 111 => 'd' },
- e => { 111 => 'e' },
- #f => { 111 => 'f' },
- g => { 111 => 'g' },
- h => { 111 => 'h' },
- k => { 111 => 'k' },
- l => { 111 => 'l' },
- m => { 111 => 'm' },
- n => { 111 => 'n' },
- o => { 111 => 'o' },
- p => { 111 => 'p' },
- q => { 111 => 'q' },
- r => { 111 => 'r' },
- s => { 111 => 's' },
- t => { 111 => 't' },
- u => { 111 => 'u' },
- v => { 111 => 'v' },
- x => { 111 => 'x' },
- y => { 111 => 'y' },
- z => { 111 => 'z' },
- },
- 630 => { a => { 130 => 'a' },
- d => { 130 => 'd' },
- #f => { 130 => 'f' },
- g => { 130 => 'g' },
- h => { 130 => 'h' },
- k => { 130 => 'k' },
- l => { 130 => 'l' },
- m => { 130 => 'm' },
- n => { 130 => 'n' },
- o => { 130 => 'o' },
- p => { 130 => 'p' },
- r => { 130 => 'r' },
- s => { 130 => 's' },
- t => { 130 => 't' },
- v => { 130 => 'v' },
- x => { 130 => 'x' },
- y => { 130 => 'y' },
- z => { 130 => 'z' },
- },
- 650 => { a => { 150 => 'a' },
- b => { 150 => 'b' },
- c => { 150 => 'c' },
- d => { 150 => 'd' },
- v => { 150 => 'v' },
- x => { 150 => 'x' },
- y => { 150 => 'y' },
- z => { 150 => 'z' },
- },
- 651 => { a => { 151 => 'a' },
- b => { 151 => 'b' },
- v => { 151 => 'v' },
- x => { 151 => 'x' },
- y => { 151 => 'y' },
- z => { 151 => 'z' },
- },
- 655 => { a => { 155 => 'a' },
- b => { 155 => 'b' },
- c => { 155 => 'c' },
- v => { 155 => 'v' },
- x => { 155 => 'x' },
- y => { 155 => 'y' },
- z => { 155 => 'z' },
- },
- 700 => { a => { 100 => 'a' },
- b => { 100 => 'b' },
- c => { 100 => 'c' },
- d => { 100 => 'd' },
- #f => { 100 => 'f' },
- g => { 100 => 'g' },
- h => { 100 => 'h' },
- k => { 100 => 'k' },
- l => { 100 => 'l' },
- m => { 100 => 'm' },
- n => { 100 => 'n' },
- o => { 100 => 'o' },
- p => { 100 => 'p' },
- q => { 100 => 'q' },
- r => { 100 => 'r' },
- s => { 100 => 's' },
- t => { 100 => 't' },
- y => { 100 => 'y' },
- z => { 100 => 'z' },
- },
- 710 => { a => { 110 => 'a' },
- b => { 110 => 'b' },
- c => { 110 => 'c' },
- d => { 110 => 'd' },
- #f => { 110 => 'f' },
- g => { 110 => 'g' },
- h => { 110 => 'h' },
- k => { 110 => 'k' },
- l => { 110 => 'l' },
- m => { 110 => 'm' },
- n => { 110 => 'n' },
- o => { 110 => 'o' },
- p => { 110 => 'p' },
- r => { 110 => 'r' },
- s => { 110 => 's' },
- t => { 110 => 't' },
- y => { 110 => 'y' },
- z => { 110 => 'z' },
- },
- 711 => { a => { 111 => 'a' },
- b => { 111 => 'b' },
- c => { 111 => 'c' },
- d => { 111 => 'd' },
- e => { 111 => 'e' },
- #f => { 111 => 'f' },
- g => { 111 => 'g' },
- h => { 111 => 'h' },
- k => { 111 => 'k' },
- l => { 111 => 'l' },
- m => { 111 => 'm' },
- n => { 111 => 'n' },
- o => { 111 => 'o' },
- p => { 111 => 'p' },
- q => { 111 => 'q' },
- r => { 111 => 'r' },
- s => { 111 => 's' },
- t => { 111 => 't' },
- u => { 111 => 'u' },
- y => { 111 => 'y' },
- z => { 111 => 'z' },
- },
- 730 => { a => { 130 => 'a' },
- d => { 130 => 'd' },
- #f => { 130 => 'f' },
- g => { 130 => 'g' },
- h => { 130 => 'h' },
- k => { 130 => 'k' },
- l => { 130 => 'l' },
- m => { 130 => 'm' },
- n => { 130 => 'n' },
- o => { 130 => 'o' },
- p => { 130 => 'p' },
- r => { 130 => 'r' },
- s => { 130 => 's' },
- t => { 130 => 't' },
- y => { 130 => 'y' },
- z => { 130 => 'z' },
- },
- 800 => { a => { 100 => 'a' },
- b => { 100 => 'b' },
- c => { 100 => 'c' },
- d => { 100 => 'd' },
- #f => { 100 => 'f' },
- g => { 100 => 'g' },
- h => { 100 => 'h' },
- k => { 100 => 'k' },
- l => { 100 => 'l' },
- m => { 100 => 'm' },
- n => { 100 => 'n' },
- o => { 100 => 'o' },
- p => { 100 => 'p' },
- q => { 100 => 'q' },
- r => { 100 => 'r' },
- s => { 100 => 's' },
- t => { 100 => 't' },
- y => { 100 => 'y' },
- z => { 100 => 'z' },
- },
- 810 => { a => { 110 => 'a' },
- b => { 110 => 'b' },
- c => { 110 => 'c' },
- d => { 110 => 'd' },
- #f => { 110 => 'f' },
- g => { 110 => 'g' },
- h => { 110 => 'h' },
- k => { 110 => 'k' },
- l => { 110 => 'l' },
- m => { 110 => 'm' },
- n => { 110 => 'n' },
- o => { 110 => 'o' },
- p => { 110 => 'p' },
- r => { 110 => 'r' },
- s => { 110 => 's' },
- t => { 110 => 't' },
- y => { 110 => 'y' },
- z => { 110 => 'z' },
- },
- 811 => { a => { 111 => 'a' },
- b => { 111 => 'b' },
- c => { 111 => 'c' },
- d => { 111 => 'd' },
- e => { 111 => 'e' },
- #f => { 111 => 'f' },
- g => { 111 => 'g' },
- h => { 111 => 'h' },
- k => { 111 => 'k' },
- l => { 111 => 'l' },
- m => { 111 => 'm' },
- n => { 111 => 'n' },
- o => { 111 => 'o' },
- p => { 111 => 'p' },
- q => { 111 => 'q' },
- r => { 111 => 'r' },
- s => { 111 => 's' },
- t => { 111 => 't' },
- u => { 111 => 'u' },
- y => { 111 => 'y' },
- z => { 111 => 'z' },
- },
- 830 => { a => { 130 => 'a' },
- d => { 130 => 'd' },
- #f => { 130 => 'f' },
- g => { 130 => 'g' },
- h => { 130 => 'h' },
- k => { 130 => 'k' },
- l => { 130 => 'l' },
- m => { 130 => 'm' },
- n => { 130 => 'n' },
- o => { 130 => 'o' },
- p => { 130 => 'p' },
- r => { 130 => 'r' },
- s => { 130 => 's' },
- t => { 130 => 't' },
- x => { 130 => 'x' },
- y => { 130 => 'y' },
- z => { 130 => 'z' },
- },
-);
-
-# mapping of authority 008/11 "Subject heading system/thesaurus"
-# codes to the matching bib record 6XX second indicator
-my %AUTH_TO_BIB_IND2 = (
- 'a' => '0', # Library of Congress Subject Headings (ADULT)
- 'b' => '1', # Library of Congress Subject Headings (JUVENILE)
- 'c' => '2', # Medical Subject Headings
- 'd' => '3', # National Agricultural Library Subject Authority File
- 'n' => '4', # Source not specified
- 'k' => '5', # Canadian Subject Headings
- 'v' => '6', # Répertoire de vedettes-matière
- 'z' => '7' # Source specified in subfield $2 / Other
-);
-
-# Produces a new 6XX ind2 value for values found in subfield $2 when the
-# original ind2 value is 7 ("Source specified in subfield $2").
-my %REMAP_BIB_SF2_TO_IND2 = (
- lcsh => '0',
- mesh => '2',
- nal => '3',
- rvm => '6'
-);
-
-my $start_time = localtime();
-
-if($input_file) {
- announce("Start $start_time for ".scalar(@records)." records");
-} elsif($start_id) {
- announce("Start $start_time for record range: $start_id => $end_id");
-} else {
- announce("Start $start_time for all records");
-}
-
-# Fetch 008 values for the given authority records, filtering out any
-# whose 008 position 14 or 15 (depending on the bib tag) indicates the
-# record is not appropriate for name vs. subject use.
-# https://www.loc.gov/marc/authority/ad008.html
-sub authority_leaders_008_14_15 {
- my ($e, $bib_tag, $auth_ids) = @_;
-
- my $auth_leaders = $e->json_query({
- select => {afr => ['record', 'value']},
- from => 'afr',
- where => {'+afr' => {tag => '008', record => $auth_ids}}
- });
-
- my $index;
- $index = 14 if $bib_tag =~ /^[17]/; # author/name record
- $index = 15 if $bib_tag =~ /^6/; # subject record
-
- # avoid checking any other types of authority records.
- return $auth_leaders unless $index;
-
- my @keepers;
- for my $leader (@$auth_leaders) {
- my $value = $leader->{value} || '';
- if (substr($value, $index, 1) eq 'a') {
- push(@keepers, $leader);
- } else {
- announce("Skipping authority record ".$leader->{record}.
- " on bib $bib_tag match; 008/#14|#15 not appropriate");
- }
- }
-
- return \@keepers;
-}
-
-# given a set of authority record ID's and a controlled bib field,
-# returns the ID of the first authority record in the set that
-# matches the thesaurus spec of the bib record.
-sub find_matching_auth_for_thesaurus {
- my ($e, $bib_field, $auth_leaders) = @_;
-
- # bib field thesaurus spec
- my $cfield_ind2 = $bib_field->indicator(2);
-
- announce("6XX indicator 2 value = $cfield_ind2");
-
- my $is_local = 0;
- if ($cfield_ind2 eq '7') {
- # subject thesaurus code is embedded in the bib field subfield 2
-
- $is_local = 1;
-
- my $thesaurus = $bib_field->subfield('2') || '';
- announce("Found local thesaurus value $thesaurus");
-
- # if we have no special remapping value for the found thesaurus,
- # fall back to ind2 => 7=Other.
- $cfield_ind2 = $REMAP_BIB_SF2_TO_IND2{$thesaurus} || '7';
-
- announce("Local thesaurus '$thesaurus' ".
- "remapped to ind2 value '$cfield_ind2'");
- }
-
- my $authz_found = undef;
- for my $leader (@$auth_leaders) {
- my $value = $leader->{value};
- next unless $value;
-
- my $thesaurus = substr($value, 11, 1); # 008/11 -- zero-based.
-
- # Note for later that we encountered an authority record
- # whose thesaurus value is z=Other.
- $authz_found = $leader->{record} if $thesaurus eq 'z';
-
- if (($AUTH_TO_BIB_IND2{$thesaurus} || '') eq $cfield_ind2) {
- announce("Found a match on thesaurus ".
- "'$thesaurus' for " . $leader->{record});
- return $leader->{record};
- }
- }
-
- # If the bib field in question has a locally encoded thesaurus
- # (ind2=7) and no auth record was found above via remapped
- # thesaurus value, use the authority record with thesaurus z=Other.
- return $authz_found if $is_local;
-
- return undef;
-}
-
-# Returns true if the thesaurus controlling the bib field is "fast".
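-# FAST fields carry the literal thesaurus code "fast" in subfield $2,
-# e.g. (hypothetical heading): 650 _7 $a Career education $2 fast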
-sub is_fast_heading {
- my $bib_field = shift;
-
- if ($bib_field->tag() =~ /^65[015]/) {
- my $ind2 = $bib_field->indicator(2) || '';
-
- if ($ind2 eq '7') { # field controlled by "other"
- my $thesaurus = $bib_field->subfield('2') || '';
- return $thesaurus eq 'fast';
- }
- }
-
- return 0;
-}
-
-sub update_record {
- my ($record, $marc) = @_;
-
- my $xml = $marc->as_xml_record();
- $xml =~ s/\n//sgo;              # drop newlines
- $xml =~ s/^<\?xml.+\?\s*>//go;  # drop the XML declaration
- $xml =~ s/>\s+</></go;          # drop whitespace between elements
- $xml =~ s/\p{Cc}//go;           # drop control characters
- $xml = OpenILS::Application::AppUtils->entityize($xml);
-
- $record->marc($xml);
-
- my $editor = OpenILS::Utils::CStoreEditor->new(xact=>1);
- if ($editor->update_biblio_record_entry($record)) {
- $editor->commit();
- } else {
- $editor->rollback();
- }
-}
-
-my $count = 0;
-my $total = scalar(@records);
-announce("processing $total bib records", 1);
-
-foreach my $rec_id (@records) {
- $count++;
-
- announce("processing $count of $total", 1) if ($count % 1000) == 0;
-
- announce("processing bib record $rec_id [$count of $total]");
-
- # State variable; was the record changed?
- my $changed = 0;
-
- # get the record
- my $record = $e->retrieve_biblio_record_entry($rec_id);
- next unless $record && $record->deleted eq 'f';
-
- eval {
- my $marc = MARC::Record->new_from_xml($record->marc());
-
- # get the list of controlled fields
- my @c_fields = keys %controllees;
-
- foreach my $c_tag (@c_fields) {
- my @c_subfields = keys %{$controllees{"$c_tag"}};
-
- # Get the MARCXML from the record and check for controlled fields/subfields
- my @bib_fields = ($marc->field($c_tag));
- foreach my $bib_field (@bib_fields) {
-
- my $sf0 = $bib_field->subfield('0') || '';
- my $is_fast_heading = is_fast_heading($bib_field);
-
- if ($is_fast_heading && $sf0 =~ /\)fst/) {
- # fast heading looks OK. ignore it.
- announce("Ignoring FAST heading field on ".
- "rec=$rec_id and tag=$c_tag \$0 $sf0");
- next;
- }
-
- if ($sf0 && $refresh) {
- announce("Removing \$0 $sf0 for rec=$rec_id and tag=$c_tag");
- $bib_field->delete_subfield(code => '0');
- $changed = 1;
- }
-
- if ($is_fast_heading) {
- # Update record after potentially removing a bogus
- # fast heading above.
- update_record($record, $marc) if $changed;
-
- # We don't control fast headings, so there's nothing
- # left to do. Move on to the next field...
- announce("No linking performed on FAST heading ".
- "field on rec=$rec_id and tag=$c_tag");
- next;
- }
-
- my %match_subfields;
- my $match_tag;
- my @searches;
- foreach my $c_subfield (@c_subfields) {
- my @sf_values = $bib_field->subfield($c_subfield);
- if (@sf_values) {
- # Give me the first element of the list of authority controlling tags for this subfield
- # XXX Will we need to support more than one controlling tag per subfield? Probably. That
- # will suck. Oh well, leave that up to Ole to implement.
- $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0];
- $match_tag = $match_subfields{$c_subfield};
- push @searches, map {{term => $_, subfield => $c_subfield}} @sf_values;
- }
- }
- next if !$match_tag;
-
- announce("Searching for matches on controlled field $c_tag ".
- "(auth tag=$match_tag): \n - ".Dumper(\@searches));
-
- my @tags = ($match_tag);
-
- # Now we've built up a complete set of matching controlled
- # subfields for this particular field; let's check to see if
- # we have a matching authority record
- my $session = OpenSRF::AppSession->create("open-ils.search");
- my $validates = $session->request("open-ils.search.authority.validate.tag.id_list",
- "tags", \@tags, "searches", \@searches
- )->gather();
- $session->disconnect();
-
-
- # Protect against failed (error condition) search request
- if (!$validates) {
- print STDERR "Search for matching authority failed; record # $rec_id\n";
- # Skip this field unconditionally; dereferencing an undefined
- # $validates below would die. Changes already made to the record
- # are still saved by the update_record() call after the field loop.
- next;
- }
-
- announce("Match query returned @$validates");
-
- # No matches found. Nothing left to do for this field.
- next if scalar(@$validates) == 0;
-
- # Iterate through the returned authority record IDs to delete any
- # matching $0 subfields already in the bib record
- foreach my $auth_zero (@$validates) {
- $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/);
- }
-
- # Find the best authority record to use for linking.
-
- my $auth_leaders;
- if ($bib_field->tag =~ /^[167]/) {
- # For 1XX, 6XX, and 7XX bib fields, only link to
- # authority records whose 008 positions 14 and 15
- # are coded to allow use as a name/author or subject
- # record, as appropriate.
-
- $auth_leaders = authority_leaders_008_14_15(
- $e, $bib_field->tag, $validates);
-
- $validates = [map {$_->{record}} @$auth_leaders];
- }
-
- my $auth_id;
-
- if ($bib_field->tag() =~ /^65[015]/) {
- # Using the indicator-2 value from the controlled bib
- # field, find the first authority in the list of matches
- # that uses the same thesaurus. If no such authority
- # is found, no matching occurs.
- # TODO: perhaps this step should be part of the
- # validation API search call above.
-
- $auth_id = find_matching_auth_for_thesaurus(
- $e, $bib_field, $auth_leaders) || '';
-
- } else {
-
- # For all other controlled fields, use the first
- # authority record in the result set.
- $auth_id = $validates->[0];
- }
-
- # Don't exit here just because we have no $auth_id. The
- # bib field could have been changed above in the cleanup /
- # delete phase.
-
- if ($auth_id) {
- # Add the auth ID and control number agency info from the
- # matching authority record to the controlled bib field.
- $changed = 1;
- $bib_field->add_subfields('0' => "($CNI)$auth_id");
- announce("auth=$auth_id cni=$CNI. It's a match!");
- }
- }
- }
-
- update_record($record, $marc) if $changed;
-
- };
-
- if ($@) {
- print STDERR "\nRecord # $rec_id : $@\n";
- import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire process
- }
-}
-
-my $end_time = localtime();
-
-if($input_file) {
- announce("Stop $end_time for ".scalar(@records)." records");
-} elsif($start_id) {
- announce("Start $end_time for record range: $start_id => $end_id");
-} else {
- announce("Start $end_time for all records");
-}
-
-__END__
-
-=head1 NAME
-
-authority_control_fields.pl - Controls fields in bibliographic records with authorities in Evergreen
-
-=head1 SYNOPSIS
-
-C<authority_control_fields.pl> [B<--configuration>=I<opensrf_core.conf>] [B<--refresh>]
-[B<--record>=I<record> [B<--record>=I<record> ...]] | [B<--file>=I<file>] | [B<--all>] |
-[B<--start_id>=I<start-ID> B<--end_id>=I<end-ID>]
-
-=head1 DESCRIPTION
-
-For a given set of records:
-
-=over
-
-=item * Iterate through the list of fields that are controlled fields
-
-=item * Iterate through the list of subfields that are controlled for
-that given field
-
-=item * Search for a matching authority record for that combination of
-field + subfield(s)
-
-=over
-
-=item * If we find a match, then add a $0 subfield to that field identifying
-the controlling authority record
-
-=item * If we do not find a match, then insert a row into an "uncontrolled"
-table identifying the record ID, field, and subfield(s) that were not controlled
-
-=back
-
-=item * Iterate through the list of floating subdivisions
-
-=over
-
-=item * If we find a match, then add a $0 subfield to that field identifying
-the controlling authority record
-
-=item * If we do not find a match, then insert a row into an "uncontrolled"
-table identifying the record ID, field, and subfield(s) that were not controlled
-
-=back
-
-=item * If we changed the record, update it in the database
-
-=back
-
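-For example, a matched 650 field gains a subfield $0 naming the
-controlling authority record; the control number identifier and record
-ID shown here are hypothetical:
-
- 650 _0 $a Horror tales. $0 (CONS)4510255
-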
-=head1 OPTIONS
-
-=over
-
-=item * B<-f>, B<--file>
-
-Specifies a file of bib record IDs to link, one ID per line.
-
-=item * B<-c> I<config-file>, B<--configuration>=I<config-file>
-
-Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
-Defaults to F</openils/conf/opensrf_core.xml>
-
-=item * B<-r> I<record-ID>, B<--record>=I<record-ID>
-
-Specifies the bibliographic record ID (found in the C<biblio.record_entry.id>
-column) of the record to process. This option may be specified more than once
-to process multiple records in a single run.
-
-=item * B<-a>, B<--all>
-
-Specifies that all bibliographic records should be processed. For large
-databases, this may take an extraordinarily long time.
-
-=item * B<--refresh>
-
-Specifies that all authority links should be removed from the target
-bibliographic record(s). This will effectively rewrite all authority
-linking anew.
-
-=item * B<-s> I<start-ID>, B<--start_id>=I<start-ID>
-
-Specifies the starting ID of the range of bibliographic records to process.
-This option is ignored unless it is accompanied by the B<-e> or B<--end_id>
-option.
-
-=item * B<-e> I<end-ID>, B<--end_id>=I<end-ID>
-
-Specifies the ending ID of the range of bibliographic records to process.
-This option is ignored unless it is accompanied by the B<-s> or B<--start_id>
-option.
-
-=back
-
-=head1 EXAMPLES
-
- authority_control_fields.pl --start_id 1 --end_id 50000
-
-Processes the bibliographic records with IDs between 1 and 50,000 using the
-default OpenSRF configuration file for connection information.
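-
- authority_control_fields.pl --refresh --record 4510255
-
-Removes any existing authority links from the bib record with ID 4510255
-(a hypothetical ID), then rebuilds them from scratch.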
-
-=head1 AUTHOR
-
-Dan Scott <dscott@laurentian.ca>
-
-=head1 COPYRIGHT AND LICENSE
-
-Copyright 2010-2011 by Dan Scott
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-
-=cut
-
+++ /dev/null
-#!/usr/bin/perl
-# ----------------------------------------------------------------------
-# Find bib records matching the requested criteria for linking.
-# Bib IDs are exported to one or more batch files for future processing.
-# ----------------------------------------------------------------------
-use strict;
-use warnings;
-use DBI;
-use Getopt::Long;
-use DateTime;
-
-my $db_handle;
-my $counter = 0;
-
-# options
-my $help;
-my $modified_since;
-my $exported_since;
-my $batch_size = 10000;
-my $start_id;
-my $end_id;
-my $count_only;
-my $out_dir = '/tmp';
-my $db_host = $ENV{PGHOST} || 'localhost';
-my $db_port = $ENV{PGPORT} || '5432';
-my $db_user = $ENV{PGUSER} || 'evergreen';
-my $db_name = $ENV{PGDATABASE} || 'evergreen';
-my $db_pass = $ENV{PGPASSWORD};
-
-my $opt_result = GetOptions(
- 'modified-since=s' => \$modified_since,
- 'exported-since=s' => \$exported_since,
- 'start-id=i' => \$start_id,
- 'end-id=i' => \$end_id,
- 'batch-size=i' => \$batch_size,
- 'count-only' => \$count_only,
- 'out-dir=s' => \$out_dir,
- "db-host=s" => \$db_host,
- "db-user=s" => \$db_user,
- "db-pass=s" => \$db_pass,
- "db-port=s" => \$db_port,
- 'help' => \$help
-);
-
-sub announce {
- my $msg = shift;
- print DateTime->now(time_zone => 'local')->strftime('%F %T')." $msg\n";
-}
-
-sub help {
- print <<HELP;
- Find IDs for bib records based on various criteria. Write bib
- IDs to batch files. Batch files are placed into --out-dir and
- named bib-ids.000, bib-ids.001, etc.
-
- Usage:
-
- $0 --modified-since 2016-12-01 --batch-size 100 \
- --out-dir /openils/var/data/linkbibs/2016-12-01
-
- Options:
-
- --modified-since <YYYY-MM-DD>
- Limit bibs to those modified since the specified date.
-
- --exported-since <YYYY-MM-DD>
- Limit bibs to those exported since the specified date.
- Export date is based on data found in the
- metabib.bib_export_data table.
-
- --start-id <id>
- Limit bibs to those whose ID is no less than <id>
-
- --end-id <id>
- Limit bibs to those whose ID is no greater than <id>
-
- --out-dir [/tmp]
- Output directory.
-
- --batch-size
- Number of bib IDs to write to each batch file.
-
- --count-only
- Print the total number of records that would be added
- to batch files without adding to any batch files.
-
- --db-host
- --db-user
- --db-pass
- --db-port
- Database connection params. PG environment variables are
- also inspected for values. When all else fails, try to
- connect to database evergreen\@localhost
-HELP
- exit 0;
-}
-
-help() if $help || !$opt_result;
-
-sub connect_db {
- $db_handle = DBI->connect(
- "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'",
- $db_user, $db_pass, {
- RaiseError => 1,
- PrintError => 0,
- AutoCommit => 1,
- pg_expand_array => 0,
- pg_enable_utf8 => 1
- }
- ) or die "Connection to database failed: $DBI::err : $DBI::errstr";
-}
-
-connect_db();
-
-# ----------------------------------------------------------------------
-my $from = 'FROM biblio.record_entry bre';
-
-my $where = 'WHERE NOT bre.deleted';
-$where .= " AND bre.id >= $start_id" if $start_id;
-$where .= " AND bre.id <= $end_id" if $end_id;
-
-if ($exported_since) {
- $where .= " AND bed.export_date > '$exported_since'";
- $from .= " JOIN metabib.bib_export_data bed ON (bed.bib = bre.id)";
-}
-
-my $sql = <<SQL;
- SELECT bre.id
- $from
- $where
- ORDER BY bre.id DESC;
-SQL
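-# For example, with --exported-since 2016-10-01 the generated SQL is
-# effectively:
-#
-#   SELECT bre.id
-#   FROM biblio.record_entry bre
-#     JOIN metabib.bib_export_data bed ON (bed.bib = bre.id)
-#   WHERE NOT bre.deleted AND bed.export_date > '2016-10-01'
-#   ORDER BY bre.id DESC;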
-
-my $sth = $db_handle->prepare($sql);
-$sth->execute;
-
-my $batch_file;
-sub open_batch_file {
- my $path = shift;
- announce("Starting new batch file: $path");
-
- close $batch_file if $batch_file;
-
- open $batch_file, '>', $path or
- die "Cannot open batch file for writing: $!\n";
-}
-
-my $ctr = 0;
-my $batch = 0;
-while (my $ref = $sth->fetchrow_hashref()) {
- $ctr++;
- next if $count_only;
-
- if (( ($ctr - 1) % $batch_size) == 0) {
- my $path = sprintf("$out_dir/bib-ids.%0.3d", $batch);
- open_batch_file($path);
- $batch++;
- }
-
- print $batch_file $ref->{id} . "\n";
-}
-
-close $batch_file if $batch_file;
-$sth->finish;
-
-announce("Found $ctr bib records");
-
+++ /dev/null
-#!/bin/bash
-set -eu
-PROC_COUNT=3
-BATCHES_PER_PROC=3
-BATCH_DIR="/openils/var/data/linkbibs"
-
-DOW=$(date +%u);
-[ "$DOW" -eq 7 ] && DOW=0; # make Sunday = 0
-
-BATCH=$(( PROC_COUNT * BATCHES_PER_PROC * DOW ));
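-# Each day consumes PROC_COUNT * BATCHES_PER_PROC = 9 batch files;
-# e.g. on Wednesday (DOW=3) the three processes cover
-# bib-ids.027 through bib-ids.035.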
-
-echo "Starting at batch number $BATCH";
-
-function execute_background_proc {
- START=$1
- for batch in $(seq 1 $BATCHES_PER_PROC); do
- # batch files are numbered from zero (bib-ids.000), so offset by one
- FILE=$(printf "$BATCH_DIR/bib-ids.%03d" $(($START + $batch - 1)));
- echo "Linking bib file $FILE"
- perl ./authority_control_fields.pl --refresh --file $FILE
- done;
-}
-
-for PROC in $(seq 1 $PROC_COUNT); do
- (execute_background_proc $BATCH) &
- BATCH=$((($BATCH + BATCHES_PER_PROC)));
-done
-
-wait;
-
-echo "Done processing all batches"
-
+++ /dev/null
-#!/usr/bin/perl
-# ----------------------------------------------------------------------
-# Find authority records newer than a specified age. Once found,
-# run each through the auth-to-auth linking process. Then locate
-# bib records that we might want to link to the new records and
-# pass them off to the bib-to-auth linker.
-# ----------------------------------------------------------------------
-use strict;
-use warnings;
-use DBI;
-use Getopt::Long;
-use DateTime;
-use Pod::Usage qw/pod2usage/;
-use Time::HiRes qw/usleep/;
-
-my @auth_ids;
-my @bib_ids;
-my $counter = 0;
-
-# options
-my $help;
-my $modified_since;
-my $max_auth_count;
-my $start_auth_id;
-my $print_auth_ids;
-my $print_bib_ids;
-my $link_auths;
-my $link_bibs;
-my $progress;
-my $db_host = $ENV{PGHOST} || 'localhost';
-my $db_port = $ENV{PGPORT} || '5432';
-my $db_user = $ENV{PGUSER} || 'evergreen';
-my $db_name = $ENV{PGDATABASE} || 'evergreen';
-my $db_pass = $ENV{PGPASSWORD};
-
-my $opt_result = GetOptions(
- 'modified-since=i' => \$modified_since,
- 'max-auth-count=i' => \$max_auth_count,
- 'start-auth-id=i' => \$start_auth_id,
- 'print-bib-ids' => \$print_bib_ids,
- 'print-auth-ids' => \$print_auth_ids,
- 'link-bibs' => \$link_bibs,
- 'link-auths' => \$link_auths,
- 'progress' => \$progress,
- "db-host=s" => \$db_host,
- "db-user=s" => \$db_user,
- "db-pass=s" => \$db_pass,
- "db-port=s" => \$db_port,
- 'help' => \$help
-);
-
-sub announce {
- my $msg = shift;
- print DateTime->now(time_zone => 'local')->strftime('%F %T')." $msg\n";
-}
-
-sub help {
- print <<HELP;
-
- Update or create bib-to-auth and auth-to-auth links for authority
- records that were created or modified within the last X days.
-
- Usage:
-
- $0 --modified-since 1 --link-auths --link-bibs
-
- Options:
-
- --modified-since <days>
- Process authority records created or modified within the
- last <days> days.
-
- --max-auth-count <count>
- Process <count> authority records in total. Use with
- --start-auth-id to process batches of records across
- multiple instances of the script.
-
- --start-auth-id <id>
- Process authority records whose ID is equal to or greater
- than <id>. Use with --max-auth-count to process batches
- of records across multiple runs of the script.
-
- --print-auth-ids
- Print authority record IDs to process to STDOUT
-
- --print-bib-ids
- Print bib record IDs to process to STDOUT
-
- --link-auths
- Run identified authority records through authority_authority_linker.pl
-
- --link-bibs
- Run identified bib records through authority_control_fields.pl
-
- --progress
- Log linking progress to STDOUT
-
- --db-host
- --db-user
- --db-pass
- --db-port
-
- Database connection params. PG environment variables are
- also inspected for values. When all else fails, try to
- connect to database evergreen\@localhost
-HELP
- exit 0;
-}
-
-help() if $help || !$opt_result;
-
-my $dsn = "dbi:Pg:database=evergreen;host=$db_host;port=$db_port";
-my $dbh = DBI->connect($dsn, $db_user, $db_pass)
- or die "Cannot connect to database: $dsn\n";
-
-$dbh->do('SET statement_timeout = 0');
-
-# ----------------------------------------------------------------------
-# Load the authority record IDs
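-# Note: --modified-since is declared as an integer day count ('=i'),
-# so interpolating it into the INTERVAL literal below is safe.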
-my $where2 = $start_auth_id ? "AND id >= $start_auth_id" : '';
-my $limit = $max_auth_count ? "LIMIT $max_auth_count" : '';
-
-my $sth = $dbh->prepare(<<SQL);
- SELECT id FROM authority.record_entry
- WHERE DATE(edit_date) >= DATE(NOW() - '$modified_since day'::INTERVAL)
- $where2
- ORDER BY id
- $limit
-SQL
-
-$sth->execute;
-
-while (my $ref = $sth->fetchrow_hashref()) {
- push(@auth_ids, $ref->{id});
-}
-$sth->finish;
-
-my $auth_rec_count = scalar(@auth_ids);
-
-if (!@auth_ids) {
- announce("No authority records edited in the last $modified_since days");
- exit 0;
-}
-
-print map {"$_\n"} @auth_ids if $print_auth_ids;
-
-# Let the caller know what the last record processed will be,
-# so the next iteration of the script can start there.
-announce("Final auth ID will be: " . $auth_ids[-1]) if $max_auth_count;
-
-# ----------------------------------------------------------------------
-# Auth-to-Auth linking
-
-if ($link_auths) {
- # Pass all new authority records to the auth-to-auth linker
- for my $rec_id (@auth_ids) {
-
- system(
- './authority_authority_linker.pl',
- '--db-host', $db_host,
- '--db-user', $db_user,
- '--db-pass', ($db_pass || ''),
- '--record', $rec_id
- );
-
- usleep(250000); # 1/4 second; allow ctrl-c to penetrate
- announce("Auth records processed: $counter/$auth_rec_count")
- if $progress && ++$counter % 10 == 0;
- }
-}
-$counter = 0;
-
-# Exit if there is nothing left to do.
-exit unless $print_bib_ids || $link_bibs;
-
-# ----------------------------------------------------------------------
-# Find bib records that we might want to link to the new authority
-# record.
-#
-# Query: give me bib records that link to browse entries that also
-# link to exactly one authority record, specifically the new authority
-# records we are processing via this script. Only include bib records
-# that are not already linked via bib_linking to said authority record.
-# This represents the set of bib records that might need to be linked
-# to our new authority records.
-# ----------------------------------------------------------------------
-my %bib_ids; # de-dupe by record ID.
-my $auth_ids_param = join(',', @auth_ids);
-
-for my $axis (qw/author subject series title/) {
- my $query = <<SQL;
-SELECT
- entry.id,
- are.id AS auth_record,
- def.source AS bib_record
-FROM metabib.browse_${axis}_entry entry
- JOIN metabib.browse_${axis}_entry_simple_heading_map map
- ON (map.entry = entry.id)
- JOIN authority.simple_heading ash ON (ash.id = map.simple_heading)
- JOIN authority.record_entry are ON (are.id = ash.record)
- JOIN metabib.browse_${axis}_entry_def_map def ON (def.entry = entry.id)
- JOIN biblio.record_entry bre ON (bre.id = def.source)
- JOIN (
- -- we only care about browse entries that link to
- -- exactly one auth record, the auth record in question.
- SELECT entry.id, COUNT(are.id)
- FROM metabib.browse_${axis}_entry entry
- JOIN metabib.browse_${axis}_entry_simple_heading_map map
- ON (map.entry = entry.id)
- JOIN authority.simple_heading ash
- ON (ash.id = map.simple_heading)
- JOIN authority.record_entry are
- ON (are.id = ash.record)
- WHERE NOT are.deleted
- GROUP BY 1
- HAVING COUNT(are.id) = 1
- ) singles ON (singles.id = entry.id)
- LEFT JOIN authority.bib_linking link
- ON (link.bib = def.source AND link.authority = are.id)
-WHERE
- bre.deleted IS FALSE
- AND link.authority IS NULL -- unlinked records
- AND are.id IN ($auth_ids_param)
-SQL
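- # For $axis = 'subject', e.g., the ${axis} tables above expand to
- # metabib.browse_subject_entry and friends.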
-
- $sth = $dbh->prepare($query);
- $sth->execute;
- while (my $ref = $sth->fetchrow_hashref()) {
- $bib_ids{$ref->{bib_record}} = 1; # de-dupe
- }
- $sth->finish;
-}
-
-@bib_ids = sort { $a <=> $b } keys %bib_ids; # numeric sort
-my $bib_rec_count = scalar(@bib_ids);
-
-if ($link_bibs) {
- for my $rec_id (@bib_ids) {
- # fire off the linker for each of the identified records
- system('./authority_control_fields.pl',
- '--db-host', $db_host,
- '--db-user', $db_user,
- '--db-pass', ($db_pass || ''),
- '--record', $rec_id,
- '--refresh'
- );
-
- usleep(250000); # 1/4 second; allow ctrl-c to penetrate
- announce("Bib records processed: $counter/$bib_rec_count")
- if $progress && ++$counter % 10 == 0;
- }
-}
-
-print join("\n", @bib_ids) if $print_bib_ids;
-