--- /dev/null
+#!/usr/bin/env perl
+# -----------------------------------------------------------------------
+# Export bib records for Backstage processing.
+#
+# The UTF-8 encoded USMARC string for each record is written to the file
+# named by --out-file; progress messages are printed to STDOUT.
+# When --export-date is provided, each exported bib has its export date
+# set to that value.
+#
+# Exported bibs meet the following criteria:
+#
+# 1. Delete flag must be false.
+#   2. Record cannot contain an 086, 092, or 099 field whose value contains the phrase 'on order'.
+# 3. Boolean filter:
+# [ (001_test OR 035_test) AND has_holdings AND cat_date_in_range ]
+# OR
+# [ 998_test AND create_date_in_range ]
+# -----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use Getopt::Long;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+
+my $db_handle;
+
+my $start_date;
+my $end_date;
+my $export_date;
+my $ids_only;
+my $count_only;
+my $out_file;
+my $limit;
+my $db_user = 'evergreen';
+my $db_host = 'localhost';
+my $db_name = 'evergreen';
+my $db_port = 5432;
+my $db_pass;
+my $help;
+
+GetOptions(
+ 'start-date=s' => \$start_date,
+ 'end-date=s' => \$end_date,
+ 'export-date=s' => \$export_date,
+ 'ids-only' => \$ids_only,
+ 'count-only' => \$count_only,
+ 'out-file=s' => \$out_file,
+ 'limit=i' => \$limit,
+ 'db-user=s' => \$db_user,
+ 'db-host=s' => \$db_host,
+ 'db-name=s' => \$db_name,
+ 'db-port=i' => \$db_port,
+ 'db-pass=s' => \$db_pass,
+ 'help' => \$help
+);
+
+sub help {
+ print <<HELP;
+
+Export bib records for uploading to Backstage for processing.
+MARC data is written to the file named by --out-file; progress
+messages are printed to STDOUT.
+
+$0 --start-date 2015-06-01 \\
+ --end-date 2016-06-01 \\
+ --export-date 2016-06-01 \\
+ --out-file backstage-export.mrc
+
+Options
+
+ --start-date <YYYY-MM-DD>
+ --end-date <YYYY-MM-DD>
+ Export bib records whose cataloging_date (for physical records) or
+ create_date (for electronic records) value is between the provided
+ start and end dates.
+
+ --export-date <YYYY-MM-DD>
+ Sets the export date on each exported bib to the provided value.
+ If omitted, no export date is recorded in the database.
+
+ --out-file </path/to/file>
+ Write MARC records (or IDs) to this file.
+
+ --ids-only
+ Write bib record IDs to the output file instead of the full MARC
+ record.
+
+ --count-only
+ Only print to STDOUT the number of bibs that would be exported.
+
+ --limit <number>
+ Export at most this many records.
+
+HELP
+ exit;
+}
+
+help() if $help;
+
+die "--start-date and --end-date required\n"
+ unless $start_date && $end_date;
+
+die "Invalid date format\n" unless
+ $start_date =~ /^\d{4}-\d{2}-\d{2}$/ &&
+ $end_date =~ /^\d{4}-\d{2}-\d{2}$/ &&
+ (!$export_date || $export_date =~ /^\d{4}-\d{2}-\d{2}$/);
+
+die "--out-file <filename> required\n" unless $out_file || $count_only;
+
+sub bib_query {
+ my $sql = <<SQL;
+
+-- viable_records applies the filters common to all records:
+-- not deleted
+-- is not on order (086/092/099 test)
+WITH viable_records AS (
+
+ SELECT bre.id, bre.cataloging_date, bre.create_date
+ FROM biblio.record_entry bre
+ WHERE NOT deleted
+ AND NOT EXISTS (
+ SELECT 1 FROM metabib.real_full_rec
+ WHERE record = bre.id
+ AND tag IN ('086', '092', '099')
+ AND value ILIKE '%on order%'
+ )
+
+-- electronic_records have a 998$d value that marks the record as electronic
+), electronic_records AS (
+
+ SELECT vr.id, create_date AS filter_date
+ FROM viable_records vr
+ JOIN metabib.real_full_rec mrfr ON (vr.id = mrfr.record)
+ WHERE
+ mrfr.tag = '998' AND
+ mrfr.subfield = 'd' AND
+ mrfr.value IN ('d','t','v','w','x','y','1','6')
+
+-- physical records are non-electronic, have at least one viable
+-- linked copy, have a valid 001 OR 035 field, and have a
+-- non-null cataloging date.
+), physical_records AS (
+
+ SELECT vr.id, vr.cataloging_date AS filter_date
+ FROM viable_records vr
+ JOIN metabib.real_full_rec mfr ON (
+ mfr.record = vr.id AND (
+ (
+ mfr.tag = '001' AND (
+ mfr.value ILIKE 'oc%' OR
+ mfr.value ILIKE 'on%' OR
+ mfr.value ILIKE 'wln%'
+ )
+ ) OR (
+ mfr.tag = '035' AND
+ mfr.subfield = 'a' AND
+ mfr.value ILIKE '%WaOLN%'
+ )
+ )
+ )
+ WHERE vr.cataloging_date IS NOT NULL AND EXISTS (
+ -- bib has at least one non-deleted copy
+ SELECT acp.id
+ FROM asset.copy acp
+ WHERE call_number IN (
+ SELECT id FROM asset.call_number
+ WHERE record = vr.id AND NOT deleted
+ )
+ AND NOT deleted
+ LIMIT 1
+ )
+
+), combined_records AS (
+ SELECT id, filter_date
+ FROM electronic_records
+ UNION
+ SELECT id, filter_date
+ FROM physical_records
+)
+
+SELECT DISTINCT(cr.id)
+FROM combined_records cr
+WHERE cr.filter_date BETWEEN '$start_date' AND '$end_date'
+SQL
+
+ $sql .= " LIMIT $limit" if $limit;
+ return $sql;
+}
+
+
+# Finds bibs to export and prints their MARC to STDOUT
+sub export_marc {
+
+ if ($out_file) {
+ open(MARCFILE, ">$out_file")
+ or die "Cannot open file for writing: $out_file\n";
+ binmode(MARCFILE, ':utf8');
+ }
+
+ my $sth = $db_handle->prepare(bib_query());
+ my $edate_sth = $db_handle->prepare(
+ 'SELECT * FROM metabib.set_export_date(?, ?)');
+
+ $sth->execute;
+ my $count = 0;
+ while (my $bib = $sth->fetchrow_hashref) {
+ $count++;
+ next if $count_only;
+
+ my $bib_id = $bib->{id};
+
+ if ($ids_only) {
+ print MARCFILE "$bib_id\n";
+ print "$count records written...\n" if ($count % 1000) == 0;
+ next;
+ }
+
+ my $rec = $db_handle->selectall_arrayref(
+ 'SELECT marc FROM biblio.record_entry WHERE id = ?',
+ undef, $bib_id);
+
+ my $marc = $rec->[0]->[0];
+ my $marcdoc = MARC::Record->new_from_xml($marc, 'UTF-8', 'USMARC');
+
+ print MARCFILE $marcdoc->as_usmarc;
+
+ print "$count records written...\n" if ($count % 1000) == 0;
+
+ next unless $export_date;
+
+ # Record the export date for this bib via metabib.set_export_date().
+ eval { $edate_sth->execute($bib_id, $export_date) };
+ die "Error setting export date for bib ".
+ "$bib_id to $export_date : $@\n" if $@;
+ }
+
+ close(MARCFILE) if $out_file;
+
+ print "$count total bib records\n";
+
+ $sth->finish;
+ $edate_sth->finish;
+}
+
+sub connect_db {
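+ # statement-timeout=0 removes the server-side statement timeout so
+ # long-running export queries are not cancelled mid-run.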
+ $db_handle = DBI->connect(
+ "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'",
+ $db_user, $db_pass, {
+ RaiseError => 1,
+ PrintError => 0,
+ AutoCommit => 1,
+ pg_expand_array => 0,
+ pg_enable_utf8 => 1
+ }
+ ) or die "Connection to database failed: $DBI::err : $DBI::errstr";
+}
+
+connect_db();
+export_marc();
+
+$db_handle->disconnect;
+
--- /dev/null
+#!/usr/bin/env perl
+# -----------------------------------------------------------------------
+# Import a Backstage-processed ZIP file of MARC records.
+#
+# *.UTF8 members of the ZIP are processed by name: BIB files contain
+# modified bib records, DEL files contain deleted authority records, and
+# CHG/NEW files contain modified or new authority records.
+#
+# Bib records modified only by Backstage are updated in place; bibs also
+# modified locally since --export-date are written to the
+# --bib-collision-file for later re-import via Vandelay.
+#
+# TODO:
+# Disable auth record change propagation during auth record updates.
+# -----------------------------------------------------------------------
+use strict;
+use warnings;
+use DBI;
+use DateTime;
+use Getopt::Long;
+use MARC::Record;
+use MARC::File::XML (BinaryEncoding => 'UTF-8');
+use MARC::File::USMARC;
+use Archive::Zip qw(:ERROR_CODES :CONSTANTS);
+use File::Basename;
+use Sys::Syslog qw(syslog openlog);
+use OpenILS::Utils::Normalize qw(clean_marc);
+binmode(STDOUT, ':utf8');
+
+my $db_handle;
+my $log_mod = 500; # log every 500th of each type of event (see verbose)
+
+my $file;
+my $export_date;
+my $working_dir = '.';
+my $bib_collision_file;
+my $verbose;
+my $db_user = 'evergreen';
+my $db_host = 'localhost';
+my $db_name = 'evergreen';
+my $db_port = 5432;
+my $db_pass;
+my $syslog_facility = 'LOCAL6'; # matches Evergreen gateway
+my $syslog_ops = 'pid';
+my $syslog_ident = 'BACKSTAGE';
+
+my $new_auth_sth;
+my $mod_auth_sth;
+my $del_auth_sth;
+my $delmod_auth_sth;
+my $mod_bibs_sth;
+my $match_auth_sth;
+my $match_auth_001_sth;
+my $new_auth_ctr = 0;
+my $mod_auth_ctr = 0;
+my $del_auth_ctr = 0;
+my $mod_bibs_ctr = 0;
+my $col_bibs_ctr = 0;
+
+my $help;
+
+GetOptions(
+ 'file=s' => \$file,
+ 'export-date=s' => \$export_date,
+ 'working-dir=s' => \$working_dir,
+ 'bib-collision-file=s' => \$bib_collision_file,
+ 'verbose' => \$verbose,
+ 'db-user=s' => \$db_user,
+ 'db-host=s' => \$db_host,
+ 'db-name=s' => \$db_name,
+ 'db-port=i' => \$db_port,
+ 'db-pass=s' => \$db_pass,
+ 'help' => \$help
+);
+
+sub help {
+ print <<HELP;
+
+Import a Backstage-processed ZIP file of MARC bib and authority records
+into Evergreen.
+
+$0 --file /path/to/backstage.zip \\
+ --export-date 2016-06-01 \\
+ --bib-collision-file collisions.mrc
+
+Options
+
+ --export-date <YYYY-MM-DD>
+ Bib records modified within EG since this date are treated
+ specially when ingesting bib records returned by Backstage, to
+ avoid losing changes made by staff since the export.
+
+ --file
+ Full path to ZIP file to process.
+
+ --working-dir
+ Directory where constituent files are extracted.
+ Defaults to the CWD of this script.
+
+ --bib-collision-file
+ File created in the working directory containing MARC data for
+ bib records that were modified both by staff after the export and
+ by Backstage. These records are re-imported later via an external
+ Vandelay process.
+
+ --db-user
+ --db-host
+ --db-name
+ --db-port
+ --db-pass
+ Database connection parameters.
+
+HELP
+ exit;
+}
+
+help() if $help;
+
+die "required: --export-date YYYY-MM-DD\n" unless
+ $export_date && $export_date =~ /^\d{4}-\d{2}-\d{2}$/;
+
+die "--file required\n" unless $file;
+
+# Log every occurrence of each event type.
+$log_mod = 1 if $verbose;
+
+sub announce {
+ my ($level, $msg, $die) = @_;
+ syslog("LOG_$level", $msg);
+
+ my $date_str = DateTime->now(time_zone => 'local')->strftime('%F %T');
+ my $msg_str = "$date_str [$$] $level $msg\n";
+
+ if ($die) {
+ die $msg_str;
+
+ } else {
+ if ($level eq 'ERR' or $level eq 'WARNING') {
+ # always copy problem messages to the console; use warn (STDERR)
+ # so they are not duplicated on STDOUT
+ warn $msg_str;
+ } else {
+ print $msg_str;
+ }
+ }
+}
+
+
+sub connect_db {
+ $db_handle = DBI->connect(
+ "dbi:Pg:db=$db_name;host=$db_host;port=$db_port;options='--statement-timeout=0'",
+ $db_user, $db_pass, {
+ RaiseError => 1,
+ PrintError => 0,
+ AutoCommit => 1,
+ pg_expand_array => 0,
+ pg_enable_utf8 => 1
+ }
+ ) or die "Connection to database failed: $DBI::err : $DBI::errstr";
+}
+
+sub process_zip_file {
+
+ my $zip = Archive::Zip->new();
+
+ announce('ERR', "Failed to read $file", 1)
+ unless $zip->read($file) == AZ_OK;
+
+ # Avoid processing XLS and HTM files.
+ # All of the MARC files end in .UTF8.
+ for my $member ($zip->membersMatching('.*\.UTF8')) {
+
+ my $basename = basename($member->fileName());
+
+ announce('INFO', "Extracting file $basename");
+
+ my $local_file = "$working_dir/$basename";
+
+ announce('ERR', "Unable to extract to file: $local_file", 1)
+ unless $member->extractToFileNamed($local_file) == AZ_OK;
+
+ my $marc_batch = MARC::File::USMARC->in($local_file, 'UTF8')
+ or announce('ERR', "Unable to read $local_file as MARC", 1);
+
+ if ($basename =~ /BIB/) {
+
+ handle_modified_bibs($marc_batch);
+
+ } elsif ($basename =~ /DEL/) {
+
+ handle_deleted_auths($marc_batch);
+
+ } elsif ($basename =~ /CHG|NEW/) {
+
+ handle_modified_auths($marc_batch);
+
+ } else {
+ announce('WARNING', "Un-handled file type: $basename");
+ }
+ }
+}
+
+# Returns IDs of bib records that have been modified since the export date.
+my @modified_bibs;
+sub find_modified_bibs {
+
+ my $id_arrays = $db_handle->selectall_arrayref(<<" SQL");
+ SELECT id
+ FROM biblio.record_entry
+ WHERE NOT deleted AND edit_date >= '$export_date'
+ SQL
+
+ @modified_bibs = map {$_->[0]} @$id_arrays;
+
+ announce('INFO', scalar(@modified_bibs)." bibs modified since export");
+}
+
+
+
+# 1. Bibs that have been modified by Backstage and locally are written
+# to the --bib-collision-file as MARC for later processing.
+# 2. Bibs that have only been modified by Backstage are updated
+# directly in the database.
+sub handle_modified_bibs {
+ my $marc_batch = shift;
+
+ find_modified_bibs() unless @modified_bibs;
+
+ while (my $record = $marc_batch->next()) {
+ my $bib_id = $record->subfield('901', 'c');
+
+ if (!$bib_id) {
+ announce('ERR', "Bib record has no 901c (ID) value. Skipping");
+ next;
+ }
+
+ if (grep {$bib_id eq $_} @modified_bibs) {
+ # Bib was edited by both parties. Save to external file
+ # for later processing.
+
+ write_bib_collision($record);
+
+ } else {
+ # Update our copy of the record.
+
+ my $marcxml = clean_marc($record->as_xml_record());
+ update_bib($marcxml, $bib_id);
+ }
+ }
+}
+
+sub update_bib {
+ my $marcxml = shift;
+ my $bib_id = shift;
+
+ eval { $mod_bibs_sth->execute($marcxml, $bib_id) };
+
+ if ($@) {
+ announce('ERR', "Error updating biblio record: $@ : $marcxml");
+ return;
+ }
+
+ $mod_bibs_ctr++;
+
+ announce('INFO', "Updated $mod_bibs_ctr bib records")
+ if $mod_bibs_ctr % $log_mod == 0;
+}
+
+sub write_bib_collision {
+ my $record = shift;
+
+ my $filename = "$working_dir/$bib_collision_file";
+
+ open(BIBS_FILE, ">>$filename") or
+ announce('ERR', "Cannot open bib collision file: $filename : $!", 1);
+
+ binmode(BIBS_FILE, ":utf8");
+
+ print BIBS_FILE $record->as_usmarc();
+
+ close BIBS_FILE or
+ announce('WARNING', "Error closing bib collision file: $filename : $!");
+
+ $col_bibs_ctr++;
+
+ announce('INFO', "Dumped $col_bibs_ctr bib collisions to file")
+ if $col_bibs_ctr % $log_mod == 0;
+}
+
+sub handle_deleted_auths {
+ my $marc_batch = shift;
+
+ while (my $record = $marc_batch->next()) {
+ my @matches = find_matching_auths($record);
+
+ for my $auth_id (@matches) {
+
+ eval {
+ # 2 mods.. wrap in transaction? (see autocommit)
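+ # One possible transactional form (a sketch, not enabled here;
+ # AutoCommit stays on for the rest of the script):
+ #   $db_handle->begin_work;
+ #   eval {
+ #       $del_auth_sth->execute($auth_id);
+ #       $delmod_auth_sth->execute($auth_id);
+ #       $db_handle->commit;
+ #   };
+ #   $db_handle->rollback if $@;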
+ $del_auth_sth->execute($auth_id);
+ $delmod_auth_sth->execute($auth_id);
+ };
+
+ if ($@) {
+ announce(
+ 'ERR', "Error deleting authority record: $@ : $auth_id");
+ next;
+ }
+
+ $del_auth_ctr++;
+
+ announce('INFO', "Deleted $del_auth_ctr authority records")
+ if $del_auth_ctr % $log_mod == 0;
+ }
+ }
+}
+
+sub handle_modified_auths {
+ my $marc_batch = shift;
+
+ while (my $record = $marc_batch->next()) {
+
+ my @matches = find_matching_auths($record);
+ my $marcxml = clean_marc($record->as_xml_record());
+
+ if (@matches) {
+ update_auth($marcxml, $_) for @matches;
+ } else {
+ insert_auth($marcxml);
+ }
+ }
+}
+
+
+sub update_auth {
+ my $marcxml = shift;
+ my $auth_id = shift;
+
+ eval { $mod_auth_sth->execute($marcxml, $auth_id) };
+
+ if ($@) {
+ announce('ERR', "Error updating authority record: $@ : $marcxml");
+ return;
+ }
+
+ $mod_auth_ctr++;
+
+ announce('INFO', "Updated $mod_auth_ctr authority records")
+ if $mod_auth_ctr % $log_mod == 0;
+}
+
+sub insert_auth {
+ my $marcxml = shift;
+
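+ # record_entry.last_xact_id must be populated; stamp new authority
+ # rows with a unique import marker.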
+ eval { $new_auth_sth->execute($marcxml, "IMPORT-" . time) };
+
+ if ($@) {
+ announce('ERR',
+ "Error creating new authority record: $@ : $marcxml");
+ return;
+ }
+
+ $new_auth_ctr++;
+
+ announce('INFO', "Created $new_auth_ctr authority records")
+ if $new_auth_ctr % $log_mod == 0;
+}
+
+# Returns IDs of matching authority records. Matching is attempted
+# against the 001, then 010$a, then 035$a.
+sub find_matching_auths {
+ my $record = shift;
+
+ my $tag = '001';
+ my $subfield;
+
+ # 001 test requires its own SQL query
+ if (my $field = $record->field($tag)) {
+ if ($subfield = $field->data) {
+
+ $match_auth_001_sth->execute($subfield);
+ my $matches = $match_auth_001_sth->fetchall_arrayref;
+
+ if (@$matches) { # DEBUGGING...
+ my @ids = map {$_->[0]} @$matches;
+ announce('INFO', "Auth 001=$subfield matched records: @ids");
+ }
+ return map {$_->[0]} @$matches if @$matches;
+ }
+ }
+
+ $tag = '010';
+ $subfield = $record->subfield($tag, 'a');
+
+ if (!$subfield) {
+ $tag = '035';
+ $subfield = $record->subfield($tag, 'a');
+ }
+
+ return () unless $subfield;
+
+ $match_auth_sth->execute($tag, $subfield);
+ my $matches = $match_auth_sth->fetchall_arrayref;
+
+ return map {$_->[0]} @$matches;
+}
+
+sub prepare_statements {
+
+ $del_auth_sth = $db_handle->prepare(<<" SQL");
+ DELETE FROM authority.record_entry WHERE id = ?
+ SQL
+
+ $delmod_auth_sth = $db_handle->prepare(<<" SQL");
+ UPDATE authority.record_entry
+ SET edit_date = NOW() WHERE id = ?
+ SQL
+
+ $mod_bibs_sth = $db_handle->prepare(<<" SQL");
+ UPDATE biblio.record_entry
+ SET marc = ?, edit_date = NOW()
+ WHERE id = ?
+ SQL
+
+ $mod_auth_sth = $db_handle->prepare(<<" SQL");
+ UPDATE authority.record_entry
+ SET marc = ?, edit_date = NOW()
+ WHERE id = ?
+ SQL
+
+ $new_auth_sth = $db_handle->prepare(<<" SQL");
+ INSERT INTO authority.record_entry (marc, last_xact_id)
+ VALUES (?, ?)
+ SQL
+
+ $match_auth_sth = $db_handle->prepare(<<" SQL");
+ SELECT DISTINCT(record)
+ FROM authority.full_rec
+ WHERE
+ tag = ?
+ AND subfield = 'a'
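+ -- naco_normalize folds case and strips punctuation so headings match loosely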
+ AND value = NACO_NORMALIZE(?, 'a')
+ SQL
+
+ $match_auth_001_sth = $db_handle->prepare(<<" SQL");
+ SELECT DISTINCT(record)
+ FROM authority.full_rec
+ WHERE tag = '001' AND value = ?
+ SQL
+}
+
+openlog($syslog_ident, $syslog_ops, $syslog_facility);
+connect_db();
+prepare_statements();
+process_zip_file();
+
+$new_auth_sth->finish;
+$mod_auth_sth->finish;
+$del_auth_sth->finish;
+$delmod_auth_sth->finish;
+$match_auth_sth->finish;
+$match_auth_001_sth->finish;
+$mod_bibs_sth->finish;
+
+$db_handle->disconnect;
+