Updated authority files. Added files for import of bibs Getting ready for demo.
authorBill Erickson <berickxx@gmail.com>
Wed, 29 Oct 2014 21:06:28 +0000 (17:06 -0400)
committerBill Erickson <berickxx@gmail.com>
Thu, 21 Mar 2019 19:46:23 +0000 (15:46 -0400)
    Cross-port: 5984783

KCLS/bs_files/bib_inserter.pl [new file with mode: 0644]
KCLS/bs_files/bib_updater.pl [new file with mode: 0644]
KCLS/bs_files/marc2bre.pl [new file with mode: 0644]
KCLS/bs_files/sample.json
KCLS/bs_files/updated_bs_marc.zip [new file with mode: 0644]
KCLS/linking/authority_control_fields_batcher.pl

diff --git a/KCLS/bs_files/bib_inserter.pl b/KCLS/bs_files/bib_inserter.pl
new file mode 100644 (file)
index 0000000..5ed9417
--- /dev/null
@@ -0,0 +1,143 @@
+#!/usr/bin/perl
+use strict;
+
+use OpenSRF::System;
+use OpenSRF::EX qw/:try/;
+use OpenSRF::Utils::SettingsClient;
+use OpenILS::Utils::Fieldmapper;
+use OpenSRF::Utils::JSON;
+use FileHandle;
+use Data::Dumper;
+
+use Time::HiRes qw/time/;
+use Getopt::Long;
+
+my @files;
+my ($config, $output, @auto, @order, @wipe, $quiet) =
+       ('/openils/conf/opensrf_core.xml');
+my $nocommit = 0;
+
+GetOptions( 'config=s'     => \$config,
+            'output=s'     => \$output,
+            'wipe=s'       => \@wipe,
+            'autoprimary=s' => \@auto,
+            'order=s'      => \@order,
+            'nocommit|n'    => \$nocommit,
+            'quiet'         => \$quiet,
+);
+
+my %lineset;
+my %fieldcache;
+
+OpenSRF::System->bootstrap_client( config_file => $config );
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+my $count = 0;
+my $starttime = time;
+
+print STDERR "\nWriting file ...\n" if (!$quiet);
+
+$output = '&STDOUT' unless ($output);
+$output = FileHandle->new(">$output") if ($output);
+
+binmode($output,'utf8');
+
+$output->print("SET CLIENT_ENCODING TO 'UNICODE';\n\n");
+$output->print("BEGIN;\n\n");
+
+while ( my $rec = <> ) {
+       next unless ($rec);
+
+       
+
+       my $row;
+       try {
+               $row = OpenSRF::Utils::JSON->JSON2perl($rec);
+       } catch Error with {
+               my $e = shift;
+               warn "\n\n !!! Error : $e \n\n at or around line $count\n";
+               print STDERR "\nSomething went wrong...\n";
+       };
+       next unless ($row);
+
+       my $class = $row->class_name;
+       my $hint = $row->json_hint;
+       
+       # active,create_date,creator,deleted,edit_date,editor,fingerprint,id,
+       #last_xact_id,marc,quality,source,tcn_source,tcn_value,owner,share_depth';
+       
+       my $marc = $row->marc;
+       $marc =~ s/\f/\\f/gos;
+       $marc =~ s/\n/\\n/gos;
+       $marc =~ s/\r/\\r/gos;
+       $marc =~ s/\t/\\t/gos;
+       $marc =~ s/\\/\\\\/gos;
+
+       print STDERR "\n" . $row->tcn_value;
+       $output->print("INSERT INTO biblio.record_entry (marc, last_xact_id) VALUES (\$BOOGADYBOOOGADYBOOOO\$"
+       .$marc
+       ."\$BOOGADYBOOOGADYBOOOO\$, 'kyle');\n");
+
+       if (!$quiet && !($count % 500)) {
+               
+               print STDERR "\r$count\t". $count / (time - $starttime);
+       }
+
+       $count++;
+}
+
+
+
+
+my $after_commit = '';
+for my $h (@order) {
+       
+       
+       
+       # continue if there was no data for this table
+       next unless ($fieldcache{$h});
+       
+       
+
+       my $fields = join(',', @{ $fieldcache{$h}{fields} });
+       $output->print( "DELETE FROM $fieldcache{$h}{table};\n" ) if (grep {$_ eq $h } @wipe);
+       # Speed up loading of bib records
+       $output->print( "COPY $fieldcache{$h}{table} ($fields) FROM STDIN;\n" );
+
+       for my $line (@{ $lineset{$h} }) {
+               my @data;
+               my $x = 0;
+               
+               for my $d (@$line) {
+                       if (!defined($d)) {
+                               $d = '\N';
+                       } else {
+                               $d =~ s/\f/\\f/gos;
+                               $d =~ s/\n/\\n/gos;
+                               $d =~ s/\r/\\r/gos;
+                               $d =~ s/\t/\\t/gos;
+                               $d =~ s/\\/\\\\/gos;
+                       }
+                       if ($h eq 'bre' and $fieldcache{$h}{fields}[$x] eq 'quality') {
+                               $d = int($d) if ($d ne '\N');
+                       }
+                       push @data, $d;
+                       $x++;
+               }
+               $output->print( join("\t", @data)."\n" );
+       }
+
+       $output->print('\.'."\n\n");
+       
+       if ($h eq 'mfr') {
+               $output->print("SELECT reporter.enable_materialized_simple_record_trigger();\n");
+               $output->print("SELECT reporter.disable_materialized_simple_record_trigger();\n");
+       }
+
+       $after_commit .= "SELECT setval('$fieldcache{$h}{sequence}'::TEXT, (SELECT MAX($fieldcache{$h}{pkey}) FROM $fieldcache{$h}{table}), TRUE);\n"
+               if (!grep { $_ eq $h} @auto);
+}
+
+$output->print("COMMIT;\n\n") unless $nocommit;
+$output->print($after_commit);
+$output->close; 
diff --git a/KCLS/bs_files/bib_updater.pl b/KCLS/bs_files/bib_updater.pl
new file mode 100644 (file)
index 0000000..5ed9417
--- /dev/null
@@ -0,0 +1,143 @@
+#!/usr/bin/perl
+use strict;
+
+use OpenSRF::System;
+use OpenSRF::EX qw/:try/;
+use OpenSRF::Utils::SettingsClient;
+use OpenILS::Utils::Fieldmapper;
+use OpenSRF::Utils::JSON;
+use FileHandle;
+use Data::Dumper;
+
+use Time::HiRes qw/time/;
+use Getopt::Long;
+
+my @files;
+my ($config, $output, @auto, @order, @wipe, $quiet) =
+       ('/openils/conf/opensrf_core.xml');
+my $nocommit = 0;
+
+GetOptions( 'config=s'     => \$config,
+            'output=s'     => \$output,
+            'wipe=s'       => \@wipe,
+            'autoprimary=s' => \@auto,
+            'order=s'      => \@order,
+            'nocommit|n'    => \$nocommit,
+            'quiet'         => \$quiet,
+);
+
+my %lineset;
+my %fieldcache;
+
+OpenSRF::System->bootstrap_client( config_file => $config );
+Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
+
+my $count = 0;
+my $starttime = time;
+
+print STDERR "\nWriting file ...\n" if (!$quiet);
+
+$output = '&STDOUT' unless ($output);
+$output = FileHandle->new(">$output") if ($output);
+
+binmode($output,'utf8');
+
+$output->print("SET CLIENT_ENCODING TO 'UNICODE';\n\n");
+$output->print("BEGIN;\n\n");
+
+while ( my $rec = <> ) {
+       next unless ($rec);
+
+       
+
+       my $row;
+       try {
+               $row = OpenSRF::Utils::JSON->JSON2perl($rec);
+       } catch Error with {
+               my $e = shift;
+               warn "\n\n !!! Error : $e \n\n at or around line $count\n";
+               print STDERR "\nSomething went wrong...\n";
+       };
+       next unless ($row);
+
+       my $class = $row->class_name;
+       my $hint = $row->json_hint;
+       
+       # active,create_date,creator,deleted,edit_date,editor,fingerprint,id,
+       #last_xact_id,marc,quality,source,tcn_source,tcn_value,owner,share_depth';
+       
+       my $marc = $row->marc;
+       $marc =~ s/\f/\\f/gos;
+       $marc =~ s/\n/\\n/gos;
+       $marc =~ s/\r/\\r/gos;
+       $marc =~ s/\t/\\t/gos;
+       $marc =~ s/\\/\\\\/gos;
+
+       print STDERR "\n" . $row->tcn_value;
+       $output->print("INSERT INTO biblio.record_entry (marc, last_xact_id) VALUES (\$BOOGADYBOOOGADYBOOOO\$"
+       .$marc
+       ."\$BOOGADYBOOOGADYBOOOO\$, 'kyle');\n");
+
+       if (!$quiet && !($count % 500)) {
+               
+               print STDERR "\r$count\t". $count / (time - $starttime);
+       }
+
+       $count++;
+}
+
+
+
+
+my $after_commit = '';
+for my $h (@order) {
+       
+       
+       
+       # continue if there was no data for this table
+       next unless ($fieldcache{$h});
+       
+       
+
+       my $fields = join(',', @{ $fieldcache{$h}{fields} });
+       $output->print( "DELETE FROM $fieldcache{$h}{table};\n" ) if (grep {$_ eq $h } @wipe);
+       # Speed up loading of bib records
+       $output->print( "COPY $fieldcache{$h}{table} ($fields) FROM STDIN;\n" );
+
+       for my $line (@{ $lineset{$h} }) {
+               my @data;
+               my $x = 0;
+               
+               for my $d (@$line) {
+                       if (!defined($d)) {
+                               $d = '\N';
+                       } else {
+                               $d =~ s/\f/\\f/gos;
+                               $d =~ s/\n/\\n/gos;
+                               $d =~ s/\r/\\r/gos;
+                               $d =~ s/\t/\\t/gos;
+                               $d =~ s/\\/\\\\/gos;
+                       }
+                       if ($h eq 'bre' and $fieldcache{$h}{fields}[$x] eq 'quality') {
+                               $d = int($d) if ($d ne '\N');
+                       }
+                       push @data, $d;
+                       $x++;
+               }
+               $output->print( join("\t", @data)."\n" );
+       }
+
+       $output->print('\.'."\n\n");
+       
+       if ($h eq 'mfr') {
+               $output->print("SELECT reporter.enable_materialized_simple_record_trigger();\n");
+               $output->print("SELECT reporter.disable_materialized_simple_record_trigger();\n");
+       }
+
+       $after_commit .= "SELECT setval('$fieldcache{$h}{sequence}'::TEXT, (SELECT MAX($fieldcache{$h}{pkey}) FROM $fieldcache{$h}{table}), TRUE);\n"
+               if (!grep { $_ eq $h} @auto);
+}
+
+$output->print("COMMIT;\n\n") unless $nocommit;
+$output->print($after_commit);
+$output->close; 
diff --git a/KCLS/bs_files/marc2bre.pl b/KCLS/bs_files/marc2bre.pl
new file mode 100644 (file)
index 0000000..bddde4f
--- /dev/null
@@ -0,0 +1,396 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+use Error qw/:try/;
+use OpenILS::Utils::Fieldmapper;
+use Digest::MD5 qw/md5_hex/;
+use OpenSRF::Utils::JSON;
+use OpenILS::Application::AppUtils;
+use Data::Dumper;
+use Unicode::Normalize;
+use Encode;
+
+use FileHandle;
+use Time::HiRes qw/time/;
+use Getopt::Long;
+use MARC::Batch;
+use MARC::File::XML ( BinaryEncoding => 'utf-8' );
+use MARC::Charset;
+use DBI;
+
+#MARC::Charset->ignore_errors(1);
+
+my ($id_field, $id_subfield, $recid, $user, $config, $idlfile, $marctype, $tcn_offset, $tcn_mapfile, $tcn_dumpfile, $used_id_file, $used_tcn_file, $enc, @files, @trash_fields, @req_fields, $use901, $quiet, $tcn_field, $tcn_subfield) =
+       ('', 'a', 0, 1, '/openils/conf/opensrf_core.xml', '/openils/conf/fm_IDL.xml', 'USMARC', 0);
+
+my ($db_driver, $db_host, $db_port, $db_name, $db_user, $db_pw) =
+       ('Pg', 'localhost', 5432, 'evergreen', 'postgres', 'postgres');
+
+GetOptions(
+       'marctype=s'    => \$marctype, # format of MARC files being processed defaults to USMARC, often set to XML
+       'startid=i'     => \$recid, # id number to start with when auto-assigning id numbers, defaults to highest id in database + 1
+       'idfield=s'     => \$id_field, # field containing the record's desired internal id, NOT tcn
+       'idsubfield=s'  => \$id_subfield, # subfield of above record id field
+       'tcnfield=s'    => \$tcn_field, # field containing the record's desired tcn, NOT the internal id
+       'tcnsubfield=s' => \$tcn_subfield, # subfield of above record tcn field
+       'tcnoffset=i'   => \$tcn_offset, # optionally skip characters at beginning of supplied tcn (e.g. to remove '(Sirsi)')
+       'user=s'        => \$user, # set creator/editor values for records in database
+       'encoding=s'    => \$enc, # set assumed MARC encoding for MARC::Charset
+       'keyfile=s'     => \$tcn_mapfile, # DEPRECATED, use tcn_mapfile instead
+       'tcn_mapfile=s' => \$tcn_mapfile, # external file which allows for matching specific record tcns to specific record ids, format = one id_number|tcn_number combo per line
+       'tcnfile=s'     => \$tcn_dumpfile, # DEPRECATED, use tcn_dumpfile instead
+       'tcn_dumpfile=s'        => \$tcn_dumpfile, # allows specification of a dumpfile for all used tcn values
+       'config=s'      => \$config, # location of OpenSRF core config file, defaults to /openils/conf/opensrf_core.xml
+       'file=s'        => \@files, # files to process (or you can simple list the files as unnamed arguments, i.e. @ARGV)
+       'required_fields=s'     => \@req_fields, # skip any records missing these fields
+       'trash=s'       => \@trash_fields, # fields to remove from all processed records
+       'xml_idl=s'     => \$idlfile, # location of XML IDL file, defaults to /openils/conf/fm_IDL.xml
+       'dontuse=s'     => \$used_id_file, # DEPRECATED, use used_id_file instead
+       'used_id_file=s'        => \$used_id_file, # external file which prevents id collisions by specifying ids already in use in the database, format = one id number per line
+       'used_tcn_file=s'       => \$used_tcn_file, # external file which prevents tcn collisions by specifying tcns already in use in the database, format = one tcn number per line
+       "db_driver=s"   => \$db_driver, # database driver type, usually 'Pg'
+       "db_host=s"     => \$db_host, # database hostname
+       "db_port=i"     => \$db_port, # database port
+       "db_name=s"     => \$db_name, # database name
+       "db_user=s"     => \$db_user, # database username
+       "db_pw=s"       => \$db_pw, # database password
+       'use901'        => \$use901, # use values from previously created 901 fields and skip all other processing
+       'quiet'         => \$quiet # do not output progress count
+);
+
+@trash_fields = split(/,/,join(',',@trash_fields));
+@req_fields = split(/,/,join(',',@req_fields));
+
+if ($enc) {
+       MARC::Charset->ignore_errors(1);
+       MARC::Charset->assume_encoding($enc);
+}
+
+if (uc($marctype) eq 'XML') {
+       'open'->use(':utf8');
+} else {
+       bytes->use();
+}
+
+@files = @ARGV if (!@files);
+
+my @ses;
+my @req;
+my %processing_cache;
+
+my $dsn = "dbi:$db_driver:host=$db_host;port=$db_port;dbname=$db_name";
+
+if (!$recid) {
+    my $table = 'biblio_record_entry';
+    $table = 'biblio.record_entry' if ($db_driver eq 'Pg');
+
+       my $dbh = DBI->connect($dsn,$db_user,$db_pw);
+       my $sth = $dbh->prepare("SELECT MAX(id) + 1 FROM $table");
+
+       $sth->execute;
+       $sth->bind_col(1, \$recid);
+       $sth->fetch;
+       $sth->finish;
+       $dbh->disconnect;
+
+       # In a clean Evergreen schema, the maximum ID will be -1; but sequences
+       # have to start at 1, so handle the clean Evergreen schema situation
+       if ($recid == 0) {
+               $recid = 1;
+       }
+}
+
+my %tcn_source_map = (
+       a  => 'Sirsi_Auto',
+       o  => 'OCLC',
+       i  => 'ISxN',
+       l  => 'LCCN',
+       s  => 'System',
+       g  => 'Gutenberg',
+       z  => 'Unknown',
+);
+
+Fieldmapper->import(IDL => $idlfile);
+
+my %tcn_map;
+if ($tcn_mapfile) {
+       open F, $tcn_mapfile or die "Couldn't open key file $tcn_mapfile";
+       while (<F>) {
+               if ( /^(\d+)\|(\S+)/o ) {
+                       $tcn_map{$1} = $2;
+               }
+       }
+       close(F);
+}
+
+my %used_recids;
+if ($used_id_file) {
+       open F, $used_id_file or die "Couldn't open used-id file $used_id_file";
+       while (<F>) {
+               chomp;
+               s/^\s*//;
+               s/\s*$//;
+               $used_recids{$_} = 1;
+       }
+       close(F);
+}
+
+my %used_tcns;
+if ($used_tcn_file) {
+       open F, $used_tcn_file or die "Couldn't open used-tcn file $used_tcn_file";
+       while (<F>) {
+               chomp;
+               s/^\s*//;
+               s/\s*$//;
+               $used_tcns{$_} = 1;
+       }
+       close(F);
+}
+
+select STDERR; $| = 1;
+select STDOUT; $| = 1;
+
+my $batch = new MARC::Batch ( $marctype, @files );
+$batch->strict_off();
+$batch->warnings_off();
+
+my $starttime = time;
+my $rec;
+my $count = 0;
+PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
+       next if ($rec == -1);
+
+       $count++;
+
+       # Skip records that don't contain a required field (like '245', for example)
+       foreach my $req_field (@req_fields) {
+               if (!$rec->field("$req_field")) {
+                       warn "\n!!! Record $count missing required field $req_field, skipping record.\n";
+                       next PROCESS;
+               }
+       }
+
+       my $id;
+       my $tcn_value = '';
+       my $tcn_source = '';
+       # If $use901 is set, use it for the id, the tcn, and the tcn source without ANY further processing (i.e. no error checking)
+       if ($use901) {
+               $rec->delete_field($_) for ($rec->field(@trash_fields));
+               $tcn_value = $rec->subfield('901' => 'a');
+               $tcn_source = $rec->subfield('901' => 'b');
+               $id = $rec->subfield('901' => 'c');
+       } else {
+               # This section of code deals with the record's 'id', which is a system-level, numeric, internal identifier
+               # It is often convenient but not necessary to carry over the internal ids from your previous ILS, so here is where that happens
+               if ($id_field) {
+                       my $field = $rec->field($id_field);
+                       if ($field) {
+                               if ($field->is_control_field) {
+                                       $id = $field->data;
+                               } else {
+                                       $id = $field->subfield($id_subfield);
+                               }
+                               # ensure internal record ids are numeric only
+                               $id =~ s/\D+//gso if $id;
+                       }
+
+                       # catch problem ids
+                       if (!$id) {
+                               warn "\n!!! Record $count has missing or invalid id field $id_field, assigning new id.\n";
+                               $id = '';
+                       } elsif (exists $used_recids{$id}) {
+                               warn "\n!!! Record $count has a duplicate id in field $id_field, assigning new id.\n";
+                               $id = '';
+                       } else {
+                               $used_recids{$id} = 1;
+                       }
+               }
+
+               # id field not specified or found to be invalid, assign auto id
+               if (!$id) {
+                       while (exists $used_recids{$recid}) {
+                               $recid++;
+                       }
+                       $used_recids{$recid} = 1;
+                       $id = $recid;
+                       $recid++;
+               }
+
+               # This section of code deals with the record's 'tcn', or title control number, which is a record-level, possibly alpha-numeric, sometimes user-supplied value
+               if ($tcn_field) {
+                       if ($tcn_mapfile) {
+                               if (my $tcn = $tcn_map{$id}) {
+                                       $rec->delete_field( $_ ) for ($rec->field($tcn_field));
+                                       $rec->append_fields( MARC::Field->new( $tcn_field, '', '', $tcn_subfield, $tcn ) );
+                               } else {
+                                       warn "\n!!! ID $id not found in tcn_mapfile, skipping record.\n";
+                                       $count++;
+                                       next;
+                               }
+                       }
+
+                       my $field = $rec->field($tcn_field);
+                       if ($field) {
+                               if ($field->is_control_field) {
+                                       $tcn_value = $field->data;
+                               } else {
+                                       $tcn_value = $field->subfield($tcn_subfield);
+                               }
+                               # $tcn_offset is another Sirsi influence, as it will allow you to remove '(Sirsi)'
+                               # from exported tcns, but was added more generically to perhaps support other use cases
+                               if ($tcn_value) { 
+                                       $tcn_value = substr($tcn_value, $tcn_offset);
+                               } else {
+                                       $tcn_value = '';
+                               }
+                       }
+               }
+
+               # turn our id and tcn into a 901 field, and also create a tcn and/or figure out the tcn source
+               ($tcn_value, $tcn_source) = preprocess($rec, $tcn_value, $id);
+               # delete the old identifier and trash fields
+               $rec->delete_field($_) for ($rec->field('901', $tcn_field, $id_field, @trash_fields));
+       }
+
+       (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
+       $xml =~ s/^<\?xml.+\?\s*>//go;
+       $xml =~ s/>\s+</></go;
+       $xml =~ s/\p{Cc}//go;
+       $xml = OpenILS::Application::AppUtils->entityize($xml);
+       $xml =~ s/[\x00-\x1f]//go;
+
+       my $bib = new Fieldmapper::biblio::record_entry;
+       $bib->id($id);
+       $bib->active('t');
+       $bib->deleted('f');
+       $bib->marc($xml);
+       $bib->creator($user);
+       $bib->create_date('now');
+       $bib->editor($user);
+       $bib->edit_date('now');
+       $bib->tcn_source($tcn_source);
+       $bib->tcn_value($tcn_value);
+       $bib->last_xact_id('IMPORT-'.$starttime);
+
+       print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
+       $used_tcns{$tcn_value} = 1;
+
+       if (!$quiet && !($count % 50)) {
+               print STDERR "\r$count\t". $count / (time - $starttime);
+       }
+}
+
+if ($tcn_dumpfile) {
+    open TCN_DUMPFILE, '>', $tcn_dumpfile;
+    print TCN_DUMPFILE "$_\n" for (keys %used_tcns);
+}
+
+
+sub preprocess {
+       my $rec = shift;
+       my $tcn_value = shift;
+       my $id = shift;
+
+       my $tcn_source = '';
+       # in the following code, $tcn_number represents the portion of the tcn following the source code-letter
+       my $tcn_number = '';
+       my $warn = 0;
+       my $passed_tcn = '';
+
+       # this preprocess subroutine is optimized for Sirsi-created tcns, that is, those with a single letter
+       # followed by some digits (and maybe 'x' in older systems).  If using user supplied tcns, try to identify
+       # the source here, otherwise set to 'z' ('Unknown')
+       if ($tcn_value =~ /([a-z])([0-9xX]+)/) {
+               $tcn_source = $1;
+               $tcn_number = $2;
+       } else {
+               $tcn_source = 'z';
+       }
+       
+       # save and warn if a passed in TCN is replaced  
+       if ($tcn_value && exists $used_tcns{$tcn_value}) {
+               $passed_tcn = $tcn_value;
+               $tcn_value = '';
+               $tcn_number = '';
+               $tcn_source = '';
+               $warn = 1;
+       } 
+
+       # we didn't have a user supplied tcn, or it was a duplicate, so let's derive one from commonly unique record fields
+       if (!$tcn_value) {
+               my $f = $rec->field('001');
+               $tcn_value = despace($f->data) if ($f);
+       }
+
+       if (!$tcn_value || exists $used_tcns{$tcn_value}) {
+               my $f = $rec->field('000');
+               if ($f) {
+                       $tcn_number = despace($f->data);
+                       $tcn_source = 'g'; # only Project Gutenberg seems to use this
+                       $tcn_value = $tcn_source.$tcn_number;
+               }
+       }
+
+    if (!$tcn_value || exists $used_tcns{$tcn_value}) {
+        my $f = $rec->field('020');
+               if ($f) {       
+                       $tcn_number = despace($f->subfield('a'));
+                       $tcn_source = 'i';
+                       $tcn_value = $tcn_source.$tcn_number;
+               }
+    }
+
+    if (!$tcn_value || exists $used_tcns{$tcn_value}) {
+        my $f = $rec->field('022');
+               if ($f) {       
+                       $tcn_number = despace($f->subfield('a'));
+                       $tcn_source = 'i';
+                       $tcn_value = $tcn_source.$tcn_number;
+               }
+    }
+
+    if (!$tcn_value || exists $used_tcns{$tcn_value}) {
+        my $f = $rec->field('010');
+               if ($f) {       
+                       $tcn_number = despace($f->subfield('a'));
+                       $tcn_source = 'l';
+                       $tcn_value = $tcn_source.$tcn_number;
+               }
+    }
+
+       # special case to catch possibly passed in full OCLC numbers and those derived from the 001 field
+       if ($tcn_value =~ /^oc(m|n)(\d+)$/o) {
+               $tcn_source = 'o';
+               $tcn_number = $2;
+               $tcn_value = $tcn_source.$tcn_number;
+       }
+
+    if (!$tcn_value || exists $used_tcns{$tcn_value}) {
+               $tcn_source = 's';
+               $tcn_number = $id;
+               $tcn_value = $tcn_source.$tcn_number;
+               $warn = 1
+    }
+
+
+       # expand $tcn_source from code letter to full name
+       $tcn_source = do { $tcn_source_map{$tcn_source} || 'Unknown' };
+
+       if ($warn) {
+               warn "\n!!! TCN $passed_tcn is already in use, using TCN ($tcn_value) derived from $tcn_source ID.\n";
+       }
+
+       return ($tcn_value, $tcn_source);
+}
+
+sub despace {
+       my $value = shift;
+
+       # remove all leading/trailing spaces and trucate at first internal space if present
+       $value =~ s/\s*$//o;
+       $value =~ s/^\s*//o;
+       $value =~ s/^(\S+).*$/$1/o;
+
+       return $value;
+}
index 5bcbeba..8851796 100644 (file)
@@ -1,7 +1,7 @@
 {
 "export":
        {
-       "last_run_date":"2013-08-17 00:00:00-00",
+       "last_run_date":"2013-08-22 15:21:00-00",
        "output":"/path/to/file.mrc",
        "sources": [ 1, 2 ],
        "recipients": ["user@domain.tld"]
@@ -49,7 +49,7 @@
        },
 "import":
        {
-       "working_dir":"/home/kclsdev/Evergreen",
+       "working_dir":"/home/kclsdev/24kcls_evergreen",
        "print_import":true,
        "print_keep":true,
        "print_delete":true,
diff --git a/KCLS/bs_files/updated_bs_marc.zip b/KCLS/bs_files/updated_bs_marc.zip
new file mode 100644 (file)
index 0000000..8bfabb9
Binary files /dev/null and b/KCLS/bs_files/updated_bs_marc.zip differ
index 634e76f..690ec6c 100755 (executable)
@@ -61,7 +61,7 @@ my $result = GetOptions("lower-bound=i" => \$lower_bound,
 #my $dsn = "dbi:Pg:database=" . $egdbi->database;
 
 #my $dsn = "dbi:Pg:database=rel_2_4_1_20130821_auth_v3";
-my $dsn = "dbi:Pg:database=rel_2_4_1_20130816_200bib_kcls_v4";
+my $dsn = "dbi:Pg:database=rel_2_4_1_20130816_200bib_kcls_v6";
 
 #if ($egdbi->host) {
 #    $dsn .= ";host=" . $egdbi->host;