From: dbs Date: Tue, 4 Jan 2011 19:23:20 +0000 (+0000) Subject: Clean up marc_export and teach it how to export (all) authority records X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=c42668a59690699af0a72a4dc3c5e15cb12b220c;p=contrib%2FConifer.git Clean up marc_export and teach it how to export (all) authority records The new "--type" parameter enables users to specify "authority" and have the expected result. Add an "--all" option to marc_export and silence some warnings Rather than forcing users to generate a set of record IDs using a separate process, the --all option enables them to export all records of the specified type. Note that the approach is basic: grab the record with the highest ID, then start at ID 0 and count up by 1 while trying to retrieve each record along the way. While this will undoubtedly generate plenty of warnings for deleted records or gaps in the ID sequences, the alternative of retrieving an ID list for potentially millions of records in large Evergreen instances wasn't appealing. Some other refactoring and fixes: Three large chunks of code were factored out into subroutines to make it a little easier to read the main flow of the code. As we explicitly call uc() on the $format input parameter to begin with, we don't have to call it subsequently. If a requested ID did not exist in the database, then the script would die; now we trap and flag that error but continue to fulfill subsequent requests. The --money parameter is now documented in the help, and the help has a brief overview that describes (poorly) the expected format for the list of IDs. An uninitialized variable error when exporting BRE format records was squashed. Also, $count{did} and $count{bib} resulted in uninitialized variable warnings if no records were actually exported, so initialize them to 0. 
git-svn-id: svn://svn.open-ils.org/ILS/branches/rel_2_0@19114 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- diff --git a/Open-ILS/src/support-scripts/marc_export b/Open-ILS/src/support-scripts/marc_export index f5164bab7b..6360b1cda8 100755 --- a/Open-ILS/src/support-scripts/marc_export +++ b/Open-ILS/src/support-scripts/marc_export @@ -1,4 +1,5 @@ #!/usr/bin/perl +# vim:et:sw=4:ts=4: use strict; use warnings; use bytes; @@ -20,18 +21,20 @@ use Time::HiRes qw/time/; use Getopt::Long; -my @formats = qw/USMARC UNIMARC XML BRE/; +my @formats = qw/USMARC UNIMARC XML BRE ARE/; -my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef); +my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef); GetOptions( 'help' => \$help, 'items' => \$holdings, 'mfhd' => \$export_mfhd, + 'all' => \$all_records, 'location=s' => \$location, 'money=s' => \$dollarsign, 'config=s' => \$config, 'format=s' => \$format, + 'type=s' => \$type, 'xml-idl=s' => \$idl, 'encoding=s' => \$encoding, 'timeout=i' => \$timeout, @@ -39,36 +42,61 @@ GetOptions( if ($help) { print <<"HELP"; +This script exports MARC authority, bibliographic, and serial holdings +records from an Evergreen database. + +Input to this script can consist of a list of record IDs, with one record ID +per line, corresponding to the record ID in the Evergreen database table of +your requested record type. + +Alternately, passing the --all option will attempt to export all records of +the specified type from the Evergreen database. The --all option starts at +record ID 1 and increments the ID by 1 until the largest ID in the database +is retrieved. This may not be very efficient for databases with large gaps +in their ID sequences. 
+ Usage: $0 [options] --help or -h This screen. --config or -c Configuration file [/openils/conf/opensrf_core.xml] - --format or -f Output format (USMARC, UNIMARC, XML, BRE) [USMARC] - --encoding or -e Output Encoding (UTF-8, ISO-8859-?, MARC8) [MARC8] + --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC] + --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8] + --xml-idl or -x Location of the IDL XML + --timeout Timeout for exporting a single record; increase if you + are using --holdings and are exporting records that + have a lot of items attached to them. + --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO] + --all or -a Export all records; ignores input list + + Additional options for type = 'BIBLIO': --items or -i Include items (holdings) in the output + --money Currency symbol to use in item price field [\$] --mfhd Export serial MFHD records for associated bib records Not compatible with --format=BRE - --xml-idl or -x Location of the IDL XML --location or -l MARC Location Code for holdings from http://www.loc.gov/marc/organizations/orgshome.html - --timeout Timeout for exporting a single record; increase if you - are using --holdings and are exporting bibs that - have a lot of items attached to them. -Example: +Examples: +To export a set of USMARC records in a file named "output_file" based on the +IDs contained in a file named "list_of_ids": cat list_of_ids | $0 > output_file +To export a set of MARC21XML authority records in a file named "output.xml" +for all authority records in the database: + $0 --format XML --type AUTHORITY --all > output.xml + HELP exit; } +$type = lc($type); $format = uc($format); $encoding = uc($encoding); binmode(STDOUT, ':raw') if ($encoding ne 'UTF-8'); binmode(STDOUT, ':utf8') if ($encoding eq 'UTF-8'); -if (!grep { uc($format) eq $_ } @formats) { +if (!grep { $format eq $_ } @formats) { die "Please select a supported format. ". "Right now that means one of [". join('|',@formats). 
"]\n"; @@ -107,116 +135,97 @@ my %orgs; my %shelves; my $flesh = {}; + if ($holdings) { + get_bib_locations(); +} - print STDERR "Retrieving Org Units ... "; - my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } ); +my $start = time; +my $last_time = time; +my %count = ('bib' => 0, 'did' => 0); +my $speed = 0; - while (my $o = $r->recv) { - die $r->failed->stringify if ($r->failed); - $o = $o->content; - last unless ($o); - $orgs{$o->id} = $o; +if ($all_records) { + my $top_record = 0; + if ($type eq 'biblio') { + $top_record = $editor->search_biblio_record_entry([ + {deleted => 'f'}, + {order_by => { 'bre' => 'id DESC' }, limit => 1} + ])->[0]->id; + } elsif ($type eq 'authority') { + $top_record = $editor->search_authority_record_entry([ + {deleted => 'f'}, + {order_by => { 'are' => 'id DESC' }, limit => 1} + ])->[0]->id; } - $r->finish; - print STDERR "OK\n"; + for (my $i = 0; $i++ < $top_record;) { + export_record($i); + } +} else { + while ( my $i = <> ) { + export_record($i); + } +} - print STDERR "Retrieving Shelving locations ... 
"; - $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } ); +print "\n" if ($format eq 'XML'); - while (my $s = $r->recv) { - die $r->failed->stringify if ($r->failed); - $s = $s->content; - last unless ($s); - $shelves{$s->id} = $s; - } - $r->finish; - print STDERR "OK\n"; +$speed = $count{did} / (time - $start); +my $time = time - $start; +print STDERR < 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } }; -} +Exports Attempted : $count{bib} +Exports Completed : $count{did} +Overall Speed : $speed +Total Time Elapsed: $time seconds + +DONE + +sub export_record { + my $id = shift; -my $start = time; -my $last_time = time; -my %count = (); -my $speed = 0; -while ( my $i = <> ) { my $bib; - my $r = $ses->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve', $i, $flesh ); + my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh ); my $s = $r->recv(timeout => $timeout); + if (!$s) { + warn "\n!!!!! Failed trying to read record $id\n"; + return; + } if ($r->failed) { - warn "\n!!!!!! Failed trying to read record $i: " . $r->failed->stringify . "\n"; - next; + warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n"; + return; } if ($r->timed_out) { - warn "\n!!!!!! Timed out trying to read record $i\n"; - next; + warn "\n!!!!!! 
Timed out trying to read record $id\n"; + return; } $bib = $s->content; $r->finish; $count{bib}++; - next unless $bib; + return unless $bib; - if (uc($format) eq 'BRE') { + if ($format eq 'ARE' or $format eq 'BRE') { print OpenSRF::Utils::JSON->perl2JSON($bib); stats(); - next; + $count{did}++; + return; } try { my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format ); - my $cn_list = $bib->call_numbers; - if ($cn_list && @$cn_list) { - - $count{cn} += @$cn_list; - - my $cp_list = [ map { @{ $_->copies } } @$cn_list ]; - if ($cp_list && @$cp_list) { - - my %cn_map; - push @{$cn_map{$_->call_number}}, $_ for (@$cp_list); - - for my $cn ( @$cn_list ) { - my $cn_map_list = $cn_map{$cn->id}; - - for my $cp ( @$cn_map_list ) { - $count{cp}++; - - $r->append_fields( - MARC::Field->new( - 852, '4', '', - a => $location, - b => $orgs{$cn->owning_lib}->shortname, - b => $orgs{$cp->circ_lib}->shortname, - c => $shelves{$cp->location}->name, - j => $cn->label, - ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()), - p => $cp->barcode, - ($cp->price ? ( y => $dollarsign.$cp->price ) : ()), - ($cp->copy_number ? ( t => $cp->copy_number ) : ()), - ($cp->ref eq 't' ? ( x => 'reference' ) : ()), - ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()), - ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()), - ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()), - ) - ); - - stats() if (! 
($count{cp} % 100 )); - } - } - } + if ($type eq 'biblio') { + add_bib_holdings($bib, $r); } - if (uc($format) eq 'XML') { + if ($format eq 'XML') { my $xml = $r->as_xml_record; $xml =~ s/^<\?.+?\?>$//mo; print $xml; - } elsif (uc($format) eq 'UNIMARC') { + } elsif ($format eq 'UNIMARC') { print $r->as_usmarc; - } elsif (uc($format) eq 'USMARC') { + } elsif ($format eq 'USMARC') { print $r->as_usmarc; } @@ -228,19 +237,19 @@ while ( my $i = <> ) { import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export }; - if ($export_mfhd) { - my $mfhds = $editor->search_serial_record_entry({record => $i, deleted => 'f'}); + if ($export_mfhd and $type eq 'biblio') { + my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'}); foreach my $mfhd (@$mfhds) { try { my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format ); - if (uc($format) eq 'XML') { + if ($format eq 'XML') { my $xml = $r->as_xml_record; $xml =~ s/^<\?.+?\?>$//mo; print $xml; - } elsif (uc($format) eq 'UNIMARC') { + } elsif ($format eq 'UNIMARC') { print $r->as_usmarc; - } elsif (uc($format) eq 'USMARC') { + } elsif ($format eq 'USMARC') { print $r->as_usmarc; } } otherwise { @@ -254,20 +263,6 @@ while ( my $i = <> ) { stats() if (! ($count{bib} % 50 )); } -print "\n" if ($format eq 'XML'); - -$speed = $count{did} / (time - $start); -my $time = time - $start; -print STDERR <request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } ); + while (my $o = $r->recv) { + die $r->failed->stringify if ($r->failed); + $o = $o->content; + last unless ($o); + $orgs{$o->id} = $o; + } + $r->finish; + print STDERR "OK\n"; + + print STDERR "Retrieving Shelving locations ... 
"; + $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } ); + + while (my $s = $r->recv) { + die $r->failed->stringify if ($r->failed); + $s = $s->content; + last unless ($s); + $shelves{$s->id} = $s; + } + $r->finish; + print STDERR "OK\n"; + + $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } }; +} + +sub add_bib_holdings { + my $bib = shift; + my $r = shift; + + my $cn_list = $bib->call_numbers; + if ($cn_list && @$cn_list) { + + $count{cn} += @$cn_list; + + my $cp_list = [ map { @{ $_->copies } } @$cn_list ]; + if ($cp_list && @$cp_list) { + + my %cn_map; + push @{$cn_map{$_->call_number}}, $_ for (@$cp_list); + + for my $cn ( @$cn_list ) { + my $cn_map_list = $cn_map{$cn->id}; + + for my $cp ( @$cn_map_list ) { + $count{cp}++; + + $r->append_fields( + MARC::Field->new( + 852, '4', '', + a => $location, + b => $orgs{$cn->owning_lib}->shortname, + b => $orgs{$cp->circ_lib}->shortname, + c => $shelves{$cp->location}->name, + j => $cn->label, + ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()), + p => $cp->barcode, + ($cp->price ? ( y => $dollarsign.$cp->price ) : ()), + ($cp->copy_number ? ( t => $cp->copy_number ) : ()), + ($cp->ref eq 't' ? ( x => 'reference' ) : ()), + ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()), + ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()), + ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()), + ) + ); + + stats() if (! ($count{cp} % 100 )); + } + } + } + } +}