From 9f5014ee30b7c1296839588d4d8aed0fc3da53ef Mon Sep 17 00:00:00 2001 From: dbs Date: Sun, 2 Jan 2011 05:02:00 +0000 Subject: [PATCH] Add an "--all" option to marc_export and silence some warnings Rather than forcing users to generate a set of record IDs using a separate process, the --all option enables users to export all records of the specified type. Note that the approach is basic: grab the record with the highest ID, then start at ID 1 and count up by 1 while trying to retrieve each record along the way. While this will undoubtedly generate plenty of warnings for deleted records or gaps in the ID sequences, the alternative of retrieving an ID list for potentially millions of records in large Evergreen instances wasn't appealing. Also, $count{did} and $count{bib} resulted in uninitialized variable warnings if no records were actually exported, so initialize them to 0. git-svn-id: svn://svn.open-ils.org/ILS/trunk@19090 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/support-scripts/marc_export | 103 +++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 32 deletions(-) diff --git a/Open-ILS/src/support-scripts/marc_export b/Open-ILS/src/support-scripts/marc_export index 16bd662110..6360b1cda8 100755 --- a/Open-ILS/src/support-scripts/marc_export +++ b/Open-ILS/src/support-scripts/marc_export @@ -23,12 +23,13 @@ use Getopt::Long; my @formats = qw/USMARC UNIMARC XML BRE ARE/; -my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio'); +my ($config,$format,$encoding,$location,$dollarsign,$idl,$help,$holdings,$timeout,$export_mfhd,$type,$all_records) = ('/openils/conf/opensrf_core.xml','USMARC','MARC8','','$',0,undef,undef,0,undef,'biblio',undef); GetOptions( 'help' => \$help, 'items' => \$holdings, 'mfhd' => \$export_mfhd, + 'all' => \$all_records, 'location=s' => \$location, 'money=s' => \$dollarsign, 'config=s' => 
\$config, @@ -42,10 +43,17 @@ GetOptions( if ($help) { print <<"HELP"; This script exports MARC authority, bibliographic, and serial holdings -records from an Evergreen database. Input to this script consists of -a list of record IDs, with one record ID per line, corresponding to -the record ID in the Evergreen database table of your requested record -type. +records from an Evergreen database. + +Input to this script can consist of a list of record IDs, with one record ID +per line, corresponding to the record ID in the Evergreen database table of +your requested record type. + +Alternately, passing the --all option will attempt to export all records of +the specified type from the Evergreen database. The --all option starts at +record ID 1 and increments the ID by 1 until the largest ID in the database +is retrieved. This may not be very efficient for databases with large gaps +in their ID sequences. Usage: $0 [options] --help or -h This screen. @@ -57,6 +65,7 @@ Usage: $0 [options] are using --holdings and are exporting records that have a lot of items attached to them. 
--type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO] + --all or -a Export all records; ignores input list Additional options for type = 'BIBLIO': --items or -i Include items (holdings) in the output @@ -66,10 +75,16 @@ Usage: $0 [options] --location or -l MARC Location Code for holdings from http://www.loc.gov/marc/organizations/orgshome.html -Example: +Examples: +To export a set of USMARC records in a file named "output_file" based on the +IDs contained in a file named "list_of_ids": cat list_of_ids | $0 > output_file +To export a set of MARC21XML authority records in a file named "output.xml" +for all authority records in the database: + $0 --format XML --type AUTHORITY --all > output.xml + HELP exit; } @@ -127,36 +142,74 @@ if ($holdings) { my $start = time; my $last_time = time; -my %count = (); +my %count = ('bib' => 0, 'did' => 0); my $speed = 0; -while ( my $i = <> ) { + +if ($all_records) { + my $top_record = 0; + if ($type eq 'biblio') { + $top_record = $editor->search_biblio_record_entry([ + {deleted => 'f'}, + {order_by => { 'bre' => 'id DESC' }, limit => 1} + ])->[0]->id; + } elsif ($type eq 'authority') { + $top_record = $editor->search_authority_record_entry([ + {deleted => 'f'}, + {order_by => { 'are' => 'id DESC' }, limit => 1} + ])->[0]->id; + } + for (my $i = 0; $i++ < $top_record;) { + export_record($i); + } +} else { + while ( my $i = <> ) { + export_record($i); + } +} + +print "\n" if ($format eq 'XML'); + +$speed = $count{did} / (time - $start); +my $time = time - $start; +print STDERR <request( "open-ils.cstore.direct.$type.record_entry.retrieve", $i, $flesh ); + my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh ); my $s = $r->recv(timeout => $timeout); if (!$s) { - warn "\n!!!!! Failed trying to read record $i\n"; - next; + warn "\n!!!!! Failed trying to read record $id\n"; + return; } if ($r->failed) { - warn "\n!!!!!! Failed trying to read record $i: " . $r->failed->stringify . 
"\n"; - next; + warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n"; + return; } if ($r->timed_out) { - warn "\n!!!!!! Timed out trying to read record $i\n"; - next; + warn "\n!!!!!! Timed out trying to read record $id\n"; + return; } $bib = $s->content; $r->finish; $count{bib}++; - next unless $bib; + return unless $bib; if ($format eq 'ARE' or $format eq 'BRE') { print OpenSRF::Utils::JSON->perl2JSON($bib); stats(); $count{did}++; - next; + return; } try { @@ -185,7 +238,7 @@ while ( my $i = <> ) { }; if ($export_mfhd and $type eq 'biblio') { - my $mfhds = $editor->search_serial_record_entry({record => $i, deleted => 'f'}); + my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'}); foreach my $mfhd (@$mfhds) { try { my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format ); @@ -210,20 +263,6 @@ while ( my $i = <> ) { stats() if (! ($count{bib} % 50 )); } -print "\n" if ($format eq 'XML'); - -$speed = $count{did} / (time - $start); -my $time = time - $start; -print STDERR <