From ac7df30c0ab3bee04d2526765d6078384e5d70c9 Mon Sep 17 00:00:00 2001
From: dbs
Date: Thu, 16 Apr 2009 19:12:35 +0000
Subject: [PATCH] miker says MARC::Batch would be more efficient; okay. Baby steps towards pipeline processing.

git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/trunk@341 6d9bc8c9-1ec2-4278-b937-99fde70a366f
---
 tools/migration-scripts/fixURIs.pl | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/migration-scripts/fixURIs.pl b/tools/migration-scripts/fixURIs.pl
index 70c8897513..41d6ce7ecd 100644
--- a/tools/migration-scripts/fixURIs.pl
+++ b/tools/migration-scripts/fixURIs.pl
@@ -1,7 +1,11 @@
 #!/usr/bin/perl -w
 use strict;
+use warnings;
+
+use Error qw/:try/;
 use Getopt::Long;
 use MARC::File::XML( BinaryEncoding => 'utf8', RecordFormat => 'USMARC' );
+use MARC::Batch;
 
 # Clean up URIs from MARCXML records prior to batch ingest
 # * If we detect a proxy URL:
@@ -23,11 +27,16 @@ if (!$input_file or !$output_file) {
     exit();
 }
 
-my $input = MARC::File::XML->in( $input_file );
+my $input = new MARC::Batch( 'XML', $input_file );
+$input->strict_off();
 my $output = MARC::File::XML->out( $output_file );
 
 my ($touched, $url_cnt, $ind1_cnt, $ind2_cnt, $sub9_cnt) = (0, 0, 0, 0, 0);
-while (my $marc = $input->next()) {
+my $marc;
+while ( try { $marc = $input->next() } otherwise { $marc = -1 } ) {
+    # Skip the record if we couldn't even decode it
+    next if ($marc == -1);
+
     my $edited = 0;
     my @uri_fields = $marc->field('856');
     foreach my $uri (@uri_fields) {
-- 
2.11.0