From 5d6169e29075b77ca02f726423c5bff817a3bc4d Mon Sep 17 00:00:00 2001 From: miker Date: Tue, 1 May 2007 05:34:32 +0000 Subject: [PATCH] fixed encoding use when reading different MARC formats git-svn-id: svn://svn.open-ils.org/ILS/trunk@7184 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/extras/import/marc2bre.pl | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/Open-ILS/src/extras/import/marc2bre.pl b/Open-ILS/src/extras/import/marc2bre.pl index 9cb2ca9fc9..163acad269 100755 --- a/Open-ILS/src/extras/import/marc2bre.pl +++ b/Open-ILS/src/extras/import/marc2bre.pl @@ -18,8 +18,6 @@ use Data::Dumper; use Unicode::Normalize; use Encode; -use bytes; - use FileHandle; use Time::HiRes qw/time/; use Getopt::Long; @@ -27,9 +25,9 @@ use MARC::Batch; use MARC::File::XML ( BinaryEncoding => 'utf-8' ); use MARC::Charset; -MARC::Charset->ignore_errors(1); +#MARC::Charset->ignore_errors(1); -my ($id_field, $recid, $user, $config, $marctype, $keyfile, $dontuse_file, $enc, @files, @trash_fields) = +my ($id_field, $recid, $user, $config, $marctype, $keyfile, $dontuse_file, $enc, $force_enc, @files, @trash_fields) = ('', 1, 1, '/openils/conf/bootstrap.conf', 'USMARC'); GetOptions( @@ -38,6 +36,7 @@ GetOptions( 'idfield=s' => \$id_field, 'user=s' => \$user, 'encoding=s' => \$enc, + 'hard_encoding' => \$force_enc, 'keyfile=s' => \$keyfile, 'config=s' => \$config, 'file=s' => \@files, @@ -50,6 +49,12 @@ if ($enc) { MARC::Charset->assume_encoding($enc); } +if (uc($marctype) eq 'XML') { + 'open'->use(':utf8'); +} else { + bytes->use(); +} + @files = @ARGV if (!@files); my @ses; @@ -141,8 +146,10 @@ while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) { } } - $rec = preprocess($rec); + my $tcn; + ($rec, $tcn) = preprocess($rec); $rec->delete_field( $_ ) for ($rec->field($id_field)); + $rec->append_fields( $tcn ); if (!$rec) { next; @@ -251,9 +258,7 @@ sub preprocess { b => do { $source_map{$source} || 'System' }, ); - $rec->append_fields($tcn); - - return $rec; + return ($rec,$tcn); } sub entityize { -- 2.11.0