use Unicode::Normalize;
use Encode;
-use bytes;
-
use FileHandle;
use Time::HiRes qw/time/;
use Getopt::Long;
use MARC::File::XML ( BinaryEncoding => 'utf-8' );
use MARC::Charset;
-MARC::Charset->ignore_errors(1);
+#MARC::Charset->ignore_errors(1);
-my ($id_field, $recid, $user, $config, $marctype, $keyfile, $dontuse_file, $enc, @files, @trash_fields) =
+my ($id_field, $recid, $user, $config, $marctype, $keyfile, $dontuse_file, $enc, $force_enc, @files, @trash_fields) =
('', 1, 1, '/openils/conf/bootstrap.conf', 'USMARC');
GetOptions(
'idfield=s' => \$id_field,
'user=s' => \$user,
'encoding=s' => \$enc,
+ 'hard_encoding' => \$force_enc,
'keyfile=s' => \$keyfile,
'config=s' => \$config,
'file=s' => \@files,
MARC::Charset->assume_encoding($enc);
}
+if (uc($marctype) eq 'XML') {
+ 'open'->use(':utf8');
+} else {
+ bytes->use();
+}
+
@files = @ARGV if (!@files);
my @ses;
}
}
- $rec = preprocess($rec);
+ my $tcn;
+ ($rec, $tcn) = preprocess($rec);
$rec->delete_field( $_ ) for ($rec->field($id_field));
+ $rec->append_fields( $tcn );
if (!$rec) {
next;
b => do { $source_map{$source} || 'System' },
);
- $rec->append_fields($tcn);
-
- return $rec;
+ return ($rec,$tcn);
}
sub entityize {