use OpenSRF::AppSession;
use OpenILS::Utils::Fieldmapper;
use OpenILS::Utils::CStoreEditor q/:funcs/;
+use OpenILS::Utils::Normalize qw/clean_marc/;
use OpenILS::Const qw/:const/;
use OpenSRF::EX q/:try/;
use OpenILS::Application::AppUtils;
last unless $r;
try {
- ($xml = $r->as_xml_record()) =~ s/\n//sog;
- $xml =~ s/^<\?xml.+\?\s*>//go;
- $xml =~ s/>\s+</></go;
- $xml =~ s/\p{Cc}//go;
- $xml = $U->entityize($xml);
- $xml =~ s/[\x00-\x1f]//go;
-
+ $xml = clean_marc($r);
} catch Error with {
$err = shift;
$logger->warn("Proccessing XML of record $count in set $key failed with error $err. Skipping this record");
use OpenSRF::Utils::Cache;
use OpenILS::Utils::Fieldmapper;
use OpenILS::Utils::CStoreEditor qw/:funcs/;
+use OpenILS::Utils::Normalize qw/clean_marc/;
use MARC::Batch;
use MARC::Record;
use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
$logger->info("processing record $count");
try {
- (my $xml = $r->as_xml_record()) =~ s/\n//sog;
- $xml =~ s/^<\?xml.+\?\s*>//go;
- $xml =~ s/>\s+</></go;
- $xml =~ s/\p{Cc}//go;
- $xml = $U->entityize($xml);
- $xml =~ s/[\x00-\x1f]//go;
+ my $xml = clean_marc($r);
my $qrec;
# Check the leader to ensure we've got something resembling the expected
use warnings;
use Unicode::Normalize;
use Encode;
+use UNIVERSAL qw/isa/;
+use MARC::Record;
+use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
+use OpenILS::Application::AppUtils;
use Exporter 'import';
-our @EXPORT_OK = qw( naco_normalize search_normalize );
+our @EXPORT_OK = qw( clean_marc naco_normalize search_normalize );
# Decode the UTF-8 encoded argument into a character string and return
# that string lowercased.
sub naco_normalize {
    my ($octets) = @_;
    my $decoded = decode_utf8($octets);
    return lc($decoded);
}
+# Cleans up a MARC::Record or MARCXML string for storage in the
+# Open-ILS database.
+#
+# Takes either a MARC::Record object (serialized here with
+# as_xml_record()) or a string of MARCXML.
+#
+# Returns a string of MARCXML as Open-ILS likes to store it: newlines
+# removed, the leading XML declaration stripped, whitespace between
+# adjacent tags removed, control characters removed, and the result
+# passed through OpenILS::Application::AppUtils->entityize().
+#
+# Assumes input is already in UTF-8.
+sub clean_marc {
+    my $input = shift;
+
+    # Detect record objects with blessed() plus the ->isa method rather
+    # than an isa() imported from UNIVERSAL: importing from UNIVERSAL is
+    # deprecated since Perl 5.12 and a fatal error from Perl 5.22 on.
+    # blessed() also keeps plain MARCXML strings from ever being treated
+    # as a class name.
+    require Scalar::Util;
+    my $is_record = Scalar::Util::blessed($input)
+        && $input->isa('MARC::Record');
+    my $xml = $is_record ? $input->as_xml_record() : $input;
+
+    # Collapse the document onto a single line first so the patterns
+    # below see it as one string.
+    $xml =~ s/\n//g;
+
+    # Drop the leading XML declaration. The match is non-greedy so it
+    # stops at the first "?>"; a greedy match on the single-line string
+    # would run to the last "?>" in the document and delete record data.
+    $xml =~ s/^<\?xml.+?\?\s*>//;
+
+    # Remove whitespace between adjacent tags.
+    $xml =~ s/>\s+</></g;
+
+    # Remove Unicode control characters.
+    $xml =~ s/\p{Cc}//g;
+
+    $xml = OpenILS::Application::AppUtils->entityize($xml);
+
+    # NOTE(review): looks redundant with the \p{Cc} pass above unless
+    # entityize() can emit C0 control characters; kept as in the original.
+    $xml =~ s/[\x00-\x1f]//g;
+
+    return $xml;
+}
+
1;