From a7b2c1ee87e9853f6f7ae333a94fecadbf938c8f Mon Sep 17 00:00:00 2001 From: Jason Stephenson Date: Wed, 9 Nov 2011 15:34:27 -0500 Subject: [PATCH] Add clean_marc function to OpenILS::Utils::Normalize. Add a library function to clean up MARC records for how we like to store them in the biblio.record_entry table. Having this in a library will reduce code duplication. Also, replace nearly identical code in OpenILS::Application::Vandelay and OpenILS::Application::Acq::Order with calls to this new function. Signed-off-by: Jason Stephenson --- .../perlmods/lib/OpenILS/Application/Acq/Order.pm | 9 ++------ .../perlmods/lib/OpenILS/Application/Vandelay.pm | 8 ++----- .../src/perlmods/lib/OpenILS/Utils/Normalize.pm | 26 +++++++++++++++++++++- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Acq/Order.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Acq/Order.pm index 8e478cad88..aa4ae5db6c 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Acq/Order.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Acq/Order.pm @@ -181,6 +181,7 @@ use OpenSRF::Utils::JSON; use OpenSRF::AppSession; use OpenILS::Utils::Fieldmapper; use OpenILS::Utils::CStoreEditor q/:funcs/; +use OpenILS::Utils::Normalize qw/clean_marc/; use OpenILS::Const qw/:const/; use OpenSRF::EX q/:try/; use OpenILS::Application::AppUtils; @@ -1254,13 +1255,7 @@ sub upload_records { last unless $r; try { - ($xml = $r->as_xml_record()) =~ s/\n//sog; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+</></go; - $xml =~ s/\p{Cc}//go; - $xml = $U->entityize($xml); - $xml =~ s/[\x00-\x1f]//go; - + $xml = clean_marc($r); } catch Error with { $err = shift; $logger->warn("Proccessing XML of record $count in set $key failed with error $err. 
Skipping this record"); diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm index 62dca2e073..59a2fc0758 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm @@ -9,6 +9,7 @@ use OpenSRF::Utils::SettingsClient; use OpenSRF::Utils::Cache; use OpenILS::Utils::Fieldmapper; use OpenILS::Utils::CStoreEditor qw/:funcs/; +use OpenILS::Utils::Normalize qw/clean_marc/; use MARC::Batch; use MARC::Record; use MARC::File::XML ( BinaryEncoding => 'UTF-8' ); @@ -286,12 +287,7 @@ sub process_spool { $logger->info("processing record $count"); try { - (my $xml = $r->as_xml_record()) =~ s/\n//sog; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+</></go; - $xml =~ s/\p{Cc}//go; - $xml = $U->entityize($xml); - $xml =~ s/[\x00-\x1f]//go; + my $xml = clean_marc($r); my $qrec; # Check the leader to ensure we've got something resembling the expected diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm index d71503c5e1..fae640325c 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm @@ -3,9 +3,13 @@ use strict; use warnings; use Unicode::Normalize; use Encode; +use UNIVERSAL qw/isa/; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'UTF-8' ); +use OpenILS::Application::AppUtils; use Exporter 'import'; -our @EXPORT_OK = qw( naco_normalize ); +our @EXPORT_OK = qw( naco_normalize clean_marc ); sub naco_normalize { @@ -67,4 +71,24 @@ sub naco_normalize { return lc $str; } +# Cleans up a MARC::Record or MARCXML string for storage in the +# Open-ILS database. +# +# Takes either a MARC::Record or a string of MARCXML. +# +# Returns a string of MARCXML as Open-ILS likes to store it. +# +# Assumes input is already in UTF-8. +sub clean_marc { + my $input = shift; + my $xml = (isa $input, 'MARC::Record') ? 
$input->as_xml_record() : $input; + $xml =~ s/\n//sog; + $xml =~ s/^<\?xml.+\?\s*>//go; + $xml =~ s/>\s+</></go; + $xml =~ s/\p{Cc}//go; + $xml = OpenILS::Application::AppUtils->entityize($xml); + $xml =~ s/[\x00-\x1f]//go; + return $xml; +} + 1; -- 2.11.0