From c694929e4ba58ecf13e5903624ec6c784c37f439 Mon Sep 17 00:00:00 2001
From: Dan Scott <dscott@laurentian.ca>
Date: Sun, 4 Mar 2012 03:00:49 -0500
Subject: [PATCH] Decode the string to UTF8, always

Even if you know that the caller is passing you a decoded UTF8 string,
you can and should decode it yourself, because some day a caller isn't
going to decode it first and you're going to wind up in misery trying
to figure out why you're broken.

In this case, it resolves the mystery of why the unit tests failed when
Vandelay seemed to be ticking along fine. As the comment in clean_marc()
mentioned, "assume input is already in UTF8" - but as soon as it isn't,
boom.

Signed-off-by: Dan Scott <dscott@laurentian.ca>
Signed-off-by: Jason Stephenson <jstephenson@mvlc.org>
---
 Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
index 90355768c2..0823e5687a 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm
@@ -111,12 +111,12 @@ sub _normalize_codes {
 # Assumes input is already in UTF-8.
 sub clean_marc {
     my $input = shift;
-    my $xml = (isa $input, 'MARC::Record') ? $input->as_xml_record() : $input;
+    my $xml = decode_utf8((isa $input, 'MARC::Record') ? $input->as_xml_record() : $input);
     $xml =~ s/\n//sog;
     $xml =~ s/^<\?xml.+\?\s*>//go;
     $xml =~ s/>\s+</></go;
-    $xml = OpenILS::Application::AppUtils->entityize($xml);
     $xml =~ s/\p{Cc}//go;
+    $xml = OpenILS::Application::AppUtils->entityize($xml);
     $xml =~ s/[\x00-\x1f]//go;
     return $xml;
 }
-- 
2.11.0