From: Dan Scott Date: Mon, 21 Oct 2013 15:58:06 +0000 (-0400) Subject: Encode.pm change to the UTF8 flag X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=796cf6bcb0d9b7c721bc48492595141f93243b76;p=evergreen%2Fmasslnc.git Encode.pm change to the UTF8 flag In the past, decode_utf8() silently ignored attempts to decode data with the UTF8 flag on. However, Encode.pm changed around 2.53 and resulted in wide-character errors preventing the likes of --load-all-sample from working. Since our data should already be UTF8 when it hits these functions, we should not be trying to decode it in the first place. Signed-off-by: Dan Wells Signed-off-by: Dan Scott --- diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/AppUtils.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/AppUtils.pm index 5a6d38582d..b5900eda1c 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/AppUtils.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/AppUtils.pm @@ -1513,10 +1513,6 @@ sub entityize { my($self, $string, $form) = @_; $form ||= ""; - # If we're going to convert non-ASCII characters to XML entities, - # we had better be dealing with a UTF8 string to begin with - $string = decode_utf8($string); - if ($form eq 'D') { $string = NFD($string); } else { diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm index ac82ba4954..1102d10a94 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm @@ -12,7 +12,7 @@ use Exporter 'import'; our @EXPORT_OK = qw( clean_marc naco_normalize search_normalize ); sub naco_normalize { - my $str = decode_utf8(shift); + my $str = shift; my $sf = shift; # Apply NACO normalization to input string; based on @@ -32,7 +32,7 @@ sub naco_normalize { } sub search_normalize { - my $str = decode_utf8(shift); + my $str = shift; my $sf = shift; $str = _normalize_substitutions($str, $sf); @@ -111,7 +111,7 @@ sub 
_normalize_codes { # Assumes input is already in UTF-8. sub clean_marc { my $input = shift; - my $xml = decode_utf8((UNIVERSAL::isa($input, 'MARC::Record')) ? $input->as_xml_record() : $input); + my $xml = (UNIVERSAL::isa($input, 'MARC::Record')) ? $input->as_xml_record() : $input; $xml =~ s/\n//sog; $xml =~ s/^<\?xml.+\?\s*>//go; $xml =~ s/>\s+</></go; [... text lost in extraction ...] # Embed a version of OpenILS::Application::AppUtils->entityize() # to avoid having to set PERL5LIB for PostgreSQL as well -# If we are going to convert non-ASCII characters to XML entities, -# we had better be dealing with a UTF8 string to begin with -$xml = decode_utf8($xml); - $xml = NFC($xml); # Convert raw ampersands to entities @@ -654,10 +650,6 @@ if ($create or $munge) { # Embed a version of OpenILS::Application::AppUtils->entityize() # to avoid having to set PERL5LIB for PostgreSQL as well - # If we are going to convert non-ASCII characters to XML entities, - # we had better be dealing with a UTF8 string to begin with - $xml = decode_utf8($xml); - $xml = NFC($xml); # Convert raw ampersands to entities @@ -700,7 +692,7 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $ use Unicode::Normalize; use Encode; - my $str = decode_utf8(shift); + my $str = shift; my $sf = shift; # Apply NACO normalization to input string; based on @@ -766,7 +758,7 @@ CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS use Unicode::Normalize; use Encode; - my $str = decode_utf8(shift); + my $str = shift; my $sf = shift; # Apply NACO normalization to input string; based on