From: Dan Scott Date: Mon, 21 Oct 2013 15:58:06 +0000 (-0400) Subject: Encode.pm change to the UTF8 flag X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=62cbc55f7dac21d568382fb7d4c59b5d597aaec6;p=working%2FEvergreen.git Encode.pm change to the UTF8 flag In the past, decode_utf8() had the effect of applying the UTF8 flag to the decoded strings. However, Encode.pm changed around version 2.53, and that change resulted in wide-character errors preventing the likes of --load-all-sample from working. It seems that we can simply normalize the input to normalization form D, then recompose it to form C on the way out, and sidestep the decode_utf8() step entirely. Signed-off-by: Dan Scott --- diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql index f481bab4d0..df935907f8 100644 --- a/Open-ILS/src/sql/Pg/002.functions.config.sql +++ b/Open-ILS/src/sql/Pg/002.functions.config.sql @@ -525,9 +525,7 @@ $xml =~ s/\p{Cc}//go; # If we are going to convert non-ASCII characters to XML entities, # we had better be dealing with a UTF8 string to begin with -$xml = decode_utf8($xml); - -$xml = NFC($xml); +$xml = NFC(NFD($xml)); # Convert raw ampersands to entities $xml =~ s/&(?!\S+;)/&/gso; @@ -656,9 +654,7 @@ if ($create or $munge) { # If we are going to convert non-ASCII characters to XML entities, # we had better be dealing with a UTF8 string to begin with - $xml = decode_utf8($xml); - - $xml = NFC($xml); + $xml = NFC(NFD($xml)); # Convert raw ampersands to entities $xml =~ s/&(?!\S+;)/&/gso; @@ -700,7 +696,7 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $ use Unicode::Normalize; use Encode; - my $str = decode_utf8(shift); + my $str = NFD(shift); my $sf = shift; # Apply NACO normalization to input string; based on @@ -755,7 +751,7 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $ $str =~ s/^\s+//; $str =~ s/\s+$//g; - return lc $str; + return lc NFC($str); $func$ 
LANGUAGE 'plperlu' STRICT IMMUTABLE; -- Currently, the only difference from naco_normalize is that search_normalize @@ -766,7 +762,7 @@ CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS use Unicode::Normalize; use Encode; - my $str = decode_utf8(shift); + my $str = NFD(shift); my $sf = shift; # Apply NACO normalization to input string; based on @@ -821,7 +817,7 @@ CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS $str =~ s/^\s+//; $str =~ s/\s+$//g; - return lc $str; + return lc NFC($str); $func$ LANGUAGE 'plperlu' STRICT IMMUTABLE; CREATE OR REPLACE FUNCTION public.naco_normalize_keep_comma( TEXT ) RETURNS TEXT AS $func$