From: Dan Scott <dscott@laurentian.ca>
Date: Mon, 21 Oct 2013 15:58:06 +0000 (-0400)
Subject: Encode.pm change to the UTF8 flag
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=62cbc55f7dac21d568382fb7d4c59b5d597aaec6;p=working%2FEvergreen.git

Encode.pm change to the UTF8 flag

In the past, decode_utf8() had the effect of applying the UTF8 flag to
the decoded strings. However, the behavior of Encode.pm changed around
version 2.53, resulting in wide-character errors that prevent the likes
of --load-all-sample from working.

It seems that we can simply normalize the input to normalization form D,
then recompose it to form C on the way out, and sidestep the
decode_utf8() step entirely.

Signed-off-by: Dan Scott <dscott@laurentian.ca>
---

diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql
index f481bab4d0..df935907f8 100644
--- a/Open-ILS/src/sql/Pg/002.functions.config.sql
+++ b/Open-ILS/src/sql/Pg/002.functions.config.sql
@@ -525,9 +525,7 @@ $xml =~ s/\p{Cc}//go;
 
 # If we are going to convert non-ASCII characters to XML entities,
 # we had better be dealing with a UTF8 string to begin with
-$xml = decode_utf8($xml);
-
-$xml = NFC($xml);
+$xml = NFC(NFD($xml));
 
 # Convert raw ampersands to entities
 $xml =~ s/&(?!\S+;)/&amp;/gso;
@@ -656,9 +654,7 @@ if ($create or $munge) {
 
     # If we are going to convert non-ASCII characters to XML entities,
     # we had better be dealing with a UTF8 string to begin with
-    $xml = decode_utf8($xml);
-
-    $xml = NFC($xml);
+    $xml = NFC(NFD($xml));
 
     # Convert raw ampersands to entities
     $xml =~ s/&(?!\S+;)/&amp;/gso;
@@ -700,7 +696,7 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $
     use Unicode::Normalize;
     use Encode;
 
-    my $str = decode_utf8(shift);
+    my $str = NFD(shift);
     my $sf = shift;
 
     # Apply NACO normalization to input string; based on
@@ -755,7 +751,7 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $
     $str =~ s/^\s+//;
     $str =~ s/\s+$//g;
 
-    return lc $str;
+    return lc NFC($str);
 $func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
 
 -- Currently, the only difference from naco_normalize is that search_normalize
@@ -766,7 +762,7 @@ CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS
     use Unicode::Normalize;
     use Encode;
 
-    my $str = decode_utf8(shift);
+    my $str = NFD(shift);
     my $sf = shift;
 
     # Apply NACO normalization to input string; based on
@@ -821,7 +817,7 @@ CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS
     $str =~ s/^\s+//;
     $str =~ s/\s+$//g;
 
-    return lc $str;
+    return lc NFC($str);
 $func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
 
 CREATE OR REPLACE FUNCTION public.naco_normalize_keep_comma( TEXT ) RETURNS TEXT AS $func$