From: Dan Scott Date: Wed, 23 Oct 2013 02:44:12 +0000 (-0400) Subject: Test Dan Wells' theory that we don't need NFD at all X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=a3e72c7c8eed9893b1af827cdd0977b589339e68;p=working%2FEvergreen.git Test Dan Wells' theory that we don't need NFD at all It works well enough for the unit tests we have, anyway. Which puts us even with where we swapped in NFD() in place of decode_utf8() and fixed the unit tests in the first place -- but now with less code. Signed-off-by: Dan Scott --- diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm index 24c85bf93b..1102d10a94 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm @@ -12,7 +12,7 @@ use Exporter 'import'; our @EXPORT_OK = qw( clean_marc naco_normalize search_normalize ); sub naco_normalize { - my $str = NFD(shift); + my $str = shift; my $sf = shift; # Apply NACO normalization to input string; based on @@ -28,17 +28,17 @@ sub naco_normalize { $str = _normalize_codes($str, $sf); - return NFC($str); + return $str; } sub search_normalize { - my $str = NFD(shift); + my $str = shift; my $sf = shift; $str = _normalize_substitutions($str, $sf); $str = _normalize_codes($str, $sf); - return NFC($str); + return $str; } sub _normalize_substitutions { @@ -111,14 +111,14 @@ sub _normalize_codes { # Assumes input is already in UTF-8. sub clean_marc { my $input = shift; - my $xml = NFD((UNIVERSAL::isa($input, 'MARC::Record')) ? $input->as_xml_record() : $input); + my $xml = (UNIVERSAL::isa($input, 'MARC::Record')) ? 
$input->as_xml_record() : $input; $xml =~ s/\n//sog; $xml =~ s/^<\?xml.+\?\s*>//go; $xml =~ s/>\s+</></go; $xml =~ s/\p{Cc}//go; $xml = OpenILS::Application::AppUtils->entityize($xml); $xml =~ s/[\x00-\x1f]//go; - return NFC($xml); + return $xml; } 1; diff --git a/Open-ILS/src/sql/Pg/002.functions.config.sql b/Open-ILS/src/sql/Pg/002.functions.config.sql index df935907f8..cd7df41b90 100644 --- a/Open-ILS/src/sql/Pg/002.functions.config.sql +++ b/Open-ILS/src/sql/Pg/002.functions.config.sql @@ -523,10 +523,6 @@ $xml =~ s/\p{Cc}//go; # Embed a version of OpenILS::Application::AppUtils->entityize() # to avoid having to set PERL5LIB for PostgreSQL as well -# If we are going to convert non-ASCII characters to XML entities, -# we had better be dealing with a UTF8 string to begin with -$xml = NFC(NFD($xml)); - # Convert raw ampersands to entities $xml =~ s/&(?!\S+;)/&amp;/gso; @@ -652,10 +648,6 @@ if ($create or $munge) { # Embed a version of OpenILS::Application::AppUtils->entityize() # to avoid having to set PERL5LIB for PostgreSQL as well - # If we are going to convert non-ASCII characters to XML entities, - # we had better be dealing with a UTF8 string to begin with - $xml = NFC(NFD($xml)); - # Convert raw ampersands to entities $xml =~ s/&(?!\S+;)/&amp;/gso; @@ -696,7 +688,7 @@ CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $ use Unicode::Normalize; use Encode; - my $str = NFD(shift); + my $str = shift; my $sf = shift; # Apply NACO normalization to input string; based on 
@@ -817,7 +809,7 @@ CREATE OR REPLACE FUNCTION public.search_normalize( TEXT, TEXT ) RETURNS TEXT AS $str =~ s/^\s+//; $str =~ s/\s+$//g; - return lc NFC($str); + return lc $str; $func$ LANGUAGE 'plperlu' STRICT IMMUTABLE; CREATE OR REPLACE FUNCTION public.naco_normalize_keep_comma( TEXT ) RETURNS TEXT AS $func$