our @EXPORT_OK = qw( clean_marc naco_normalize search_normalize );
sub naco_normalize {
- my $str = NFD(shift);
+ my $str = shift;
my $sf = shift;
# Apply NACO normalization to input string; based on
$str = _normalize_codes($str, $sf);
- return NFC($str);
+ return $str;
}
sub search_normalize {
- my $str = NFD(shift);
+ my $str = shift;
my $sf = shift;
$str = _normalize_substitutions($str, $sf);
$str = _normalize_codes($str, $sf);
- return NFC($str);
+ return $str;
}
sub _normalize_substitutions {
# Assumes input is already in UTF-8.
sub clean_marc {
my $input = shift;
- my $xml = NFD((UNIVERSAL::isa($input, 'MARC::Record')) ? $input->as_xml_record() : $input);
+ my $xml = (UNIVERSAL::isa($input, 'MARC::Record')) ? $input->as_xml_record() : $input;
$xml =~ s/\n//sog;
$xml =~ s/^<\?xml.+\?\s*>//go;
$xml =~ s/>\s+</></go;
$xml =~ s/\p{Cc}//go;
$xml = OpenILS::Application::AppUtils->entityize($xml);
$xml =~ s/[\x00-\x1f]//go;
- return NFC($xml);
+ return $xml;
}
1;
# Embed a version of OpenILS::Application::AppUtils->entityize()
# to avoid having to set PERL5LIB for PostgreSQL as well
-# If we are going to convert non-ASCII characters to XML entities,
-# we had better be dealing with a UTF8 string to begin with
-$xml = NFC(NFD($xml));
-
# Convert raw ampersands to entities
$xml =~ s/&(?!\S+;)/&amp;/gso;
# Embed a version of OpenILS::Application::AppUtils->entityize()
# to avoid having to set PERL5LIB for PostgreSQL as well
- # If we are going to convert non-ASCII characters to XML entities,
- # we had better be dealing with a UTF8 string to begin with
- $xml = NFC(NFD($xml));
-
# Convert raw ampersands to entities
$xml =~ s/&(?!\S+;)/&amp;/gso;
use Unicode::Normalize;
use Encode;
- my $str = NFD(shift);
+ my $str = shift;
my $sf = shift;
# Apply NACO normalization to input string; based on
$str =~ s/^\s+//;
$str =~ s/\s+$//g;
- return lc NFC($str);
+ return lc $str;
$func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
-- Currently, the only difference from naco_normalize is that search_normalize
use Unicode::Normalize;
use Encode;
- my $str = NFD(shift);
+ my $str = shift;
my $sf = shift;
# Apply NACO normalization to input string; based on
$str =~ s/^\s+//;
$str =~ s/\s+$//g;
- return lc NFC($str);
+ return lc $str;
$func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
CREATE OR REPLACE FUNCTION public.naco_normalize_keep_comma( TEXT ) RETURNS TEXT AS $func$