First, restore the >= enable before and after ranges in
authority_tag_sf_browse(), after I mistakenly removed it in
r19131; the second storage request for $after does not
stomp on the prior $before results, it simply gets pushed
onto the carefully constructed list of $before results,
ensuring that our target is in the middle of page 0.
Second, we're treating all of the "tag" members in the
method registration as list references now (for the purpose
of searching against 4xx/5xx in the .refs. variants), but
that was blowing up when we registered just a single tag as
a string and tried to treat the scalar as a list reference.
I could have checked to see if what we had incoming was a
reference and dance accordingly, but opted to just define
all single-tag entries as single-element arrays instead.
Applied the same to startwith.
Finally, in r19331 I had used chop() to ensure that an
exact match for startwith would be returned as element 1 on
page 0, instead of appearing as the last element of page -1.
I had said that the right way to do this would be to naco_normalize()
the value to match the normalized afr.value, and so this is what
I have done. Rather than torturously using O:A:Storage:FTS to get
at the naco_normalize() definition, I moved the function into its
own Utils package and adjusted its usage accordingly through the
affected code. One step closer to single-sourcing the function
in the database, as well?
git-svn-id: svn://svn.open-ils.org/ILS/trunk@19332
dcc99617-32d9-48b4-a31d-
7c20da2025e4
package OpenILS::Application::Storage::FTS;
use OpenSRF::Utils::Logger qw/:level/;
use Parse::RecDescent;
-use Unicode::Normalize;
-use Encode;
+use OpenILS::Utils::Normalize qw( naco_normalize );
my $_default_grammar_parser = new Parse::RecDescent ( <<'GRAMMAR' );
GRAMMAR
-# FIXME - this is a copy-and-paste of the naco_normalize
-# stored procedure
-sub naco_normalize {
-
- my $str = decode_utf8(shift);
- my $sf = shift;
-
- # Apply NACO normalization to input string; based on
- # http://www.loc.gov/catdir/pcc/naco/SCA_PccNormalization_Final_revised.pdf
- #
- # Note that unlike a strict reading of the NACO normalization rules,
- # output is returned as lowercase instead of uppercase for compatibility
- # with previous versions of the Evergreen naco_normalize routine.
-
- # Convert to upper-case first; even though final output will be lowercase, doing this will
- # ensure that the German eszett (ß) and certain ligatures (ff, fi, ffl, etc.) will be handled correctly.
- # If there are any bugs in Perl's implementation of upcasing, they will be passed through here.
- $str = uc $str;
-
- # remove non-filing strings
- $str =~ s/\x{0098}.*?\x{009C}//g;
-
- $str = NFKD($str);
-
- # additional substitutions - 3.6.
- $str =~ s/\x{00C6}/AE/g;
- $str =~ s/\x{00DE}/TH/g;
- $str =~ s/\x{0152}/OE/g;
- $str =~ tr/\x{0110}\x{00D0}\x{00D8}\x{0141}\x{2113}\x{02BB}\x{02BC}]['/DDOLl/d;
-
- # transformations based on Unicode category codes
- $str =~ s/[\p{Cc}\p{Cf}\p{Co}\p{Cs}\p{Lm}\p{Mc}\p{Me}\p{Mn}]//g;
-
- if ($sf && $sf =~ /^a/o) {
- my $commapos = index($str, ',');
- if ($commapos > -1) {
- if ($commapos != length($str) - 1) {
- $str =~ s/,/\x07/; # preserve first comma
- }
- }
- }
-
- # since we've stripped out the control characters, we can now
- # use a few as placeholders temporarily
- $str =~ tr/+&@\x{266D}\x{266F}#/\x01\x02\x03\x04\x05\x06/;
- $str =~ s/[\p{Pc}\p{Pd}\p{Pe}\p{Pf}\p{Pi}\p{Po}\p{Ps}\p{Sk}\p{Sm}\p{So}\p{Zl}\p{Zp}\p{Zs}]/ /g;
- $str =~ tr/\x01\x02\x03\x04\x05\x06\x07/+&@\x{266D}\x{266F}#,/;
-
- # decimal digits
- $str =~ tr/\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{07C0}-\x{07C9}\x{0966}-\x{096F}\x{09E6}-\x{09EF}\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}\x{0BE6}-\x{0BEF}\x{0C66}-\x{0C6F}\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}\x{1040}-\x{1049}\x{1090}-\x{1099}\x{17E0}-\x{17E9}\x{1810}-\x{1819}\x{1946}-\x{194F}\x{19D0}-\x{19D9}\x{1A80}-\x{1A89}\x{1A90}-\x{1A99}\x{1B50}-\x{1B59}\x{1BB0}-\x{1BB9}\x{1C40}-\x{1C49}\x{1C50}-\x{1C59}\x{A620}-\x{A629}\x{A8D0}-\x{A8D9}\x{A900}-\x{A909}\x{A9D0}-\x{A9D9}\x{AA50}-\x{AA59}\x{ABF0}-\x{ABF9}\x{FF10}-\x{FF19}/0-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-9/;
-
- # intentionally skipping step 8 of the NACO algorithm; if the string
- # gets normalized away, that's fine.
-
- # leading and trailing spaces
- $str =~ s/\s+/ /g;
- $str =~ s/^\s+//;
- $str =~ s/\s+$//g;
-
- return lc $str;
-}
-
-#' stupid vim syntax highlighting ...
-
sub compile {
$log->debug("You must override me somewhere, or I will make searching really slow!!!!",ERROR);;
use OpenSRF::EX qw/:try/;
use OpenILS::Application::Storage::FTS;
use OpenILS::Utils::Fieldmapper;
+use OpenILS::Utils::Normalize qw( naco_normalize );
use OpenSRF::Utils::Logger qw/:level/;
use OpenSRF::Utils::Cache;
use Data::Dumper;
for my $t ( @tags ) {
for my $search ( @searches ) {
my $sf = $$search{subfield};
- my $term = OpenILS::Application::Storage::FTS::naco_normalize($$search{term}, $sf);
+ my $term = naco_normalize($$search{term}, $sf);
$tag = [$tag] if (!ref($tag));
use strict;
use warnings;
+use OpenILS::Utils::Normalize qw( naco_normalize );
# All OpenSRF applications must be based on OpenSRF::Application or
# a subclass thereof. Makes sense, eh?
__PACKAGE__->register_method(
method => 'general_authority_browse',
api_name => 'open-ils.supercat.authority.title.browse',
- tag => '130', subfield => 'a',
+ tag => ['130'], subfield => 'a',
api_level => 1,
argc => 1,
signature =>
__PACKAGE__->register_method(
method => 'general_authority_browse',
api_name => 'open-ils.supercat.authority.topic.browse',
- tag => '150', subfield => 'a',
+ tag => ['150'], subfield => 'a',
api_level => 1,
argc => 1,
signature =>
__PACKAGE__->register_method(
method => 'general_authority_browse',
api_name => 'open-ils.supercat.authority.title.refs.browse',
- tag => '130', subfield => 'a',
+ tag => ['130'], subfield => 'a',
api_level => 1,
argc => 1,
signature =>
__PACKAGE__->register_method(
method => 'general_authority_browse',
api_name => 'open-ils.supercat.authority.topic.refs.browse',
- tag => '150', subfield => 'a',
+ tag => ['150'], subfield => 'a',
api_level => 1,
argc => 1,
signature =>
my $page_size = shift || 9;
my $page = shift || 0;
+ # Match authority.full_rec normalization
+ $value = naco_normalize($value, $subfield);
+
my ($before_limit,$after_limit) = (0,0);
my ($before_offset,$after_offset) = (0,0);
}
my @list = ();
- if ($page < 0) {
+ if ($page <= 0) {
my $before = $_storage->request(
"open-ils.cstore.json_query.atomic",
{ select => { afr => [qw/record value/] },
from => { 'are', 'afr' },
where => {
- '+afr' => { tag => \@ref_tags, subfield => $subfield, value => { '<' => lc($value) } },
+ '+afr' => { tag => \@ref_tags, subfield => $subfield, value => { '<' => $value } },
'+are' => { 'deleted' => 'f' }
},
order_by => { afr => { value => 'desc' } },
{ select => { afr => [qw/record value/] },
from => { 'are', 'afr' },
where => {
- '+afr' => { tag => \@ref_tags, subfield => $subfield, value => { '>=' => lc($value) } },
+ '+afr' => { tag => \@ref_tags, subfield => $subfield, value => { '>=' => $value } },
'+are' => { 'deleted' => 'f' }
},
order_by => { afr => { value => 'asc' } },
__PACKAGE__->register_method(
method => 'general_authority_startwith',
api_name => 'open-ils.supercat.authority.title.startwith',
- tag => '130', subfield => 'a',
+ tag => ['130'], subfield => 'a',
api_level => 1,
argc => 1,
signature =>
__PACKAGE__->register_method(
method => 'general_authority_startwith',
api_name => 'open-ils.supercat.authority.topic.startwith',
- tag => '150', subfield => 'a',
+ tag => ['150'], subfield => 'a',
api_level => 1,
argc => 1,
signature =>
__PACKAGE__->register_method(
method => 'general_authority_startwith',
api_name => 'open-ils.supercat.authority.title.refs.startwith',
- tag => '130', subfield => 'a',
+ tag => ['130'], subfield => 'a',
api_level => 1,
argc => 1,
signature =>
__PACKAGE__->register_method(
method => 'general_authority_startwith',
api_name => 'open-ils.supercat.authority.topic.refs.startwith',
- tag => '150', subfield => 'a',
+ tag => ['150'], subfield => 'a',
api_level => 1,
argc => 1,
signature =>
my $limit = shift || 10;
my $page = shift || 0;
- # Chop the last character from the incoming value to return it on page 0
- chop($value);
+ # Match authority.full_rec normalization
+ $value = naco_normalize($value, $subfield);
my $ref_limit = $limit;
my $offset = $limit * abs($page);
{ select => { afr => [qw/record value/] },
from => { 'afr', 'are' },
where => {
- '+afr' => { tag => \@ref_tags, subfield => $subfield, value => { '<' => lc($value) } },
+ '+afr' => { tag => \@ref_tags, subfield => $subfield, value => { '<' => $value } },
'+are' => { deleted => 'f' }
},
order_by => { afr => { value => 'desc' } },
{ select => { afr => [qw/record value/] },
from => { 'afr', 'are' },
where => {
- '+afr' => { tag => \@ref_tags, subfield => $subfield, value => { '>=' => lc($value) } },
+ '+afr' => { tag => \@ref_tags, subfield => $subfield, value => { '>=' => $value } },
'+are' => { deleted => 'f' }
},
order_by => { afr => { value => 'asc' } },
1;
-# vim: noet:ts=4:sw=4
+# vim: et:ts=4:sw=4
--- /dev/null
+package OpenILS::Utils::Normalize;
+use strict;
+use warnings;
+use Unicode::Normalize;
+use Encode;
+
+use Exporter 'import';
+our @EXPORT_OK = qw( naco_normalize );
+
+sub naco_normalize {
+
+ my $str = decode_utf8(shift);
+ my $sf = shift;
+
+ # Apply NACO normalization to input string; based on
+ # http://www.loc.gov/catdir/pcc/naco/SCA_PccNormalization_Final_revised.pdf
+ #
+ # Note that unlike a strict reading of the NACO normalization rules,
+ # output is returned as lowercase instead of uppercase for compatibility
+ # with previous versions of the Evergreen naco_normalize routine.
+
+ # Convert to upper-case first; even though final output will be lowercase, doing this will
+ # ensure that the German eszett (ß) and certain ligatures (ff, fi, ffl, etc.) will be handled correctly.
+ # If there are any bugs in Perl's implementation of upcasing, they will be passed through here.
+ $str = uc $str;
+
+ # remove non-filing strings
+ $str =~ s/\x{0098}.*?\x{009C}//g;
+
+ $str = NFKD($str);
+
+ # additional substitutions - 3.6.
+ $str =~ s/\x{00C6}/AE/g;
+ $str =~ s/\x{00DE}/TH/g;
+ $str =~ s/\x{0152}/OE/g;
+ $str =~ tr/\x{0110}\x{00D0}\x{00D8}\x{0141}\x{2113}\x{02BB}\x{02BC}]['/DDOLl/d;
+
+ # transformations based on Unicode category codes
+ $str =~ s/[\p{Cc}\p{Cf}\p{Co}\p{Cs}\p{Lm}\p{Mc}\p{Me}\p{Mn}]//g;
+
+ if ($sf && $sf =~ /^a/o) {
+ my $commapos = index($str, ',');
+ if ($commapos > -1) {
+ if ($commapos != length($str) - 1) {
+ $str =~ s/,/\x07/; # preserve first comma
+ }
+ }
+ }
+
+ # since we've stripped out the control characters, we can now
+ # use a few as placeholders temporarily
+ $str =~ tr/+&@\x{266D}\x{266F}#/\x01\x02\x03\x04\x05\x06/;
+ $str =~ s/[\p{Pc}\p{Pd}\p{Pe}\p{Pf}\p{Pi}\p{Po}\p{Ps}\p{Sk}\p{Sm}\p{So}\p{Zl}\p{Zp}\p{Zs}]/ /g;
+ $str =~ tr/\x01\x02\x03\x04\x05\x06\x07/+&@\x{266D}\x{266F}#,/;
+
+ # decimal digits
+ $str =~ tr/\x{0660}-\x{0669}\x{06F0}-\x{06F9}\x{07C0}-\x{07C9}\x{0966}-\x{096F}\x{09E6}-\x{09EF}\x{0A66}-\x{0A6F}\x{0AE6}-\x{0AEF}\x{0B66}-\x{0B6F}\x{0BE6}-\x{0BEF}\x{0C66}-\x{0C6F}\x{0CE6}-\x{0CEF}\x{0D66}-\x{0D6F}\x{0E50}-\x{0E59}\x{0ED0}-\x{0ED9}\x{0F20}-\x{0F29}\x{1040}-\x{1049}\x{1090}-\x{1099}\x{17E0}-\x{17E9}\x{1810}-\x{1819}\x{1946}-\x{194F}\x{19D0}-\x{19D9}\x{1A80}-\x{1A89}\x{1A90}-\x{1A99}\x{1B50}-\x{1B59}\x{1BB0}-\x{1BB9}\x{1C40}-\x{1C49}\x{1C50}-\x{1C59}\x{A620}-\x{A629}\x{A8D0}-\x{A8D9}\x{A900}-\x{A909}\x{A9D0}-\x{A9D9}\x{AA50}-\x{AA59}\x{ABF0}-\x{ABF9}\x{FF10}-\x{FF19}/0-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-90-9/;
+
+ # intentionally skipping step 8 of the NACO algorithm; if the string
+ # gets normalized away, that's fine.
+
+ # leading and trailing spaces
+ $str =~ s/\s+/ /g;
+ $str =~ s/^\s+//;
+ $str =~ s/\s+$//g;
+
+ return lc $str;
+}
+
+1;
use Unicode::Normalize;
use DBI;
-use OpenILS::Application::Storage::FTS;
+use OpenILS::Utils::Normalize qw( naco_normalize );
# This could be made better in at least one of two ways (or both);
# 1. put PL/Perl code that doesn't require a database into external
[ '♭©®♯', '♭ ♯', 'other symbols' ],
);
-# test copy of naco_normalize in OpenILS::Application::Storage::FTS
+# test copy of naco_normalize in OpenILS::Utils::Normalize
foreach my $case (@test_cases) {
- is(OpenILS::Application::Storage::FTS::naco_normalize($case->[0]), $case->[1], $case->[2] . ' (FTS.pm)');
+ is(naco_normalize($case->[0]), $case->[1], $case->[2] . ' (Normalize.pm)');
}
-is(OpenILS::Application::Storage::FTS::naco_normalize('Smith, Jane. Poet, painter, and author', 'a'),
+is(naco_normalize('Smith, Jane. Poet, painter, and author', 'a'),
'smith, jane poet painter and author',
- 'retain first comma (FTS.pm)');
+ 'retain first comma (Normalize.pm)');
SKIP: {
my $dbh = DBI->connect($dsn, $db_user, $db_pw, {AutoCommit => 1, pg_enable_utf8 => 1, PrintError => 0});
page = 0;
}
- /* temporarily use 'startwith' instead of 'browse' until browse is repaired */
- var url = '/opac/extras/startwith/marcxml/'
+ var url = '/opac/extras/browse/marcxml/'
+ type + '.refs'
+ '/1' // OU - currently unscoped
+ '/' + sf.toString()