From: Dan Pearl Date: Thu, 1 Oct 2015 13:49:17 +0000 (-0400) Subject: LP#1501781 - Make patron name search diacritic/space insensitive. X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=refs%2Fheads%2Fuser%2Fdpearl%2Faccent4;p=working%2FEvergreen.git LP#1501781 - Make patron name search diacritic/space insensitive. Diacritical marks may exist in the patron record -- as they should -- but this makes patron name search difficult for librarians who may be unfamiliar with all the special characters used and also how to elicit them from a keyboard. To ease this, accented characters will be converted into their 'plain' analogs for comparison purposes. So, for example, if the patron's last name is Chávez, typing "Chavez" in the Last Name box in Patron Search will match it. Spaces in a name (like "De la Croix") will be squashed out so that name would be matched by "Delacroix" or "De la Croix" or variants. The librarian can enter the accented characters or not. Signed-off-by: Dan Pearl Signed-off-by: Signed-off-by: --- diff --git a/Open-ILS/src/extras/install/Makefile.debian-jessie b/Open-ILS/src/extras/install/Makefile.debian-jessie index e3055ad055..3c260604aa 100644 --- a/Open-ILS/src/extras/install/Makefile.debian-jessie +++ b/Open-ILS/src/extras/install/Makefile.debian-jessie @@ -85,6 +85,7 @@ export DEB_APACHE_DISMODS = \ export CPAN_MODULES = \ Business::OnlinePayment::PayPal \ + Text::Unaccent::PurePerl \ Email::Send export CPAN_MODULES_FORCE = \ diff --git a/Open-ILS/src/extras/install/Makefile.debian-squeeze b/Open-ILS/src/extras/install/Makefile.debian-squeeze index 0df1fb9122..2c32683bb2 100644 --- a/Open-ILS/src/extras/install/Makefile.debian-squeeze +++ b/Open-ILS/src/extras/install/Makefile.debian-squeeze @@ -70,6 +70,7 @@ export DEB_APACHE_DISMODS = \ export CPAN_MODULES = \ Excel::Writer::XLSX \ + Text::Unaccent::PurePerl \ Business::OnlinePayment::PayPal \ Library::CallNumber::LC \ Net::Z3950::Simple2ZOOM \ diff --git 
a/Open-ILS/src/extras/install/Makefile.debian-wheezy b/Open-ILS/src/extras/install/Makefile.debian-wheezy index 59bf75a429..1f9d0736fe 100644 --- a/Open-ILS/src/extras/install/Makefile.debian-wheezy +++ b/Open-ILS/src/extras/install/Makefile.debian-wheezy @@ -80,6 +80,7 @@ export DEB_APACHE_DISMODS = \ export CPAN_MODULES = \ Excel::Writer::XLSX \ + Text::Unaccent::PurePerl \ Business::OnlinePayment::PayPal \ Template::Plugin::POSIX \ diff --git a/Open-ILS/src/extras/install/Makefile.fedora b/Open-ILS/src/extras/install/Makefile.fedora index d338037baa..eaba5830eb 100644 --- a/Open-ILS/src/extras/install/Makefile.fedora +++ b/Open-ILS/src/extras/install/Makefile.fedora @@ -70,6 +70,7 @@ FEDORA_RPMS = \ export CPAN_MODULES = \ Excel::Writer::XLSX \ + Text::Unaccent::PurePerl \ Business::ISSN \ Net::Z3950::ZOOM \ Net::Z3950::Simple2ZOOM \ diff --git a/Open-ILS/src/extras/install/Makefile.ubuntu-precise b/Open-ILS/src/extras/install/Makefile.ubuntu-precise index 853d6f853e..4899f48cb1 100644 --- a/Open-ILS/src/extras/install/Makefile.ubuntu-precise +++ b/Open-ILS/src/extras/install/Makefile.ubuntu-precise @@ -82,6 +82,7 @@ export DEB_APACHE_DISMODS = \ export CPAN_MODULES = \ Excel::Writer::XLSX \ + Text::Unaccent::PurePerl \ Business::CreditCard::Object \ Business::OnlinePayment::PayPal \ Template::Plugin::POSIX \ diff --git a/Open-ILS/src/extras/install/Makefile.ubuntu-trusty b/Open-ILS/src/extras/install/Makefile.ubuntu-trusty index 5e836e6f2c..b39bd34a1f 100644 --- a/Open-ILS/src/extras/install/Makefile.ubuntu-trusty +++ b/Open-ILS/src/extras/install/Makefile.ubuntu-trusty @@ -90,6 +90,7 @@ export DEB_APACHE_DISCONF = \ export CPAN_MODULES = \ Business::CreditCard::Object \ Business::OnlinePayment::PayPal \ + Text::Unaccent::PurePerl export CPAN_MODULES_FORCE = \ Business::Stripe \ diff --git a/Open-ILS/src/perlmods/Build.PL b/Open-ILS/src/perlmods/Build.PL index 5c323085b9..76a720e6d9 100644 --- a/Open-ILS/src/perlmods/Build.PL +++ 
b/Open-ILS/src/perlmods/Build.PL @@ -82,6 +82,7 @@ my $build = Module::Build->new( 'Text::Aspell' => '0', 'Text::CSV' => '0', 'Text::Glob' => '0', + 'Text::Unaccent::PurePerl' => '0', 'Time::HiRes' => '0', 'Time::Local' => '0', 'Unicode::Normalize' => '0', diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/actor.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/actor.pm index e2a4edc3dd..a4572f0ba3 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/actor.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/actor.pm @@ -5,12 +5,18 @@ use OpenSRF::Utils::Logger qw/:level/; use OpenSRF::Utils qw/:datetime/; use OpenILS::Utils::Fieldmapper; use OpenSRF::Utils::SettingsClient; +use OpenILS::Application::AppUtils; use DateTime; use DateTime::Format::ISO8601; use DateTime::Set; use DateTime::SpanSet; +use Text::Unaccent::PurePerl qw(unac_string); + +my $apputils = "OpenILS::Application::AppUtils"; +my $U = $apputils; + my $_dt_parser = DateTime::Format::ISO8601->new; my $log = 'OpenSRF::Utils::Logger'; @@ -617,6 +623,15 @@ __PACKAGE__->register_method( NOTE ); +sub _prepare_name_argument { + # Get rid of all whitespace, accents, and regex characters + my ($search) = _clean_regex_chars(@_); + $search =~ s/\s//g; + $search = Text::Unaccent::PurePerl::unac_string($search); + + return $search; +}; + sub _clean_regex_chars { my ($search) = @_; @@ -664,8 +679,22 @@ sub patron_search { # group 2 = phone, ident # group 3 = barcode - my $usr = join ' AND ', map { "evergreen.lowercase(CAST($_ AS text)) ~ ?" } grep { ''.$$search{$_}{group} eq '0' } keys %$search; - my @usrv = map { "^" . _clean_regex_chars($$search{$_}{value}) } grep { ''.$$search{$_}{group} eq '0' } keys %$search; + # Treatment of name fields depends on whether the org has + # diacritic_insensitivity turned on or off. 
+ + my $diacritic_insensitive = $U->is_true($U->ou_ancestor_setting_value($ws_ou, 'circ.patron_search.diacritic_insensitive')); + + my $usr; + my @usrv; + + if ($diacritic_insensitive) { + $usr = join ' AND ', map { "evergreen.unaccent_and_squash(CAST($_ AS text)) ~ ?" } grep { ''.$$search{$_}{group} eq '0' } keys %$search; + @usrv = map { "^" . _prepare_name_argument($$search{$_}{value}) } grep { ''.$$search{$_}{group} eq '0' } keys %$search; + + } else { + $usr = join ' AND ', map { "evergreen.lowercase(CAST($_ AS text)) ~ ?" } grep { ''.$$search{$_}{group} eq '0' } keys %$search; + @usrv = map { "^" . _clean_regex_chars($$search{$_}{value}) } grep { ''.$$search{$_}{group} eq '0' } keys %$search; + } my $addr = join ' AND ', map { "evergreen.lowercase(CAST($_ AS text)) ~ ?" } grep { ''.$$search{$_}{group} eq '1' } keys %$search; my @addrv = map { "^" . _clean_regex_chars($$search{$_}{value}) } grep { ''.$$search{$_}{group} eq '1' } keys %$search; @@ -712,7 +741,11 @@ sub patron_search { my @namev; if (0 && $nv) { for my $n ( qw/first_given_name second_given_name family_name/ ) { - push @ns, "evergreen.lowercase($n) ~ ?"; + if ($diacritic_insensitive) { + push @ns, "evergreen.unaccent_and_squash($n) ~ ?"; + } else { + push @ns, "evergreen.lowercase($n) ~ ?"; + } push @namev, "^$nv"; } $name = '(' . join(' OR ', @ns) . 
')'; diff --git a/Open-ILS/src/sql/Pg/000.functions.general.sql b/Open-ILS/src/sql/Pg/000.functions.general.sql index 0cfa909c00..0641e209c2 100644 --- a/Open-ILS/src/sql/Pg/000.functions.general.sql +++ b/Open-ILS/src/sql/Pg/000.functions.general.sql @@ -85,4 +85,12 @@ END $protect_reserved$ LANGUAGE plpgsql; +CREATE OR REPLACE FUNCTION evergreen.unaccent_and_squash ( IN arg text) RETURNS text + IMMUTABLE STRICT AS $$ + BEGIN + RETURN evergreen.lowercase(evergreen.unaccent(regexp_replace(arg, '\s','','g'))); + END; +$$ LANGUAGE PLPGSQL; + + COMMIT; diff --git a/Open-ILS/src/sql/Pg/005.schema.actors.sql b/Open-ILS/src/sql/Pg/005.schema.actors.sql index da94af0f72..d7e3e6c3ed 100644 --- a/Open-ILS/src/sql/Pg/005.schema.actors.sql +++ b/Open-ILS/src/sql/Pg/005.schema.actors.sql @@ -82,6 +82,9 @@ CREATE INDEX actor_usr_billing_address_idx ON actor.usr (billing_address); CREATE INDEX actor_usr_first_given_name_idx ON actor.usr (evergreen.lowercase(first_given_name)); CREATE INDEX actor_usr_second_given_name_idx ON actor.usr (evergreen.lowercase(second_given_name)); CREATE INDEX actor_usr_family_name_idx ON actor.usr (evergreen.lowercase(family_name)); +CREATE INDEX actor_usr_first_given_name_unaccent_idx ON actor.usr (evergreen.unaccent_and_squash(first_given_name)); +CREATE INDEX actor_usr_second_given_name_unaccent_idx ON actor.usr (evergreen.unaccent_and_squash(second_given_name)); +CREATE INDEX actor_usr_family_name_unaccent_idx ON actor.usr (evergreen.unaccent_and_squash(family_name)); CREATE INDEX actor_usr_usrname_idx ON actor.usr (evergreen.lowercase(usrname)); CREATE INDEX actor_usr_email_idx ON actor.usr (evergreen.lowercase(email)); diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql index a6cc6686fb..0fafbb9439 100644 --- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql +++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql @@ -14752,3 +14752,23 @@ INSERT INTO config.org_unit_setting_type 'coust', 
'description'), 'bool'); +INSERT INTO config.org_unit_setting_type +( name, grp, label, description, datatype ) +VALUES +('circ.patron_search.diacritic_insensitive', + 'circ', + oils_i18n_gettext('circ.patron_search.diacritic_insensitive', + 'Patron search diacritic insensitive', + 'coust', 'label'), + oils_i18n_gettext('circ.patron_search.diacritic_insensitive', + 'Match patron last, first, and middle names irrespective of usage of diacritical marks or spaces. (e.g., Ines will match Inés; de la Cruz will match Delacruz)', + 'coust', 'description'), + 'bool'); + +INSERT INTO actor.org_unit_setting ( + org_unit, name, value +) VALUES ( + (SELECT id FROM actor.org_unit WHERE parent_ou IS NULL), + 'circ.patron_search.diacritic_insensitive', + 'true' +); diff --git a/Open-ILS/src/sql/Pg/create_database_extensions.sql b/Open-ILS/src/sql/Pg/create_database_extensions.sql index b61aa5b0b0..fa17fe5af0 100644 --- a/Open-ILS/src/sql/Pg/create_database_extensions.sql +++ b/Open-ILS/src/sql/Pg/create_database_extensions.sql @@ -21,3 +21,4 @@ CREATE EXTENSION xml2; CREATE EXTENSION hstore; CREATE EXTENSION intarray; CREATE EXTENSION pgcrypto; +CREATE EXTENSION unaccent; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.patron_unaccent.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.patron_unaccent.sql new file mode 100644 index 0000000000..8d3fb9266a --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.patron_unaccent.sql @@ -0,0 +1,43 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE EXTENSION unaccent; + +CREATE OR REPLACE FUNCTION evergreen.unaccent_and_squash ( IN arg text) RETURNS text + IMMUTABLE STRICT AS $$ + BEGIN + RETURN evergreen.lowercase(evergreen.unaccent(regexp_replace(arg, '\s','','g'))); + END; +$$ LANGUAGE PLPGSQL; + +-- The unaccented indices for patron name fields +CREATE INDEX actor_usr_first_given_name_unaccent_idx ON actor.usr (evergreen.unaccent_and_squash(first_given_name)); +CREATE INDEX 
actor_usr_second_given_name_unaccent_idx ON actor.usr (evergreen.unaccent_and_squash(second_given_name)); +CREATE INDEX actor_usr_family_name_unaccent_idx ON actor.usr (evergreen.unaccent_and_squash(family_name)); + +-- DB setting to control behavior; true by default +INSERT INTO config.org_unit_setting_type +( name, grp, label, description, datatype ) +VALUES +('circ.patron_search.diacritic_insensitive', + 'circ', + oils_i18n_gettext('circ.patron_search.diacritic_insensitive', + 'Patron search diacritic insensitive', + 'coust', 'label'), + oils_i18n_gettext('circ.patron_search.diacritic_insensitive', + 'Match patron last, first, and middle names irrespective of usage of diacritical marks or spaces. (e.g., Ines will match Inés; de la Cruz will match Delacruz)', + 'coust', 'description'), + 'bool'); + +INSERT INTO actor.org_unit_setting ( + org_unit, name, value +) VALUES ( + (SELECT id FROM actor.org_unit WHERE parent_ou IS NULL), + 'circ.patron_search.diacritic_insensitive', + 'true' +); + + +COMMIT; + diff --git a/docs/RELEASE_NOTES_NEXT/Client/accent_insensitive_patron_search b/docs/RELEASE_NOTES_NEXT/Client/accent_insensitive_patron_search new file mode 100644 index 0000000000..30200eacdb --- /dev/null +++ b/docs/RELEASE_NOTES_NEXT/Client/accent_insensitive_patron_search @@ -0,0 +1,16 @@ +Accent Insensitive Patron Search +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +When performing a patron search, in addition to the (existing) +case-insensitivity, these additional characteristics will govern the +search: + +Accent (diacritic) insensitivity:: +Diacritics will be transformed into a plain character equivalent for comparison purposes. So if the patron name is Eugène Delacroix, for example, you could enter +euge for the First Name, and it would match. Ligatures such as Œ are expanded into the constituent characters "OE". + +Space insensitivity:: +Spaces will be squashed out for comparison purposes. 
If the patron's name is, again, +Eugène Delacroix, you could enter "de la croix" in the Last Name field and it would match. + +This behavior affects the Last Name, First Name, and Middle Name fields +of the search.