From: Lebbeous Fogle-Weekley Date: Tue, 24 Jan 2012 21:47:37 +0000 (-0500) Subject: Save unicode characters when populating metabib.browse_entry X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=3d0ed9bfd4e85ecd0851a545d1f4aca34ce7d215;p=evergreen%2Fequinox.git Save unicode characters when populating metabib.browse_entry Using a perl-based regexp string splitter instead of the built-in one means that we can get correct suggestions when we type "José". Signed-off-by: Lebbeous Fogle-Weekley --- diff --git a/Open-ILS/src/sql/Pg/000.functions.general.sql b/Open-ILS/src/sql/Pg/000.functions.general.sql index 40ab727269..6a46d1b40c 100644 --- a/Open-ILS/src/sql/Pg/000.functions.general.sql +++ b/Open-ILS/src/sql/Pg/000.functions.general.sql @@ -28,6 +28,11 @@ CREATE OR REPLACE FUNCTION evergreen.xml_escape(str TEXT) RETURNS text AS $$ '>', '&gt;'); $$ LANGUAGE SQL IMMUTABLE; +CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT) +RETURNS TEXT[] AS $$ + return encode_array_literal([split $_[1], $_[0]]); +$$ LANGUAGE PLPERLU STRICT IMMUTABLE; + -- Provide a named type for patching functions CREATE TYPE evergreen.patch AS (patch TEXT); diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 6dcdd6cbff..fa24e609d7 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -806,7 +806,9 @@ BEGIN END IF; IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN - value := ARRAY_TO_STRING(regexp_split_to_array(value, E'\\W+'), ' '); + value := ARRAY_TO_STRING( + evergreen.regexp_split_to_array(value, E'\\W+'), ' ' + ); END IF; NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value); diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib_autosuggest.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib_autosuggest.sql index 838512e73b..aec97f59ca 100644 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib_autosuggest.sql +++ 
b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib_autosuggest.sql @@ -452,6 +452,14 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; +-- The advantage of this over the stock regexp_split_to_array() is that it +-- won't degrade unicode strings. +CREATE OR REPLACE FUNCTION evergreen.regexp_split_to_array(TEXT, TEXT) +RETURNS TEXT[] AS $$ + return encode_array_literal([split $_[1], $_[0]]); +$$ LANGUAGE PLPERLU STRICT IMMUTABLE; + + -- Adds some logic for browse_entry to split on non-word chars for index_vector, post-normalize CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$ DECLARE @@ -510,7 +518,9 @@ BEGIN END IF; IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN - value := ARRAY_TO_STRING(regexp_split_to_array(value, E'\\W+'), ' '); + value := ARRAY_TO_STRING( + evergreen.regexp_split_to_array(value, E'\\W+'), ' ' + ); END IF; NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);