From 8843a15bbeb6eaf9c9749be09e0e39811d472179 Mon Sep 17 00:00:00 2001 From: Lebbeous Fogle-Weekley Date: Tue, 21 May 2013 16:48:48 -0400 Subject: [PATCH] OPAC Browse: Build browse entry sort_value column separately from value This means changes to the ingest process and a config.metabib_field table that's one column wider. This means you can get browse headings like "The Nutcracker" that appear visually as such, but sort as if they only contained "Nutcracker", assuming correct cataloging with non-filing indicators. Previously the heading would both look and sort like "Nutcracker." Signed-off-by: Lebbeous Fogle-Weekley --- Open-ILS/src/sql/Pg/002.schema.config.sql | 3 +- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 64 +++++++++------ Open-ILS/src/sql/Pg/950.data.seed-values.sql | 20 ++--- .../sql/Pg/upgrade/YYYY.schema.bib-auth-browse.sql | 96 ++++++++++++---------- 4 files changed, 105 insertions(+), 78 deletions(-) diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 2738748af1..1bf0efe9e4 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -193,8 +193,9 @@ CREATE TABLE config.metabib_field ( facet_field BOOL NOT NULL DEFAULT FALSE, browse_field BOOL NOT NULL DEFAULT TRUE, browse_xpath TEXT, + browse_sort_xpath TEXT, facet_xpath TEXT, - authority_xpath TEXT, + authority_xpath TEXT, restrict BOOL DEFAULT FALSE NOT NULL ); COMMENT ON TABLE config.metabib_field IS $$ diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index e6caa0a999..035c8a53b6 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -185,26 +185,16 @@ CREATE INDEX metabib_facet_entry_source_idx ON metabib.facet_entry (source); CREATE TABLE metabib.browse_entry ( id BIGSERIAL PRIMARY KEY, - value TEXT unique, + value TEXT, index_vector tsvector, - sort_value TEXT NOT NULL + sort_value TEXT NOT NULL, + UNIQUE(value, sort_value) ); + CREATE INDEX browse_entry_sort_value_idx ON metabib.browse_entry USING BTREE (sort_value); -CREATE OR REPLACE FUNCTION metabib.browse_entry_sort_value() -RETURNS TRIGGER AS $$ - BEGIN - NEW.sort_value = public.search_normalize(NEW.value); - RETURN NEW; - END; -$$ LANGUAGE PLPGSQL; - -CREATE TRIGGER mbe_sort_value -BEFORE INSERT OR UPDATE ON metabib.browse_entry -FOR EACH ROW EXECUTE PROCEDURE metabib.browse_entry_sort_value(); - CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector); CREATE TRIGGER metabib_browse_entry_fti_trigger BEFORE INSERT OR UPDATE ON metabib.browse_entry @@ -402,14 +392,15 @@ CREATE INDEX metabib_metarecord_source_map_metarecord_idx ON metabib.metarecord_ CREATE INDEX metabib_metarecord_source_map_source_record_idx ON metabib.metarecord_source_map (source); CREATE TYPE metabib.field_entry_template AS ( - field_class TEXT, - field INT, - facet_field BOOL, - search_field BOOL, - browse_field BOOL, - source BIGINT, - value TEXT, - authority BIGINT + field_class TEXT, + field INT, + facet_field BOOL, + search_field BOOL, + browse_field BOOL, + source BIGINT, + value TEXT, + authority BIGINT, + sort_value TEXT ); @@ -424,6 +415,7 @@ DECLARE xml_node_list TEXT[]; facet_text TEXT; browse_text TEXT; + sort_value TEXT; raw_text TEXT; curr_text TEXT; joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? @@ -492,10 +484,24 @@ BEGIN browse_text := curr_text; END IF; + IF idx.browse_sort_xpath IS NOT NULL AND + idx.browse_sort_xpath <> '' THEN + + sort_value := oils_xpath_string( + idx.browse_sort_xpath, xml_node, joiner, + ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] + ); + ELSE + sort_value := browse_text; + END IF; + output_row.field_class = idx.field_class; output_row.field = idx.id; output_row.source = rid; output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + output_row.sort_value := + public.search_normalize(sort_value); + output_row.authority := NULL; IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN @@ -520,6 +526,7 @@ BEGIN output_row.browse_field = TRUE; RETURN NEXT output_row; output_row.browse_field = FALSE; + output_row.sort_value := NULL; END IF; -- insert raw node text for faceting @@ -625,6 +632,7 @@ DECLARE b_skip_facet BOOL; b_skip_browse BOOL; b_skip_search BOOL; + value_prepped TEXT; BEGIN SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; @@ -664,12 +672,18 @@ BEGIN -- evergreen.oils_tsearch2()) changes. It may or may not be -- expensive to add a comparison of index_vector to index_vector -- to the WHERE clause below. - SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value; + + value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); + SELECT INTO mbe_row * FROM metabib.browse_entry + WHERE value = value_prepped AND sort_value = ind_data.sort_value; + IF FOUND THEN mbe_id := mbe_row.id; ELSE - INSERT INTO metabib.browse_entry (value) VALUES - (metabib.browse_normalize(ind_data.value, ind_data.field)); + INSERT INTO metabib.browse_entry + ( value, sort_value ) VALUES + ( value_prepped, ind_data.sort_value ); + mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); END IF; diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql index 75a37f4017..798a9e8354 100644 --- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql +++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql @@ -108,19 +108,19 @@ INSERT INTO config.xml_transform VALUES ( 'mods33', 'http://www.loc.gov/mods/v3' INSERT INTO config.xml_transform VALUES ( 'marc21expand880', 'http://www.loc.gov/MARC21/slim', 'marc', '' ); -- Index Definitions -INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_field, authority_xpath, browse_xpath ) VALUES - (1, 'series', 'seriestitle', oils_i18n_gettext(1, 'Series Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:relatedItem[@type="series"]/mods32:titleInfo[@type="nfi"]$$, TRUE, '//@xlink:href', $$//*/*[local-name() != "nonSort"]$$ ); -- vim */ +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_field, authority_xpath, browse_sort_xpath ) VALUES + (1, 'series', 'seriestitle', oils_i18n_gettext(1, 'Series Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:relatedItem[@type="series"]/mods32:titleInfo[@type="nfi"]$$, TRUE, '//@xlink:href', $$*[local-name() != "nonSort"]$$ ); INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath ) VALUES (2, 'title', 'abbreviated', oils_i18n_gettext(2, 'Abbreviated Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='abbreviated')]$$, '//@xlink:href' ); -INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath, browse_xpath ) VALUES - (3, 'title', 'translated', oils_i18n_gettext(3, 'Translated Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='translated-nfi')]$$, '//@xlink:href', $$//*/*[local-name() != "nonSort"]$$ ); -- vim */ -INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath, browse_xpath ) VALUES - (4, 'title', 'alternative', oils_i18n_gettext(4, 'Alternate Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='alternative-nfi')]$$, '//@xlink:href', $$//*/*[local-name() != "nonSort"]$$ ); -- vim */ -INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath, browse_xpath ) VALUES - (5, 'title', 'uniform', oils_i18n_gettext(5, 'Uniform Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='uniform-nfi')]$$, '//@xlink:href', $$//*/*[local-name() != "nonSort"]$$ ); -- vim */ -INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath, browse_field ) VALUES - (6, 'title', 'proper', oils_i18n_gettext(6, 'Title Proper', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleNonfiling[mods32:title and not (@type)]$$, '//@xlink:href', FALSE ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath, browse_sort_xpath ) VALUES + (3, 'title', 'translated', oils_i18n_gettext(3, 'Translated Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='translated-nfi')]$$, '//@xlink:href', $$*[local-name() != "nonSort"]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath, browse_sort_xpath ) VALUES + (4, 'title', 'alternative', oils_i18n_gettext(4, 'Alternate Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='alternative-nfi')]$$, '//@xlink:href', $$*[local-name() != "nonSort"]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath, browse_sort_xpath ) VALUES + (5, 'title', 'uniform', oils_i18n_gettext(5, 'Uniform Title', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='uniform-nfi')]$$, '//@xlink:href', $$*[local-name() != "nonSort"]$$ ); +INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, authority_xpath, browse_field, browse_sort_xpath ) VALUES + (6, 'title', 'proper', oils_i18n_gettext(6, 'Title Proper', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleNonfiling[mods32:title and not (@type)]$$, '//@xlink:href', TRUE, $$*[local-name() != "nonSort"]$$ ); INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, facet_xpath, facet_field , authority_xpath) VALUES (7, 'author', 'corporate', oils_i18n_gettext(7, 'Corporate Author', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:name[@type='corporate' and (mods32:role/mods32:roleTerm[text()='creator'] or mods32:role/mods32:roleTerm[text()='aut'] or mods32:role/mods32:roleTerm[text()='cre'])]$$, $$//*[local-name()='namePart']$$, TRUE, '//@xlink:href' ); -- /* to fool vim */; diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib-auth-browse.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib-auth-browse.sql index 3ecf6b252d..ee4b42c4b7 100644 --- a/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib-auth-browse.sql +++ b/Open-ILS/src/sql/Pg/upgrade/YYYY.schema.bib-auth-browse.sql @@ -6,15 +6,7 @@ ALTER TABLE metabib.browse_entry_def_map ADD COLUMN authority BIGINT REFERENCES ON DELETE SET NULL; ALTER TABLE config.metabib_field ADD COLUMN authority_xpath TEXT; - --- The following UPDATE also affects AutoSuggest, but not in a way users --- are likely to find objectionable. The selection of suggestions based --- on user input shouldn't change, but the displayed heading will no longer --- show leading articles or similar that can be rubbed out by non-filing --- indicators. - -UPDATE config.metabib_field SET browse_field = FALSE - WHERE field_class = 'title' and name = 'proper'; +ALTER TABLE config.metabib_field ADD COLUMN browse_sort_xpath TEXT; UPDATE config.metabib_field SET authority_xpath = '//@xlink:href' @@ -23,18 +15,8 @@ UPDATE config.metabib_field field_class IN ('subject','series','title','author') AND browse_field IS TRUE; --- INSERT INTO config.metabib_field ( --- id, field_class, name, label, xpath, format, --- search_field, facet_field, browse_field --- ) VALUES ( --- 31, 'title', 'browse', --- oils_i18n_gettext(31, 'Title Proper (Browse)', 'cmf', 'label'), --- $$//mods32:mods/mods32:titleInfo[not (@type)]/mods32:title$$, --- 'mods32', FALSE, FALSE, TRUE --- ); - ALTER TYPE metabib.field_entry_template ADD ATTRIBUTE authority BIGINT; - +ALTER TYPE metabib.field_entry_template ADD ATTRIBUTE sort_value TEXT; CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$ DECLARE @@ -45,6 +27,7 @@ DECLARE b_skip_facet BOOL; b_skip_browse BOOL; b_skip_search BOOL; + value_prepped TEXT; BEGIN SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; @@ -84,12 +67,18 @@ BEGIN -- evergreen.oils_tsearch2()) changes. It may or may not be -- expensive to add a comparison of index_vector to index_vector -- to the WHERE clause below. - SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value; + + value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); + SELECT INTO mbe_row * FROM metabib.browse_entry + WHERE value = value_prepped AND sort_value = ind_data.sort_value; + IF FOUND THEN mbe_id := mbe_row.id; ELSE - INSERT INTO metabib.browse_entry (value) VALUES - (metabib.browse_normalize(ind_data.value, ind_data.field)); + INSERT INTO metabib.browse_entry + ( value, sort_value ) VALUES + ( value_prepped, ind_data.sort_value ); + mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); END IF; @@ -129,6 +118,7 @@ DECLARE xml_node_list TEXT[]; facet_text TEXT; browse_text TEXT; + sort_value TEXT; raw_text TEXT; curr_text TEXT; joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? @@ -197,10 +187,24 @@ BEGIN browse_text := curr_text; END IF; + IF idx.browse_sort_xpath IS NOT NULL AND + idx.browse_sort_xpath <> '' THEN + + sort_value := oils_xpath_string( + idx.browse_sort_xpath, xml_node, joiner, + ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] + ); + ELSE + sort_value := browse_text; + END IF; + output_row.field_class = idx.field_class; output_row.field = idx.id; output_row.source = rid; output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + output_row.sort_value := + public.search_normalize(sort_value); + output_row.authority := NULL; IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN @@ -225,6 +229,7 @@ BEGIN output_row.browse_field = TRUE; RETURN NEXT output_row; output_row.browse_field = FALSE; + output_row.sort_value := NULL; END IF; -- insert raw node text for faceting @@ -7080,23 +7085,17 @@ VALUES ( ) ); +ALTER TABLE metabib.browse_entry DROP CONSTRAINT browse_entry_value_key; ALTER TABLE metabib.browse_entry ADD COLUMN sort_value TEXT; - -CREATE OR REPLACE FUNCTION metabib.browse_entry_sort_value() -RETURNS TRIGGER AS $$ - BEGIN - NEW.sort_value = public.search_normalize(NEW.value); - RETURN NEW; - END; -$$ LANGUAGE PLPGSQL; - -CREATE TRIGGER mbe_sort_value -BEFORE INSERT OR UPDATE ON metabib.browse_entry -FOR EACH ROW EXECUTE PROCEDURE metabib.browse_entry_sort_value(); - -UPDATE metabib.browse_entry SET value = value; - +DELETE FROM metabib.browse_entry_def_map; -- Yeah. +DELETE FROM metabib.browse_entry WHERE sort_value IS NULL; ALTER TABLE metabib.browse_entry ALTER COLUMN sort_value SET NOT NULL; +ALTER TABLE metabib.browse_entry ADD UNIQUE (value, sort_value); +DROP TRIGGER IF EXISTS mbe_sort_value ON metabib.browse_entry; + +\qecho This is a browse-only reingest of your bib records. It may take a while. +SELECT metabib.reingest_metabib_field_entries(id, TRUE, FALSE, TRUE) + FROM biblio.record_entry; CREATE INDEX browse_entry_sort_value_idx ON metabib.browse_entry USING BTREE (sort_value); @@ -7316,28 +7315,41 @@ $p$ LANGUAGE PLPGSQL; UPDATE config.metabib_field SET xpath = $$//mods32:mods/mods32:relatedItem[@type="series"]/mods32:titleInfo[@type="nfi"]$$, - browse_xpath = $$//*/*[local-name() != "nonSort"]$$ -- vim highlighting */ + browse_sort_xpath = $$*[local-name() != "nonSort"]$$, + browse_xpath = NULL WHERE field_class = 'series' AND name = 'seriestitle' ; UPDATE config.metabib_field SET + xpath = $$//mods32:mods/mods32:titleInfo[mods32:title and not (@type)]$$, + browse_sort_xpath = $$*[local-name() != "nonSort"]$$, + browse_xpath = NULL, + browse_field = TRUE +WHERE + field_class = 'title' AND name = 'proper' ; + +UPDATE config.metabib_field +SET xpath = $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='alternative-nfi')]$$, - browse_xpath = $$//*/*[local-name() != "nonSort"]$$ -- vim highlighting */ + browse_sort_xpath = $$*[local-name() != "nonSort"]$$, + browse_xpath = NULL WHERE field_class = 'title' AND name = 'alternative' ; UPDATE config.metabib_field SET xpath = $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='uniform-nfi')]$$, - browse_xpath = $$//*/*[local-name() != "nonSort"]$$ -- vim highlighting */ + browse_sort_xpath = $$*[local-name() != "nonSort"]$$, + browse_xpath = NULL WHERE field_class = 'title' AND name = 'uniform' ; UPDATE config.metabib_field SET xpath = $$//mods32:mods/mods32:titleInfo[mods32:title and (@type='translated-nfi')]$$, - browse_xpath = $$//*/*[local-name() != "nonSort"]$$ -- vim highlighting */ + browse_sort_xpath = $$*[local-name() != "nonSort"]$$, + browse_xpath = NULL WHERE field_class = 'title' AND name = 'translated' ; -- 2.11.0