From 09398d431cdf3fbabc5fca8b207a50e4624e1dcc Mon Sep 17 00:00:00 2001 From: David Boyle Date: Fri, 21 Dec 2012 10:23:48 -0800 Subject: [PATCH] Allows language search on MARC 041 subfields This is in in addition to the 008/35-37 primary language. Searchable subfields are configurable in the Library Settings Editor. Signed-off-by: David Boyle --- .../Application/Storage/Driver/Pg/QueryParser.pm | 54 +++++++++-- Open-ILS/src/sql/Pg/010.schema.biblio.sql | 69 ++++++++++++++ Open-ILS/src/sql/Pg/030.schema.metabib.sql | 18 ++++ Open-ILS/src/sql/Pg/950.data.seed-values.sql | 11 +++ .../Pg/upgrade/XXXX.additional_language_search.sql | 103 +++++++++++++++++++++ .../additional_language_search.txt | 29 ++++++ 6 files changed, 275 insertions(+), 9 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.additional_language_search.sql create mode 100644 docs/RELEASE_NOTES_NEXT/additional_language_search.txt diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm index cbfd99c1ef..a939d87566 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm @@ -562,6 +562,22 @@ sub toSQL { $mra_join .= ' AND '. $flat_plan->{fwhere} if $flat_plan->{fwhere}; $mra_join .= ')'; + my $mlf_join; + + # New Feature - additional language search + # language query search uses ts_vector/ts_query for each selected language + + if ($$flat_plan{lang}) { + $mlf_join .= "INNER JOIN metabib.language_filter mlf ON (m.source = mlf.source AND " . $$flat_plan{lang_not} . "("; + my @lang = split(/,/,$$flat_plan{lang}); + $mlf_join .= 'mlf.value @@ ' . shift(@lang) . '::tsquery'; + foreach my $lang (@lang) + { + $mlf_join .= ' OR mlf.value @@ ' . $lang . '::tsquery'; + } + $mlf_join .= '))'; + } + my $rank = $rel; my $desc = 'ASC'; @@ -664,6 +680,10 @@ sub toSQL { my $core_limit = $self->QueryParser->core_limit || 25000; my $flat_where = $$flat_plan{where}; + +$logger->debug('flat_plan: where: ' . $$flat_plan{where} . ' flat_where: ' . $flat_where . ' fwhere: ' . $$flat_plan{fwhere} . ' with: ' . $$flat_plan{with}); +$logger->debug('flat_plan: from: ' . $$flat_plan{from}); +#$logger->debug('mra_join: ' . $mra_join); if ($flat_where eq '()') { $flat_where = ''; } else { @@ -692,6 +712,7 @@ SELECT $key AS id, $record_list $$flat_plan{from} $mra_join + $mlf_join WHERE 1=1 $before $after @@ -703,6 +724,10 @@ SELECT $key AS id, LIMIT $core_limit SQL +open SQL, '>/openils/QueryParser.txt'; +print SQL $sql; +close SQL; + warn $sql if $self->QueryParser->debug; return $sql; @@ -895,30 +920,41 @@ sub flatten { } # for each dynamic filter, build the ON clause for the JOIN + my $flang = ''; + my $lang_not = ''; for my $filter (@dyn_filters) { warn "flatten(): processing dynamic filter ". $filter->name ."\n" if $self->QueryParser->debug; - # bool joiner for intra-plan nodes/filters - $fwhere .= sprintf(" %s ", ($self->joiner eq '&' ? 'AND' : 'OR')) if $fwhere; - my @fargs = @{$filter->args}; my $NOT = $filter->negate ? ' NOT' : ''; my $fname = $filter->name; - $fname = 'item_lang' if $fname eq 'language'; #XXX filter aliases - $fwhere .= sprintf( - "attrs->'%s'$NOT IN (%s)", $fname, - join(',', map { $self->QueryParser->quote_value($_) } @fargs) - ); + # bool joiner for intra-plan nodes/filters + $fwhere .= sprintf(" %s ", ($self->joiner eq '&' ? 'AND' : 'OR')) if $fwhere && $fname ne 'item_lang'; + + # New Feature - additional language search + # if not a language search filter, use the current hstore construct + # otherwise, compose list of search languages for toSQL to construct ts_vector/ts_query search + + if ($fname ne 'item_lang') + { + $fwhere .= sprintf( + "attrs->'%s'$NOT IN (%s)", $fname, + join(',', map { $self->QueryParser->quote_value($_) } @fargs)); + } + else { + $flang .= join(',', map { $self->QueryParser->quote_value($_) } @fargs); + $lang_not = $NOT; + } warn "flatten(): filter where => $fwhere\n" if $self->QueryParser->debug; } warn "flatten(): full filter where => $fwhere\n" if $self->QueryParser->debug; - return { rank_list => \@rank_list, from => $from, where => $where.')', with => $with, fwhere => $fwhere }; + return { rank_list => \@rank_list, from => $from, where => $where.')', with => $with, fwhere => $fwhere, lang =>$flang, lang_not=>$lang_not}; } diff --git a/Open-ILS/src/sql/Pg/010.schema.biblio.sql b/Open-ILS/src/sql/Pg/010.schema.biblio.sql index 4ce54ae96b..7789c192bb 100644 --- a/Open-ILS/src/sql/Pg/010.schema.biblio.sql +++ b/Open-ILS/src/sql/Pg/010.schema.biblio.sql @@ -119,4 +119,73 @@ $$ LANGUAGE PLPGSQL; CREATE TRIGGER norm_sort_label BEFORE INSERT OR UPDATE ON biblio.monograph_part FOR EACH ROW EXECUTE PROCEDURE biblio.normalize_biblio_monograph_part_sortkey(); +-- Function: biblio.extract_languages(bigint) +-- +-- biblio.extract_languages used in proposal/additional_language_search + +CREATE OR REPLACE FUNCTION biblio.extract_languages(bigint) + RETURNS tsvector AS +$BODY$ + +DECLARE + alt_lang text; + lang text; + subfields text; +BEGIN + lang := biblio.marc21_extract_fixed_field($1, 'Lang'); + +-- read MARC 041 subfields from actor.org_unit_setting.opac_additional_language_subfields +-- trim any '"' chars, split into array of subfields + + SELECT value INTO subfields FROM actor.org_unit_setting where name like 'opac.additional_language_subfields'; +-- +-- query MARC 041 specified subfields for additional search languages +-- + FOR alt_lang IN (SELECT value FROM biblio.flatten_marc($1) where tag='041' and subfield = ANY(string_to_array(trim(both '"' from subfields), ','))) + LOOP + lang := lang || ' ' || alt_lang; + END LOOP; + + return lang::tsvector; +END; +$BODY$ +LANGUAGE PLPGSQL; + +-- Function: update_language_filter() +-- +-- biblio.record_entry.language_filter_trigger.update_language_filter is used in proposal/additional_language_search + +CREATE OR REPLACE FUNCTION update_language_filter() + RETURNS trigger AS +$BODY$ +DECLARE + lang tsvector; + lang_filter bigint; +BEGIN + lang := biblio.extract_languages(NEW.id); + + SELECT metabib.language_filter.source INTO lang_filter FROM metabib.language_filter WHERE NEW.id = metabib.language_filter.source; + + IF FOUND THEN + UPDATE metabib.language_filter SET value = lang WHERE metabib.language_filter.source = NEW.id; + ELSE + INSERT INTO metabib.language_filter(source, value) VALUES (NEW.id, lang); + END IF; + + RETURN NEW; + +END; +$BODY$ +LANGUAGE PLPGSQL; + +-- Trigger: language_filter_trigger on biblio.record_entry +-- +-- biblio.record_entry.language_filter_trigger is used in proposal/additional_language_search + +CREATE TRIGGER language_filter_trigger + AFTER INSERT OR UPDATE + ON biblio.record_entry + FOR EACH ROW + EXECUTE PROCEDURE update_language_filter(); + COMMIT; diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index e833ef22cf..58dab0ad68 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -1491,4 +1491,22 @@ SELECT DISTINCT END; $func$ LANGUAGE PLPGSQL; +-- Table metabib.language_filter +-- +-- metabib.language_filter is used in proposal/additional_language_search + +CREATE TABLE metabib.language_filter +( + id bigserial NOT NULL, + source bigint NOT NULL, + value tsvector, + CONSTRAINT id PRIMARY KEY (id ), + CONSTRAINT source FOREIGN KEY (source) + REFERENCES biblio.record_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE NO ACTION +) +WITH ( + OIDS=FALSE +); + COMMIT; diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql index 88051810eb..f3e81d6080 100644 --- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql +++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql @@ -12004,3 +12004,14 @@ INSERT INTO config.org_unit_setting_type ), 'integer' ); +-- +-- added record in config.org_unit_setting_type is used in proposal/additional_language_search +-- +INSERT INTO config.org_unit_setting_type + (name, label, grp, description, datatype) + VALUES ( + 'opac.additional_language.subfields', + 'Specify MARC 041 subfields for additional search languages', + 'opac', + 'Specify which MARC 041 subfields should be used when searching for additional languages.','array' + ); diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.additional_language_search.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.additional_language_search.sql new file mode 100644 index 0000000000..0991e86fb5 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.additional_language_search.sql @@ -0,0 +1,103 @@ +-- Function: biblio.extract_languages(bigint) +-- +-- biblio.extract_languages used in proposal/additional_language_search + +BEGIN; + +CREATE OR REPLACE FUNCTION biblio.extract_languages(bigint) + RETURNS tsvector AS +$BODY$ + +DECLARE + alt_lang text; + lang text; + subfields text; +BEGIN + lang := biblio.marc21_extract_fixed_field($1, 'Lang'); + +-- read MARC 041 subfields from actor.org_unit_setting.opac_additional_language_subfields +-- trim any '"' chars, split into array of subfields + + SELECT value INTO subfields FROM actor.org_unit_setting where name like 'opac.additional_language_subfields'; +-- +-- query MARC 041 specified subfields for additional search languages +-- + FOR alt_lang IN (SELECT value FROM biblio.flatten_marc($1) where tag='041' and subfield = ANY(string_to_array(trim(both '"' from subfields), ','))) + LOOP + lang := lang || ' ' || alt_lang; + END LOOP; + + return lang::tsvector; +END; +$BODY$ +LANGUAGE PLPGSQL; + +-- Function: update_language_filter() +-- +-- biblio.record_entry.language_filter_trigger.update_language_filter is used in proposal/additional_language_search + +CREATE OR REPLACE FUNCTION update_language_filter() + RETURNS trigger AS +$BODY$ +DECLARE + lang tsvector; + lang_filter bigint; +BEGIN + lang := biblio.extract_languages(NEW.id); + + SELECT metabib.language_filter.source INTO lang_filter FROM metabib.language_filter WHERE NEW.id = metabib.language_filter.source; + + IF FOUND THEN + UPDATE metabib.language_filter SET value = lang WHERE metabib.language_filter.source = NEW.id; + ELSE + INSERT INTO metabib.language_filter(source, value) VALUES (NEW.id, lang); + END IF; + + RETURN NEW; + +END; +$BODY$ +LANGUAGE PLPGSQL; + +-- Trigger: language_filter_trigger on biblio.record_entry +-- +-- biblio.record_entry.language_filter_trigger is used in proposal/additional_language_search + +CREATE TRIGGER language_filter_trigger + AFTER INSERT OR UPDATE + ON biblio.record_entry + FOR EACH ROW + EXECUTE PROCEDURE update_language_filter(); + +-- Table metabib.language_filter +-- +-- metabib.language_filter is used in proposal/additional_language_search + +CREATE TABLE metabib.language_filter +( + id bigserial NOT NULL, + source bigint NOT NULL, + value tsvector, + CONSTRAINT id PRIMARY KEY (id ), + CONSTRAINT source FOREIGN KEY (source) + REFERENCES biblio.record_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE NO ACTION +) +WITH ( + OIDS=FALSE +); + +COMMIT; + + +-- +-- added record in config.org_unit_setting_type is used in proposal/additional_language_search +-- +INSERT INTO config.org_unit_setting_type + (name, label, grp, description, datatype) + VALUES ( + 'opac.additional_language.subfields', + 'Specify MARC 041 subfields for additional search languages', + 'opac', + 'Specify which MARC 041 subfields should be used when searching for additional languages.','array' + ); diff --git a/docs/RELEASE_NOTES_NEXT/additional_language_search.txt b/docs/RELEASE_NOTES_NEXT/additional_language_search.txt new file mode 100644 index 0000000000..e1a795b421 --- /dev/null +++ b/docs/RELEASE_NOTES_NEXT/additional_language_search.txt @@ -0,0 +1,29 @@ +New Feature - Additional Language Search +======================================== +David Boyle +---------------------------------------- +.Overview +New feature which allow languages other than the primary MARC 008/35-37 language to be a search target. + +Added functionality would allow specified MARC 041 subfields to be included as search targets. + +These additional MARC 041 subfields are configurable, as described below. + +.Configuration: + +The feature is configured via : +Group -> opac +Setting -> "Set MARC 041 subfields for additional language search" + +The value for this setting is an array, with each array value specifying a MARC 041 tag subfield (a, e.g.) + +.Code Changes: +* Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm + +.SQL Script Changes: +* Open-ILS/src/sql/Pg/010.schema.biblio.sql +* Open-ILS/src/sql/Pg/030.schema.metabib.sql +* Open-ILS/src/sql/Pg/950.data.seed-values.sql + +.SQL Script Additions: +* Open-ILS/src/sql/Pg/upgrade/XXXX.additional_language_search.sql -- 2.11.0