From: Jeffrey Bond
Date: Thu, 24 Jan 2013 21:44:44 +0000 (-0800)
Subject: Search Modifications -
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=4563a4974ca02800aa072d1709069ded7a3c2638;p=working%2FEvergreen.git

Search Modifications - Modifies the way searches work under config options in local administration. Specifically modifies how matches exactly, contains phrase and fully normalized phrases are searched.

Signed-off-by: Jeffrey Bond
---

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm
index cbfd99c1ef..296476982e 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm
@@ -731,6 +731,189 @@ sub rel_bump {
     return '';
 }
 
+# Return $string as normalized by the database's search_normalize() function.
+# The value is sent as a bind parameter instead of being spliced into the SQL
+# text, so quote characters in a search term cannot alter the statement.
+# Returns '' when the query produces no row.
+sub naco_normalize {
+    my $string = shift;
+    my $pg = 'OpenILS::Application::Storage::Driver::Pg';
+    my @dbh = $pg->db_Handles();
+
+    my $query_handle = $dbh[0]->prepare("SELECT search_normalize(?) AS value");
+    $query_handle->execute($string);
+
+    my $normalized = '';
+    $query_handle->bind_columns( \$normalized );
+    $query_handle->fetch();
+    $query_handle->finish();
+
+    return $normalized;
+}
+
+# Strip the search-syntax markers from both ends of $str: leading '"' and '^',
+# trailing '"' and '$'.  Every marker is removed, so '"^foo"' becomes 'foo'.
+sub remove_search_characters {
+    my $str = shift;
+
+    $str =~ s/\A["^]+//;
+    $str =~ s/["\$]+\z//;
+    return $str;
+}
+
+# Make $string safe to embed in a single-quoted SQL LIKE pattern: the LIKE
+# metacharacters (backslash, %, _) are backslash-escaped and single quotes
+# are doubled.  All other characters pass through untouched.
+sub quote_value {
+    my $string = shift;
+    $string = '' unless defined $string;
+
+    $string =~ s/([\\%_])/\\$1/g;
+    $string =~ s/'/''/g;
+    $logger->debug("Quoted value: " . $string);
+
+    return $string;
+}
+
+# Build a parenthesised SQL condition matching $quoted (already passed through
+# quote_value) as a whole-word phrase in $column: the entire value, at the
+# start, in the middle, or at the end.  With $negate true the condition is
+# inverted (NOT LIKE joined with AND, so every position must fail to match).
+sub _search_mod_phrase_clause {
+    my ($column, $quoted, $negate) = @_;
+    my $op   = $negate ? 'NOT LIKE' : 'LIKE';
+    my $glue = $negate ? ' AND ' : ' OR ';
+    my @patterns = ($quoted, "$quoted %", "% $quoted %", "% $quoted");
+
+    return '(' . join($glue, map { sprintf("%s %s '%s'", $column, $op, $_) } @patterns) . ')';
+}
+
+# Work out the extra JOIN / WHERE SQL for $node when search modifications are
+# in use.  The search text is taken from the node's atoms, phrases or
+# unphrases, classified into a search type, and normalized.
+#
+# Returns nothing when the node carries no search text; otherwise a hashref
+# with the keys 'join', 'where' and 'joinType'.
+sub search_mod {
+    my ($self, $node) = @_;
+
+    my $table = $node->table;
+    $logger->debug("Contents of node table: " . $table);
+    my @temp = split(/\./, $table);
+    $logger->debug("Contents of temp: " . Dumper(\@temp));
+    # "schema.table" becomes "schema.normalized_table".
+    $table = join('.normalized_', @temp);
+    $logger->debug("Contents of table: " . $table);
+
+    $logger->debug("Performing search mods.");
+    $logger->debug("Checking for atoms.");
+
+    my @phrases   = @{$node->phrases};
+    my @unphrases = @{$node->unphrases};
+
+    # Atoms are joined with single spaces; a phrase or unphrase, when present,
+    # replaces them (the unphrase wins if both exist).
+    my $searchVal = '';
+    if (@{$node->only_atoms}) {
+        $logger->debug("Search contains atoms.");
+        $searchVal = join(' ', map { $_->content } @{$node->only_atoms});
+    }
+    $searchVal = $phrases[0]   if @phrases;
+    $searchVal = $unphrases[0] if @unphrases;
+
+    # No search value found: return nothing so the caller skips the search mods.
+    if (!defined($searchVal) || $searchVal eq '') {
+        $logger->debug("No search value found for search mods");
+        return;
+    }
+
+    $logger->debug("Search value constructed!");
+    $logger->debug("Search value: " . $searchVal);
+
+    # Classify the search.  The tests run in this order and the first hit wins.
+    # NOTE(review): the containsPhrase test also accepts everything the
+    # startsWith test accepts, so startsWith looks unreachable - confirm intent.
+    my $searchType = 'contains';
+    if ($searchVal =~ m/^\^/ && $searchVal =~ m/\$$/) {
+        $searchType = 'exactMatch';
+    } elsif (($searchVal =~ m/^\"/ && $searchVal =~ m/\"$/) || @phrases) {
+        $searchType = 'containsPhrase';
+    } elsif (@unphrases) {
+        $searchType = 'doesNotContainPhrase';
+    } elsif (($searchVal =~ m/^\"\^/ && $searchVal =~ m/\"$/) || ($searchVal =~ m/^\^/ && @phrases)) {
+        $searchType = 'startsWith';
+    }
+
+    $logger->debug("Search type found: $searchType");
+
+    # Strip the search markers, then normalize.  When normalization leaves
+    # nothing, $normalized is set and the raw text is matched against fe.value
+    # instead of the normalized_* table.
+    $searchVal = remove_search_characters($searchVal);
+    my $nsVal = naco_normalize($searchVal);
+    my $normalized = 0;
+    if (defined($nsVal) && $nsVal ne '') {
+        $searchVal = $nsVal;
+    } else {
+        $normalized = 1;
+    }
+
+    my $queryParam = {searchVal=>$searchVal,
+                      searchType=>$searchType,
+                      normalized=>$normalized,
+                      table=>$table};
+
+    $logger->debug("Query param: " . Dumper($queryParam));
+
+    my $quoted = quote_value($searchVal);
+    my $join   = '';
+    my $where  = '';
+    my $joinType;
+
+    if ($normalized || $searchType eq 'containsPhrase' || $searchType eq 'exactMatch') {
+        $join  .= "\n\t JOIN " . $table . " AS norm ON (fe.id = norm.id)";
+        $where .= "\n\t WHERE 1 = 1 ";
+    }
+
+    if ($normalized) {
+        # Raw-text matching against the field entry table itself.
+        $joinType = 'LEFT';
+        if ($searchType eq 'containsPhrase') {
+            $where .= "\n\t\t AND " . _search_mod_phrase_clause('fe.value', $quoted, 0);
+        } elsif ($searchType eq 'doesNotContainPhrase') {
+            $where .= "\n\t\t AND " . _search_mod_phrase_clause('fe.value', $quoted, 1);
+        } elsif ($searchType eq 'exactMatch') {
+            $where .= "\n\t\t AND (fe.value LIKE '" . $quoted . "')";
+        } elsif ($searchType eq 'startsWith') {
+            $where .= "\n\t\t AND (fe.value LIKE '" . $quoted . "%')";
+        } elsif ($searchType eq 'contains') {
+            # One LIKE per word; empty words (repeated spaces) are dropped so
+            # an empty "AND ( )" group is never emitted.
+            my @likes;
+            for my $word (grep { length } split(/ /, $searchVal)) {
+                push @likes, "fe.value LIKE '%" . quote_value($word) . "%'";
+            }
+            $where .= "\n\t\t AND ( " . join(' AND ', @likes) . ")" if @likes;
+        }
+    } else {
+        # Matching against the normalized_* table joined in above.
+        $joinType = 'RIGHT';
+        if ($searchType eq 'containsPhrase') {
+            $where .= "\n\t\t AND " . _search_mod_phrase_clause('norm.value', $quoted, 0);
+        } elsif ($searchType eq 'exactMatch') {
+            $where .= "\n\t\t AND (norm.value LIKE '" . $quoted . "')";
+        }
+        # doesNotContainPhrase is deliberately not handled here: the original
+        # author noted it has an issue outside the scope of this feature.
+    }
+
+    my $return = {'join'=>$join, 'where'=>$where, 'joinType'=>$joinType};
+
+    $logger->debug("Contents of search_mod: " . Dumper($return));
+
+    return $return;
+}
+
 sub flatten {
     my $self = shift;
 
@@ -761,6 +944,9 @@ sub flatten {
             my $table = $node->table;
             my $talias = $node->table_alias;
 
+            my $search_mods = $self->QueryParser->search_mods ? search_mod($self, $node) : '';
+            my $jt = $search_mods ? $search_mods->{'joinType'} : '';
+
             my $node_rank = 'COALESCE(' . $node->rank .
" * ${talias}.weight, 0.0)"; my $core_limit = $self->QueryParser->core_limit || 25000; @@ -770,11 +956,15 @@ sub flatten { if ($node->dummy_count < @{$node->only_atoms} ) { $with .= ",\n" if $with; $with .= "${talias}_xq AS (SELECT ". $node->tsquery ." AS tsq )"; - $from .= "\n${spc}${spc}${spc}JOIN ${talias}_xq ON (fe.index_vector @@ ${talias}_xq.tsq)"; + $from .= "\n${spc}${spc}${spc} " . $jt . " JOIN ${talias}_xq ON (fe.index_vector @@ ${talias}_xq.tsq)"; } else { $from .= "\n${spc}${spc}${spc}, (SELECT NULL::tsquery AS tsq ) AS x"; } + $from .= $search_mods->{'join'} if $search_mods; + + + my $fieldIds; my @bump_fields; if (@{$node->fields} > 0) { @bump_fields = @{$node->fields}; @@ -789,12 +979,15 @@ sub flatten { if (@field_ids) { $from .= "\n${spc}${spc}${spc}WHERE fe_weight.id IN (" . join(',', @field_ids) . ")"; + $fieldIds = "(" . join(',', @field_ids) . ")"; } } else { @bump_fields = @{$self->QueryParser->search_fields->{$node->classname}}; } + $from .= $search_mods->{'where'} if $search_mods; + ###$from .= "\n${spc}${spc}LIMIT $core_limit"; $from .= "\n${spc}) AS $talias ON (m.source = ${talias}.source)"; @@ -816,8 +1009,10 @@ sub flatten { my $twhere .= '(' . $talias . ".id IS NOT NULL"; - $twhere .= ' AND ' . join(' AND ', map {"${talias}.value ~* ".$self->QueryParser->quote_phrase_value($_)} @{$node->phrases}) if (@{$node->phrases}); - $twhere .= ' AND ' . join(' AND ', map {"${talias}.value !~* ".$self->QueryParser->quote_phrase_value($_)} @{$node->unphrases}) if (@{$node->unphrases}); + unless($search_mods){ + $twhere .= ' AND ' . join(' AND ', map {"${talias}.value ~* ".$self->QueryParser->quote_phrase_value($_)} @{$node->phrases}) if (@{$node->phrases}); + $twhere .= ' AND ' . 
join(' AND ', map {"${talias}.value !~* ".$self->QueryParser->quote_phrase_value($_)} @{$node->unphrases}) if (@{$node->unphrases}); + } $twhere .= ')'; if (@dyn_filters or !$self->top_plan) { diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm index 722c6c58f9..909e484470 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm @@ -2863,7 +2863,7 @@ sub str2abstract_query { _initialize_parser($parser) unless $parser->initialization_complete; - my $query = $parser->new(%use_opts)->parse; + $query = $parser->new(%use_opts)->parse; return $query->parse_tree->to_abstract_query(with_config => $with_config); } @@ -2928,6 +2928,7 @@ sub query_parser_fts { } + $log->debug("OU: ". Dumper($args{ou})); # parse the query and supply any query-level %arg-based defaults # we expect, and make use of, query, superpage, superpage_size, debug and core_limit args my $query = $parser->new( %args )->parse; @@ -3308,7 +3309,7 @@ sub query_parser_fts_wrapper { $log->debug("Full QueryParser query: $query", DEBUG); - return query_parser_fts($self, $client, query => $query, _simple_plan => $base_plan->simple_plan ); + return query_parser_fts($self, $client, query => $query, _simple_plan => $base_plan->simple_plan, search_mods => $args{search_mods} ); } __PACKAGE__->register_method( api_name => "open-ils.storage.biblio.multiclass.staged.search_fts", diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm index 7ee0402dd4..d0c3f7702b 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm @@ -95,6 +95,10 @@ sub new { $self->$opt( $opts{$opt} ) if ($self->can($opt)); } + 
if($opts{search_mods}){
+        $self->{search_mods} = $opts{search_mods};
+    }
+
     return $self;
 }
 
@@ -784,6 +788,15 @@ sub core_limit {
     return $self->{core_limit};
 }
 
+# Accessor for the search_mods flag: stores a true argument, then returns the stored value.
+sub search_mods {
+    my ($self, $flag) = @_;
+    if ($flag) {
+        $self->{search_mods} = $flag;
+    }
+    return $self->{search_mods};
+}
+
 sub superpage {
     my $self = shift;
     my $l = shift;
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Search.pm b/Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Search.pm
index dea01459d5..8f7e4c15fb 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Search.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Search.pm
@@ -294,6 +294,28 @@ sub load_rresults_bookbag_item_notes {
     return;
 }
 
+# Report whether the 'opac.search_mod' org unit setting is true for the org
+# unit chosen below.  Returns 1 or 0.
+sub get_search_mod {
+    my $self = shift;
+    my $ctx  = $self->ctx;
+    my $setting_name = 'opac.search_mod';
+
+    # Staff use the logged-in user's ws_ou; everyone else uses the preferred
+    # library, falling back to the search library.
+    my $org_unit = $ctx->{is_staff}
+        ? $ctx->{user}->ws_ou
+        : ($self->_get_pref_lib() || $self->_get_search_lib());
+
+    # Collapse whatever the setting lookup returns into a plain 1/0 flag.
+    my $enabled = 0;
+    if ($self->ctx->{get_org_setting}->($org_unit, $setting_name)) {
+        $enabled = 1;
+    }
+
+    $logger->debug("Get Search Mods: " . $enabled);
+    return $enabled;
+}
 # context additions:
 #   page_size
 #   hit_count
@@ -396,9 +418,11 @@ sub load_rresults {
         return $self->generic_redirect;
     }
 
+    my $search_mod = get_search_mod($self);
+
     # Limit and offset will stay here. Everything else should be part of
     # the query string, not special args.
- my $args = {'limit' => $limit, 'offset' => $offset}; + my $args = {'limit' => $limit, 'offset' => $offset, 'search_mods' => $search_mod}; if ($tag_circs) { $args->{tag_circulated_records} = 1; diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index e833ef22cf..b17fce904e 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -1491,4 +1491,363 @@ SELECT DISTINCT END; $func$ LANGUAGE PLPGSQL; + +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +-- Table: metabib.normalized_identifier_field_entry + +-- DROP TABLE metabib.normalized_identifier_field_entry; + +DROP TABLE IF EXISTS metabib.normalized_identifier_field_entry; + +CREATE TABLE metabib.normalized_identifier_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_identifier_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_identifier_to_ identifier_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.identifier_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); +ALTER TABLE metabib.normalized_identifier_field_entry + OWNER TO evergreen; + +-- Index: metabib."fki_normalized_identifier_to_ identifier_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_identifier_to_ identifier_field_entry_FK"; + +CREATE INDEX "fki_normalized_identifier_to_ identifier_field_entry_FK" + ON metabib.normalized_identifier_field_entry + USING btree + (id ); + +--------------------------------------------------------------------------------------------------------- + + +-- Table: metabib.normalized_title_field_entry + +-- DROP TABLE metabib.normalized_title_field_entry; + +DROP TABLE IF EXISTS metabib.normalized_title_field_entry; + +CREATE TABLE metabib.normalized_title_field_entry +( + id bigint NOT NULL, + 
source bigint, + value text, + CONSTRAINT normalized_title_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_title_to_ title_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.title_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); +ALTER TABLE metabib.normalized_title_field_entry + OWNER TO evergreen; + +-- Index: metabib."fki_normalized_title_to_ title_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_title_to_ title_field_entry_FK"; + +CREATE INDEX "fki_normalized_title_to_ title_field_entry_FK" + ON metabib.normalized_title_field_entry + USING btree + (id ); + +--------------------------------------------------------------------------------------------------------- +-- Table: metabib.normalized_subject_field_entry + +-- DROP TABLE metabib.normalized_subject_field_entry; + +DROP TABLE IF EXISTS metabib.normalized_subject_field_entry; + +CREATE TABLE metabib.normalized_subject_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_subject_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_subject_to_ subject_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.subject_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); +ALTER TABLE metabib.normalized_subject_field_entry + OWNER TO evergreen; + +-- Index: metabib."fki_normalized_subject_to_ subject_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_subject_to_ subject_field_entry_FK"; + +CREATE INDEX "fki_normalized_subject_to_ subject_field_entry_FK" + ON metabib.normalized_subject_field_entry + USING btree + (id ); + + + +--------------------------------------------------------------------------------------------------------- + +-- Table: metabib.normalized_author_field_entry + +-- DROP TABLE metabib.normalized_author_field_entry; + +DROP TABLE IF EXISTS metabib.normalized_author_field_entry; + +CREATE TABLE metabib.normalized_author_field_entry 
+( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_author_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_author_to_ author_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.author_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); +ALTER TABLE metabib.normalized_author_field_entry + OWNER TO evergreen; + +-- Index: metabib."fki_normalized_author_to_ author_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_author_to_ author_field_entry_FK"; + +CREATE INDEX "fki_normalized_author_to_ author_field_entry_FK" + ON metabib.normalized_author_field_entry + USING btree + (id ); + + + --------------------------------------------------------------------------------------------------------- + +-- Table: metabib.normalized_series_field_entry + +-- DROP TABLE metabib.normalized_series_field_entry; + +DROP TABLE IF EXISTS metabib.normalized_series_field_entry; + +CREATE TABLE metabib.normalized_series_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_series_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_series_to_ series_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.series_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); +ALTER TABLE metabib.normalized_series_field_entry + OWNER TO evergreen; + +-- Index: metabib."fki_normalized_series_to_ series_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_series_to_ series_field_entry_FK"; + +CREATE INDEX "fki_normalized_series_to_ series_field_entry_FK" + ON metabib.normalized_series_field_entry + USING btree + (id ); + + + +--------------------------------------------------------------------------------------------------------- + +-- Table: metabib.normalized_keyword_field_entry + +-- DROP TABLE metabib.normalized_keyword_field_entry; + +DROP TABLE IF EXISTS metabib.normalized_keyword_field_entry; + +CREATE TABLE 
metabib.normalized_keyword_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_keyword_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_keyword_to_ keyword_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.keyword_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); +ALTER TABLE metabib.normalized_keyword_field_entry + OWNER TO evergreen; + +-- Index: metabib."fki_normalized_keyword_to_ keyword_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_keyword_to_ keyword_field_entry_FK"; + +CREATE INDEX "fki_normalized_keyword_to_ keyword_field_entry_FK" + ON metabib.normalized_keyword_field_entry + USING btree + (id ); + +--------------------------------------------------------------------------------------------------------------------------------------------- +--------------------------------------------------------------------------------------------------------------------------------------------- + + +-------------------------------------------------------------------------------------------------------------- +--Create indexes for normalized_field_entry tables, and get pg_trgm if it is not part of the public schema.-- +-------------------------------------------------------------------------------------------------------------- + + CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public; + +------------------------------------------------------------------------------------------------------ + + +DROP INDEX IF EXISTS metabib.normalized_identifier_field_entry_gist_trgm; + +-- Index: metabib.normalized_series_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_series_field_entry_gist_trgm; + +CREATE INDEX normalized_identifier_field_entry_gist_trgm + ON metabib.normalized_identifier_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + + 
------------------------------------------------------------------------------------------------------ + +DROP INDEX IF EXISTS metabib.normalized_author_field_entry_gist_trgm; + +-- Index: metabib.normalized_author_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_author_field_entry_gist_trgm; + +CREATE INDEX normalized_author_field_entry_gist_trgm + ON metabib.normalized_author_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + + ------------------------------------------------------------------------------------------------------ + + +DROP INDEX IF EXISTS metabib.normalized_title_field_entry_gist_trgm; + +-- Index: metabib.normalized_title_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_title_field_entry_gist_trgm; + +CREATE INDEX normalized_title_field_entry_gist_trgm + ON metabib.normalized_title_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + ------------------------------------------------------------------------------------------------------ + + +DROP INDEX IF EXISTS metabib.normalized_subject_field_entry_gist_trgm; + +-- Index: metabib.normalized_subject_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_subject_field_entry_gist_trgm; + +CREATE INDEX normalized_subject_field_entry_gist_trgm + ON metabib.normalized_subject_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + ------------------------------------------------------------------------------------------------------ + + +DROP INDEX IF EXISTS metabib.normalized_series_field_entry_gist_trgm; + +-- Index: metabib.normalized_series_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_series_field_entry_gist_trgm; + +CREATE INDEX normalized_series_field_entry_gist_trgm + ON metabib.normalized_series_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + ------------------------------------------------------------------------------------------------------ + +-- Function: 
metabib.normalized_field_entry_view()
+
+-- DROP FUNCTION metabib.normalized_field_entry_view();
+
+CREATE OR REPLACE FUNCTION metabib.normalized_field_entry_view()
+  RETURNS trigger AS
+$BODY$
+-- Row-level trigger: keeps the matching normalized_* table in step with NEW.
+DECLARE
+    norm_table text := quote_ident(TG_TABLE_SCHEMA) || '.' || quote_ident('normalized_' || TG_TABLE_NAME);
+BEGIN
+    -- Values are bound with USING rather than concatenated into the statement,
+    -- so quote characters or NULLs in the row cannot break the generated SQL.
+IF(TG_OP = 'UPDATE') THEN
+
+    EXECUTE 'UPDATE ' || norm_table || ' SET value = $1, source = $2 WHERE id = $3'
+        USING search_normalize(NEW.value), NEW.source, NEW.id;
+
+ELSIF(TG_OP = 'INSERT') THEN
+
+    EXECUTE 'INSERT INTO ' || norm_table || ' (id, source, value) VALUES ($1, $2, $3)'
+        USING NEW.id, NEW.source, search_normalize(NEW.value);
+END IF;
+
+RETURN NULL;
+
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE
+  COST 100;
+ALTER FUNCTION metabib.normalized_field_entry_view()
+  OWNER TO evergreen;
+---------------------------------------------------------------------------------------------------------------------------------------------
+
+
+ DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.identifier_field_entry;
+
+ CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.identifier_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+ DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.subject_field_entry;
+
+ CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.subject_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+ DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.author_field_entry;
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.author_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+ DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.title_field_entry;
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.title_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+ DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.keyword_field_entry;
+
+CREATE TRIGGER normalize_field_entry +AFTER INSERT OR UPDATE ON metabib.keyword_field_entry + FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view(); + + DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.series_field_entry; + +CREATE TRIGGER normalize_field_entry +AFTER INSERT OR UPDATE ON metabib.series_field_entry + FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view(); + + COMMIT; diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql index 88051810eb..5cc284093f 100644 --- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql +++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql @@ -4667,7 +4667,10 @@ INSERT into config.org_unit_setting_type 'coust', 'description' ), 'bool', null) - +,('opac.search_mod', 'opac', + 'Modifies the way searches perform when using contains phrase, exact match, and searching for entirely blank fields after normalization.', + 'Changes the way searches perform on contains phrase, exact match, and how searches perform when the search queried is to be entirely normalized.', + 'bool', null) ,( 'sms.enable', 'sms', oils_i18n_gettext( 'sms.enable', diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.normalized_field_entries b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.normalized_field_entries new file mode 100644 index 0000000000..4a9654071f --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.normalized_field_entries @@ -0,0 +1,348 @@ +BEGIN; + +-- Table: metabib.normalized_identifier_field_entry + +-- DROP TABLE metabib.normalized_identifier_field_entry; + +CREATE TABLE metabib.normalized_identifier_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_identifier_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_identifier_to_ identifier_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.identifier_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); + +-- Index: 
metabib."fki_normalized_identifier_to_ identifier_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_identifier_to_ identifier_field_entry_FK"; + +CREATE INDEX "fki_normalized_identifier_to_ identifier_field_entry_FK" + ON metabib.normalized_identifier_field_entry + USING btree + (id ); + + + INSERT INTO metabib.normalized_identifier_field_entry (id, source, value) +(SELECT id, source, search_normalize(value) FROM metabib.identifier_field_entry); + +-- Table: metabib.normalized_title_field_entry + +-- DROP TABLE metabib.normalized_title_field_entry; + +CREATE TABLE metabib.normalized_title_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_title_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_title_to_ title_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.title_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); + +-- Index: metabib."fki_normalized_title_to_ title_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_title_to_ title_field_entry_FK"; + +CREATE INDEX "fki_normalized_title_to_ title_field_entry_FK" + ON metabib.normalized_title_field_entry + USING btree + (id ); + + + INSERT INTO metabib.normalized_title_field_entry (id, source, value) +(SELECT id, source, search_normalize(value) FROM metabib.title_field_entry); + + +--------------------------------------------------------------------------------------------------------- +-- Table: metabib.normalized_subject_field_entry + +-- DROP TABLE metabib.normalized_subject_field_entry; + +CREATE TABLE metabib.normalized_subject_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_subject_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_subject_to_ subject_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.subject_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); + +-- Index: 
metabib."fki_normalized_subject_to_ subject_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_subject_to_ subject_field_entry_FK"; + +CREATE INDEX "fki_normalized_subject_to_ subject_field_entry_FK" + ON metabib.normalized_subject_field_entry + USING btree + (id ); + + + INSERT INTO metabib.normalized_subject_field_entry (id, source, value) +(SELECT id, source, search_normalize(value) FROM metabib.subject_field_entry); + + +--------------------------------------------------------------------------------------------------------- + +-- Table: metabib.normalized_author_field_entry + +-- DROP TABLE metabib.normalized_author_field_entry; + +CREATE TABLE metabib.normalized_author_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_author_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_author_to_ author_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.author_field_entry (id) MATCH SIMPLE + ON UPDATE NO ACTION ON DELETE CASCADE +) +WITH ( + OIDS=FALSE +); + +-- Index: metabib."fki_normalized_author_to_ author_field_entry_FK" + +-- DROP INDEX metabib."fki_normalized_author_to_ author_field_entry_FK"; + +CREATE INDEX "fki_normalized_author_to_ author_field_entry_FK" + ON metabib.normalized_author_field_entry + USING btree + (id ); + + + INSERT INTO metabib.normalized_author_field_entry (id, source, value) + (SELECT id, source, search_normalize(value) FROM metabib.author_field_entry); + + --------------------------------------------------------------------------------------------------------- + +-- Table: metabib.normalized_series_field_entry + +-- DROP TABLE metabib.normalized_series_field_entry; + +CREATE TABLE metabib.normalized_series_field_entry +( + id bigint NOT NULL, + source bigint, + value text, + CONSTRAINT normalized_series_field_entry_pkey PRIMARY KEY (id ), + CONSTRAINT "normalized_series_to_ series_field_entry_FK" FOREIGN KEY (id) + REFERENCES metabib.series_field_entry (id) MATCH SIMPLE + ON 
UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (
+  OIDS=FALSE
+);
+
+-- Index: metabib."fki_normalized_series_to_ series_field_entry_FK"
+
+-- DROP INDEX metabib."fki_normalized_series_to_ series_field_entry_FK";
+
+CREATE INDEX "fki_normalized_series_to_ series_field_entry_FK"
+  ON metabib.normalized_series_field_entry
+  USING btree
+  (id );
+
+
+ INSERT INTO metabib.normalized_series_field_entry (id, source, value)
+(SELECT id, source, search_normalize(value) FROM metabib.series_field_entry);
+
+
+---------------------------------------------------------------------------------------------------------
+
+-- Table: metabib.normalized_keyword_field_entry
+
+-- DROP TABLE metabib.normalized_keyword_field_entry;
+
+CREATE TABLE metabib.normalized_keyword_field_entry
+(
+  id bigint NOT NULL,
+  source bigint,
+  value text,
+  CONSTRAINT normalized_keyword_field_entry_pkey PRIMARY KEY (id ),
+  CONSTRAINT "normalized_keyword_to_ keyword_field_entry_FK" FOREIGN KEY (id)
+      REFERENCES metabib.keyword_field_entry (id) MATCH SIMPLE
+      ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (
+  OIDS=FALSE
+);
+
+-- Index: metabib."fki_normalized_keyword_to_ keyword_field_entry_FK"
+
+-- DROP INDEX metabib."fki_normalized_keyword_to_ keyword_field_entry_FK";
+
+CREATE INDEX "fki_normalized_keyword_to_ keyword_field_entry_FK"
+  ON metabib.normalized_keyword_field_entry
+  USING btree
+  (id );
+
+
+ INSERT INTO metabib.normalized_keyword_field_entry (id, source, value)
+(SELECT id, source, search_normalize(value) FROM metabib.keyword_field_entry);
+
+---------------------------------------------------------------------------------------------------------------------------------------------
+-- Function: metabib.normalized_field_entry_view()
+
+-- DROP FUNCTION metabib.normalized_field_entry_view();
+
+CREATE OR REPLACE FUNCTION metabib.normalized_field_entry_view()
+  RETURNS trigger AS
+$BODY$
+-- Row-level trigger: keeps the matching normalized_* table in step with NEW.
+DECLARE
+    norm_table text := quote_ident(TG_TABLE_SCHEMA) || '.' || quote_ident('normalized_' || TG_TABLE_NAME);
+BEGIN
+    -- Values are bound with USING rather than concatenated into the statement,
+    -- so quote characters or NULLs in the row cannot break the generated SQL.
+IF(TG_OP = 'UPDATE') THEN
+
+    EXECUTE 'UPDATE ' || norm_table || ' SET value = $1, source = $2 WHERE id = $3'
+        USING search_normalize(NEW.value), NEW.source, NEW.id;
+
+ELSIF(TG_OP = 'INSERT') THEN
+
+    EXECUTE 'INSERT INTO ' || norm_table || ' (id, source, value) VALUES ($1, $2, $3)'
+        USING NEW.id, NEW.source, search_normalize(NEW.value);
+END IF;
+
+RETURN NULL;
+
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE
+  COST 100;
+
+ -------------------------------------------------------------------------------------------------------------------------
+ CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.identifier_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+ CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.subject_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.author_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.title_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.keyword_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.series_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+--------------------------------------------------------------------------------------------------------------
+--Create indexes for normalized_field_entry tables, and get pg_trgm if it is not part of the public schema.--
+--------------------------------------------------------------------------------------------------------------
+
+ CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
+
+------------------------------------------------------------------------------------------------------ +-- Trigram (pg_trgm) GiST indexes on the value column of the normalized_*_field_entry tables. +-- Each index is dropped first (IF EXISTS) and then recreated. + +DROP INDEX IF EXISTS metabib.normalized_author_field_entry_gist_trgm; + +-- Index: metabib.normalized_author_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_author_field_entry_gist_trgm; + +CREATE INDEX normalized_author_field_entry_gist_trgm + ON metabib.normalized_author_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + + ------------------------------------------------------------------------------------------------------ + + +DROP INDEX IF EXISTS metabib.normalized_title_field_entry_gist_trgm; + +-- Index: metabib.normalized_title_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_title_field_entry_gist_trgm; + +CREATE INDEX normalized_title_field_entry_gist_trgm + ON metabib.normalized_title_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + + ------------------------------------------------------------------------------------------------------ + + +DROP INDEX IF EXISTS metabib.normalized_subject_field_entry_gist_trgm; + +-- Index: metabib.normalized_subject_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_subject_field_entry_gist_trgm; + +CREATE INDEX normalized_subject_field_entry_gist_trgm + ON metabib.normalized_subject_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + + ------------------------------------------------------------------------------------------------------ + + +DROP INDEX IF EXISTS metabib.normalized_series_field_entry_gist_trgm; + +-- Index: metabib.normalized_series_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_series_field_entry_gist_trgm; + +CREATE INDEX normalized_series_field_entry_gist_trgm + ON metabib.normalized_series_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + + ------------------------------------------------------------------------------------------------------ + + +DROP 
INDEX IF EXISTS metabib.normalized_identifier_field_entry_gist_trgm; + +-- Index: metabib.normalized_identifier_field_entry_gist_trgm + +-- DROP INDEX metabib.normalized_identifier_field_entry_gist_trgm; + +CREATE INDEX normalized_identifier_field_entry_gist_trgm + ON metabib.normalized_identifier_field_entry + USING gist + (value COLLATE pg_catalog."C" gist_trgm_ops); + + ------------------------------------------------------------------------------------------------------ + + +-- Register the opac.search_mod boolean library setting (group 'opac') that switches the search modifications on or off. +INSERT into config.org_unit_setting_type +( name, grp, label, description, datatype, fm_class ) VALUES +('opac.search_mod', 'opac', + 'Modifies the way searches perform when using contains phrase, exact match, and searching for entirely blank fields after normalization.', + 'Changes the way searches perform on contains phrase, exact match, and how searches perform when the search queried is to be entirely normalized.', + 'bool', null); + + + ------------------------------------------------------------------------------------------------------ + +COMMIT; diff --git a/docs/RELEASE_NOTES_NEXT/search_modifications.txt b/docs/RELEASE_NOTES_NEXT/search_modifications.txt new file mode 100644 index 0000000000..abff443806 --- /dev/null +++ b/docs/RELEASE_NOTES_NEXT/search_modifications.txt @@ -0,0 +1,115 @@ +== Search Modifications + +Jeffrey Bond + +=== Summary `This feature consists of adding a few changes to the way +Evergreen performs searches via the QueryParser.pm module.` + +`Search modifications will alter the way searches perform in the +following ways:` + +* `Exact Match: Now exactly matches what is typed in under normalization +rules.` + ** `Example:` + *** `Before: Search for: title| exact match | horses | + results found 44: Horses, Runway horses., Crazy over horses, etc...` + *** `Now: Search for: title | exact match | horses | + results found 1: Horses` + + *** `Before: Search for: subject| exact match | art greek | + results found 5: Greek Art, The Development of Attic black-figure, + A 
pillage of art, etc....` + *** `Now: Search for: subject| exact match | art greek | + results found 3: Greek art, A handbook of Greek Art, and The art of + Crete and early Greece.` + +* `Contains Phrase: Will now require that the phrase appear in the index +being searched on. (This feature appears to be in master at the moment. +The only change, now, will be that the phrase is normalized and checked +against a normalized table of values).` + ** `Example:` + *** `Before: subject | contains phrase | art greek | + results found 1: Greek Art` + *** `Now: subject | contains phrase | art greek | + results found 3: Greek art, The art of Crete.., + A handbook of Greek art` + +* `Normalized Indexes: Some indexes contain content that is all +normalized characters. This will detect that anomaly and force a search +without using text normalization.` + ** `Example:` + *** `Before: contains | !!! | + results found 0: No results found` + *** `Now: contains | !!! | + results found 1: !!!` + +`These changes will be organization specific and can be turned on or +off. The change will be added to the Admin (.) -> Local Administration +-> Library Settings Editor menu.` + +* `Configuration changes added to 950.seed_values in SQL build scripts. +Upgrade script is available in upgrades.` + +`All changes will be done to the QueryParser.pm perl module at +/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm.` + +* `Functions Added:` + ** `/Application/Storage/QueryParser.pm - search_mods` + *** `Creates an object for query parser to track if search + mods is on or off.` + ** `/Application/Storage/Driver/Pg/QueryParser.pm - search_mod` + *** `Creates additions for main SQL statement returned from + toSQL. 
Specifically modifies anything created within the flatten sub + routine.` + ** `/Application/Storage/Driver/Pg/QueryParser.pm - naco_normalize` + *** `Makes a call to the database to normalize a string + using search_normalize stored procedure.` + ** `/Application/Storage/Driver/Pg/QueryParser.pm - remove_search_characters` + *** `Removes search characters that dictate + what type of search is to be performed on the query.` + ** `/Application/Storage/Driver/Pg/QueryParser.pm - quote_value` + *** `Escapes all characters for SQL consumption.` + ** `/WWW/EGCatLoader/Search.pm - get_search_mod` + *** `Gets search modification setting.` +* `Functions Modified:` + ** `/Application/Storage/QueryParser.pm - new` + *** `Added a check for search mods to either create or not create the + element inside queryparser.` + ** `/Application/Storage/Driver/Pg/QueryParser.pm - flatten` + *** `Appends pieces of SQL queries to the $from and $where strings.` + ** `/Application/Storage/Publisher/metabib.pm - query_parser_fts_wrapper` + *** `Passes search_mods setting on to query_parser_fts.` + ** `/WWW/EGCatLoader/Search.pm - load_rresults` + *** `Collects information about the search_mod configuration and loads it + into $args for getting $results.` + +`Remaining changes will be to the database; they include adding 6 tables +of normalized text fields that are created from triggers from the +field_entry tables. The text fields will be indexed using an extension +called Pg_trgm.` + +* `Added 6 new tables, these tables are mapped to the *_field_entry +tables. Each table contains an id which is one to one with the matching +*_field_entry table. The other two columns contain the source and a +value, which is a normalized value of the value field in the +*_field_entry table.` + ** `Added an extension to index the normalized_*_field_entry tables. The + index allows searching using the LIKE operator and is optimized for + phrase matching as well as fuzzy matching. 
For more information on + pg_trgm visit this site + [[http://www.postgresql.org/docs/9.1/static/pgtrgm.html|Postgres + Pg_Trgm]].` + ** `Added a trigger to populate the normalized_*_field_entry + tables from the *_field_entry tables.` + +==== Blueprint +* `https://blueprints.launchpad.net/evergreen/+spec/search-modifications` + +==== Deliverable +* `Option to have searches perform differently.` +* `Newly Indexed tables for more searching options. (In future releases)` +* `Ability to use an indexed search on tables with LIKE and ILIKE` +* `New operator to fuzzy match values, offering the ability to recognize +spelling errors.` +* `Relevance functions that will score results if they are spelled +incorrectly. ("neighbor" will be scored similarly to "nieghbor")`