Search Modifications - user/catalystit/search_modifications
author Jeffrey Bond <jbond@catalystitservices.com>
Thu, 24 Jan 2013 21:44:44 +0000 (13:44 -0800)
committer Justin Douma <jdouma@catalystitservices.com>
Fri, 25 Jan 2013 01:57:25 +0000 (17:57 -0800)
Modifies how searches behave when the opac.search_mod option is enabled in
local administration. Specifically, it changes how "matches exactly" and
"contains phrase" searches are performed, and how searches whose terms are
removed entirely by normalization are handled.

Signed-off-by: Jeffrey Bond <jbond@catalystitservices.com>
Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm
Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/Publisher/metabib.pm
Open-ILS/src/perlmods/lib/OpenILS/Application/Storage/QueryParser.pm
Open-ILS/src/perlmods/lib/OpenILS/WWW/EGCatLoader/Search.pm
Open-ILS/src/sql/Pg/030.schema.metabib.sql
Open-ILS/src/sql/Pg/950.data.seed-values.sql
Open-ILS/src/sql/Pg/upgrade/XXXX.schema.normalized_field_entries [new file with mode: 0644]
docs/RELEASE_NOTES_NEXT/search_modifications.txt [new file with mode: 0644]

index cbfd99c..2964769 100644 (file)
@@ -731,6 +731,189 @@ sub rel_bump {
     return '';
 }
 
+# Normalize a search string by running it through the database's
+# search_normalize() function.
+#
+# Arguments:
+#   $string - the raw search text
+#
+# Returns the normalized text, or '' when the query fails, yields no row, or
+# yields NULL. Callers only test the result for truth.
+sub naco_normalize {
+    my $string = shift;
+    my $pg = 'OpenILS::Application::Storage::Driver::Pg';
+    my @dbh = $pg->db_Handles();
+
+    # The value is user-supplied search text, so it is bound as a parameter
+    # instead of being concatenated into the statement.
+    my $query_handle = $dbh[0]->prepare('SELECT search_normalize(?) AS value');
+    return '' unless $query_handle->execute($string);
+
+    my $normalized = '';
+    $query_handle->bind_columns( \$normalized );
+
+    #Get results of query
+    $query_handle->fetch();
+    $query_handle->finish();
+
+    return defined $normalized ? $normalized : '';
+}
+
+# Strip the search-syntax markers that wrap a search value: an opening
+# double quote and/or '^' at the start, and a closing '$' and/or double
+# quote at the end.
+#
+# Arguments:
+#   $str - the search value as typed, e.g. "^foo", ^foo$ or "foo bar"
+#
+# Returns the value without those markers; text with no markers is returned
+# unchanged.
+sub remove_search_characters {
+    my $str = shift;
+
+    # Inside a character class '|' is a literal pipe, so the markers are
+    # spelled out in order instead; this also removes both the quote and the
+    # caret of a quoted starts-with value such as "^foo".
+    $str =~ s/^"?\^?//;
+    $str =~ s/\$?"?$//;
+    return $str;
+}
+
+# Escape a value so it can be placed inside a single-quoted SQL LIKE pattern.
+#
+# Every single quote is doubled and every other character is prefixed with a
+# backslash (presumably so that LIKE wildcards such as % and _ in the search
+# text are matched literally -- this relies on backslash being the LIKE
+# escape character and on the server not treating backslashes in string
+# literals as C-style escapes; confirm against the deployment's
+# standard_conforming_strings setting).
+#
+# Arguments:
+#   $string - the text to escape; undef is treated as ''
+#
+# Returns the escaped text, without surrounding quotes.
+sub quote_value {
+    my $string = shift;
+
+    # Only undef falls back to ''; a plain "0" is a legitimate search term.
+    $string = '' unless defined $string;
+
+    my $quoted = join '', map { $_ eq "'" ? "''" : "\\" . $_ } split(//, $string);
+
+    $logger->debug("Quoted value: " . $quoted);
+
+    return $quoted;
+}
+# Build the extra SQL fragments that flatten() splices into a search-class
+# subquery when the search_mods option is on.
+#
+# Arguments:
+#   $self - the calling plan object (shifted off, not otherwise used)
+#   $node - the search node; its table, only_atoms, phrases and unphrases
+#           are read
+#
+# Returns nothing when no search text can be found. Otherwise returns a
+# hashref with:
+#   join     - "JOIN <normalized table> AS norm ON (fe.id = norm.id)" or ''
+#   where    - "WHERE 1 = 1 AND (...)" or ''
+#   joinType - 'LEFT' when the text normalizes to nothing, 'RIGHT' otherwise
+#
+# NOTE(review): the where fragment starts with its own WHERE keyword, so a
+# caller that has already emitted a WHERE for the same subquery would end up
+# with two -- confirm how flatten() combines them.
+sub search_mod {
+    my $self = shift;
+    my $node = shift;
+
+    # schema.table becomes schema.normalized_table
+    my $table = $node->table;
+    $logger->debug("Contents of node table: " . $table);
+    my @temp = split(/\./, $table);
+    $logger->debug("Contents of temp: " . Dumper(\@temp));
+    $table = join('.normalized_', @temp);
+    $logger->debug("Contents of table: " . $table);
+
+    $logger->debug("Performing search mods.");
+    $logger->debug("Checking for atoms.");
+
+    my $searchVal = '';
+    if ( ( @{$node->only_atoms} )[0] ) {
+        $logger->debug("Search contains atoms.");
+        for my $atom ( @{$node->only_atoms} ) {
+            $searchVal = $searchVal ? $searchVal . " " . $atom->content : $atom->content;
+        }
+    }
+
+    #Getting value if it came through in the phrases; an excluded phrase
+    #wins over an included one, and both win over the atoms
+    my $has_phrases   = scalar @{$node->phrases};
+    my $has_unphrases = scalar @{$node->unphrases};
+    $searchVal = $node->phrases->[0]   if $has_phrases;
+    $searchVal = $node->unphrases->[0] if $has_unphrases;
+
+    #No search values found return empty sources
+    if (!defined $searchVal || $searchVal eq '') {
+        $logger->debug("No search value found for search mods");
+        return;
+    }
+
+    $logger->debug("Search value constructed!");
+    $logger->debug("Search value: " . $searchVal);
+
+    # The first matching test decides the search type; 'contains' is the
+    # fallback.
+    my $searchType;
+    if ($searchVal =~ m/^\^/ && $searchVal =~ m/\$$/) {
+        $searchType = 'exactMatch';
+    } elsif ( ($searchVal =~ m/^\"/ && $searchVal =~ m/\"$/) || $has_phrases ) {
+        $searchType = 'containsPhrase';
+    } elsif ($has_unphrases) {
+        $searchType = 'doesNotContainPhrase';
+    } elsif ( ($searchVal =~ m/^\"\^/ && $searchVal =~ m/\"$/) || ($searchVal =~ m/^\^/ && $has_phrases) ) {
+        # NOTE(review): both halves of this test are already covered by the
+        # containsPhrase test above, so startsWith is never selected. The
+        # order is kept as-is because moving it changes results -- confirm
+        # the intended precedence.
+        $searchType = 'startsWith';
+    } else {
+        $searchType = 'contains';
+    }
+
+    $logger->debug("Search type found: $searchType");
+
+    $searchVal = remove_search_characters($searchVal);
+    my $nsVal = naco_normalize($searchVal);
+
+    # $normalized is set when normalization leaves nothing to search for; the
+    # raw text is then matched against fe.value instead of norm.value.
+    my $normalized = 0;
+    if ($nsVal) {
+        $searchVal = $nsVal;
+    } else {
+        $normalized = 1;
+    }
+
+    my $queryParam = {searchVal=>$searchVal,
+                      searchType=>$searchType,
+                      normalized=>$normalized,
+                      table=>$table};
+
+    $logger->debug("Query param: " . Dumper($queryParam));
+
+    my $join = '';
+    my $where = '';
+    my $joinType = '';
+
+    if ($normalized || $searchType eq 'containsPhrase' || $searchType eq 'exactMatch') {
+        $join .= "\n\t JOIN " . $table . " AS norm ON (fe.id = norm.id)";
+        $where .= "\n\t WHERE 1 = 1 ";
+    }
+
+    if ($normalized) {
+        $joinType = 'LEFT';
+        my $quoted = quote_value($searchVal);
+
+        if ($searchType eq 'containsPhrase') {
+            $where .= "\n\t\t AND (fe.value LIKE '" . $quoted . " %' OR fe.value LIKE '% " . $quoted . " %' OR fe.value LIKE '% " . $quoted . "')";
+        } elsif ($searchType eq 'doesNotContainPhrase') {
+            # "Matches none of the patterns" needs AND; with OR the clause is
+            # true for nearly every row.
+            $where .= "\n\t\t AND (fe.value NOT LIKE '" . $quoted . " %' AND fe.value NOT LIKE '% " . $quoted . " %' AND fe.value NOT LIKE '% " . $quoted . "')";
+        } elsif ($searchType eq 'exactMatch') {
+            $where .= "\n\t\t AND (fe.value LIKE '" . $quoted . "')";
+        } elsif ($searchType eq 'startsWith') {
+            $where .= "\n\t\t AND (fe.value LIKE '" . $quoted . "%')";
+        } elsif ($searchType eq 'contains') {
+            # Every word must appear somewhere in the value. Empty words
+            # (from repeated spaces) are dropped, and the clause is omitted
+            # when no words remain so that "AND ( )" is never produced.
+            my @words = grep { length } split(/ /, $searchVal);
+            if (@words) {
+                $where .= "\n\t\t AND ( "
+                    . join(" AND ", map { "fe.value LIKE '%" . quote_value($_) . "%'" } @words)
+                    . ")";
+            }
+        }
+    } else {
+        $joinType = 'RIGHT';
+
+        # The value here is the output of search_normalize(). Single quotes
+        # are doubled so the literal stays well-formed whatever that
+        # function returns.
+        my $literal = $searchVal;
+        $literal =~ s/'/''/g;
+
+        if ($searchType eq 'containsPhrase') {
+            $where .= "\n\t\t AND (norm.value LIKE '" . $literal . " %' OR norm.value LIKE '% " . $literal . " %' OR norm.value LIKE '% " . $literal . "')";
+        }
+#Does not contain phrase seems to have an issue outside the scope of this feature
+#       elsif($searchType eq 'doesNotContainPhrase'){ ... }
+        elsif ($searchType eq 'exactMatch') {
+            $where .= "\n\t\t AND (norm.value LIKE '" . $literal . "')";
+        }
+    }
+
+    my $return = {'join'=>$join, 'where'=>$where, 'joinType'=>$joinType};
+
+    $logger->debug("Contents of search_mod: " . Dumper($return));
+
+    return $return;
+}
+
 sub flatten {
     my $self = shift;
 
@@ -761,6 +944,9 @@ sub flatten {
                 my $table = $node->table;
                 my $talias = $node->table_alias;
 
+                               my $search_mods = $self->QueryParser->search_mods ? search_mod($self, $node) : '';
+                               my $jt = $search_mods ? $search_mods->{'joinType'} : '';
+
                 my $node_rank = 'COALESCE(' . $node->rank . " * ${talias}.weight, 0.0)";
 
                 my $core_limit = $self->QueryParser->core_limit || 25000;
@@ -770,11 +956,15 @@ sub flatten {
                 if ($node->dummy_count < @{$node->only_atoms} ) {
                     $with .= ",\n" if $with;
                     $with .= "${talias}_xq AS (SELECT ". $node->tsquery ." AS tsq )";
-                    $from .= "\n${spc}${spc}${spc}JOIN ${talias}_xq ON (fe.index_vector @@ ${talias}_xq.tsq)";
+                    $from .= "\n${spc}${spc}${spc} " . $jt . " JOIN ${talias}_xq ON (fe.index_vector @@ ${talias}_xq.tsq)";
                 } else {
                     $from .= "\n${spc}${spc}${spc}, (SELECT NULL::tsquery AS tsq ) AS x";
                 }
 
+                               $from .= $search_mods->{'join'} if $search_mods;
+
+
+                               my $fieldIds;
                 my @bump_fields;
                 if (@{$node->fields} > 0) {
                     @bump_fields = @{$node->fields};
@@ -789,12 +979,15 @@ sub flatten {
                     if (@field_ids) {
                         $from .= "\n${spc}${spc}${spc}WHERE fe_weight.id IN  (" .
                             join(',', @field_ids) . ")";
+                        $fieldIds = "(" . join(',', @field_ids) . ")";
                     }
 
                 } else {
                     @bump_fields = @{$self->QueryParser->search_fields->{$node->classname}};
                 }
 
+                               $from .= $search_mods->{'where'} if $search_mods;
+
                 ###$from .= "\n${spc}${spc}LIMIT $core_limit";
                 $from .= "\n${spc}) AS $talias ON (m.source = ${talias}.source)";
 
@@ -816,8 +1009,10 @@ sub flatten {
 
 
                 my $twhere .= '(' . $talias . ".id IS NOT NULL";
-                $twhere .= ' AND ' . join(' AND ', map {"${talias}.value ~* ".$self->QueryParser->quote_phrase_value($_)} @{$node->phrases}) if (@{$node->phrases});
-                $twhere .= ' AND ' . join(' AND ', map {"${talias}.value !~* ".$self->QueryParser->quote_phrase_value($_)} @{$node->unphrases}) if (@{$node->unphrases});
+                unless($search_mods){
+                   $twhere .= ' AND ' . join(' AND ', map {"${talias}.value ~* ".$self->QueryParser->quote_phrase_value($_)} @{$node->phrases}) if (@{$node->phrases});
+                   $twhere .= ' AND ' . join(' AND ', map {"${talias}.value !~* ".$self->QueryParser->quote_phrase_value($_)} @{$node->unphrases}) if (@{$node->unphrases});
+                }
                 $twhere .= ')';
 
                 if (@dyn_filters or !$self->top_plan) {
index 722c6c5..909e484 100644 (file)
@@ -2863,7 +2863,7 @@ sub str2abstract_query {
 
     _initialize_parser($parser) unless $parser->initialization_complete;
 
-    my $query = $parser->new(%use_opts)->parse;
+    $query = $parser->new(%use_opts)->parse;
 
     return $query->parse_tree->to_abstract_query(with_config => $with_config);
 }
@@ -2928,6 +2928,7 @@ sub query_parser_fts {
     }
 
 
+       $log->debug("OU: ". Dumper($args{ou}));
     # parse the query and supply any query-level %arg-based defaults
     # we expect, and make use of, query, superpage, superpage_size, debug and core_limit args
     my $query = $parser->new( %args )->parse;
@@ -3308,7 +3309,7 @@ sub query_parser_fts_wrapper {
 
     $log->debug("Full QueryParser query: $query", DEBUG);
 
-    return query_parser_fts($self, $client, query => $query, _simple_plan => $base_plan->simple_plan );
+    return query_parser_fts($self, $client, query => $query, _simple_plan => $base_plan->simple_plan, search_mods => $args{search_mods} );
 }
 __PACKAGE__->register_method(
        api_name        => "open-ils.storage.biblio.multiclass.staged.search_fts",
index 7ee0402..d0c3f77 100644 (file)
@@ -95,6 +95,10 @@ sub new {
         $self->$opt( $opts{$opt} ) if ($self->can($opt));
     }
 
+       if($opts{search_mods}){
+               $self->{search_mods} = $opts{search_mods};
+       }
+
     return $self;
 }
 
@@ -784,6 +788,14 @@ sub core_limit {
     return $self->{core_limit};
 }
 
+# Accessor for the search_mods flag: stores the argument when it is true,
+# and always returns the value currently held on the object.
+sub search_mods {
+    my ($self, $flag) = @_;
+    if ($flag) {
+        $self->{search_mods} = $flag;
+    }
+    return $self->{search_mods};
+}
+
+
 sub superpage {
     my $self = shift;
     my $l = shift;
index dea0145..8f7e4c1 100644 (file)
@@ -294,6 +294,28 @@ sub load_rresults_bookbag_item_notes {
     return;
 }
 
+# Report whether search modifications are switched on for the current user.
+#
+# Looks up the opac.search_mod org-unit setting for the staff user's
+# workstation org unit, or for a patron's preferred library (falling back to
+# the search library). Returns 1 when the setting is true, 0 otherwise.
+sub get_search_mod {
+    my $self = shift;
+    my $ctx = $self->ctx;
+    my $setting_name = 'opac.search_mod';
+
+    #Check if user is staff or patron
+    my $org = $ctx->{is_staff}
+        ? $ctx->{user}->ws_ou
+        : ($self->_get_pref_lib() || $self->_get_search_lib());
+
+    #Get settings, set search type
+    my $enabled = 0;
+    $enabled = 1 if $self->ctx->{get_org_setting}->($org, $setting_name);
+
+    $logger->debug("Get Search Mods: " . $enabled);
+    return $enabled;
+}
 # context additions: 
 #   page_size
 #   hit_count
@@ -396,9 +418,11 @@ sub load_rresults {
             return $self->generic_redirect;
         }
 
+               my $search_mod = get_search_mod($self);
+
         # Limit and offset will stay here. Everything else should be part of
         # the query string, not special args.
-        my $args = {'limit' => $limit, 'offset' => $offset};
+        my $args = {'limit' => $limit, 'offset' => $offset, 'search_mods' => $search_mod};
 
         if ($tag_circs) {
             $args->{tag_circulated_records} = 1;
index e833ef2..b17fce9 100644 (file)
@@ -1491,4 +1491,363 @@ SELECT  DISTINCT
 END;
 $func$ LANGUAGE PLPGSQL;
 
+
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+-- Table: metabib.normalized_identifier_field_entry
+-- Shadow of metabib.identifier_field_entry: same id and source, with value passed through search_normalize().
+-- DROP TABLE metabib.normalized_identifier_field_entry;
+-- Any existing copy is dropped first; rows are written by the normalize_field_entry trigger on the parent table.
+DROP TABLE IF EXISTS metabib.normalized_identifier_field_entry;
+
+CREATE TABLE metabib.normalized_identifier_field_entry
+(
+  id bigint NOT NULL,
+  source bigint,
+  value text,
+  CONSTRAINT normalized_identifier_field_entry_pkey PRIMARY KEY (id ),
+  CONSTRAINT "normalized_identifier_to_ identifier_field_entry_FK" FOREIGN KEY (id)
+      REFERENCES metabib.identifier_field_entry (id) MATCH SIMPLE
+      ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (
+  OIDS=FALSE
+);
+ALTER TABLE metabib.normalized_identifier_field_entry
+  OWNER TO evergreen;
+
+-- Index: metabib."fki_normalized_identifier_to_ identifier_field_entry_FK"
+-- NOTE(review): id is already the primary key, so this extra btree on id looks redundant -- confirm before keeping.
+-- DROP INDEX metabib."fki_normalized_identifier_to_ identifier_field_entry_FK";
+
+CREATE INDEX "fki_normalized_identifier_to_ identifier_field_entry_FK"
+  ON metabib.normalized_identifier_field_entry
+  USING btree
+  (id );
+
+---------------------------------------------------------------------------------------------------------
+
+
+-- Table: metabib.normalized_title_field_entry
+-- Shadow of metabib.title_field_entry: same id and source, with value passed through search_normalize().
+-- DROP TABLE metabib.normalized_title_field_entry;
+-- Any existing copy is dropped first; rows are written by the normalize_field_entry trigger on the parent table.
+DROP TABLE IF EXISTS metabib.normalized_title_field_entry;
+
+CREATE TABLE metabib.normalized_title_field_entry
+(
+  id bigint NOT NULL,
+  source bigint,
+  value text,
+  CONSTRAINT normalized_title_field_entry_pkey PRIMARY KEY (id ),
+  CONSTRAINT "normalized_title_to_ title_field_entry_FK" FOREIGN KEY (id)
+      REFERENCES metabib.title_field_entry (id) MATCH SIMPLE
+      ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (
+  OIDS=FALSE
+);
+ALTER TABLE metabib.normalized_title_field_entry
+  OWNER TO evergreen;
+
+-- Index: metabib."fki_normalized_title_to_ title_field_entry_FK"
+-- NOTE(review): id is already the primary key, so this extra btree on id looks redundant -- confirm before keeping.
+-- DROP INDEX metabib."fki_normalized_title_to_ title_field_entry_FK";
+
+CREATE INDEX "fki_normalized_title_to_ title_field_entry_FK"
+  ON metabib.normalized_title_field_entry
+  USING btree
+  (id );
+
+---------------------------------------------------------------------------------------------------------
+-- Table: metabib.normalized_subject_field_entry
+-- Shadow of metabib.subject_field_entry: same id and source, with value passed through search_normalize().
+-- DROP TABLE metabib.normalized_subject_field_entry;
+-- Any existing copy is dropped first; rows are written by the normalize_field_entry trigger on the parent table.
+DROP TABLE IF EXISTS metabib.normalized_subject_field_entry;
+
+CREATE TABLE metabib.normalized_subject_field_entry
+(
+  id bigint NOT NULL,
+  source bigint,
+  value text,
+  CONSTRAINT normalized_subject_field_entry_pkey PRIMARY KEY (id ),
+  CONSTRAINT "normalized_subject_to_ subject_field_entry_FK" FOREIGN KEY (id)
+      REFERENCES metabib.subject_field_entry (id) MATCH SIMPLE
+      ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (
+  OIDS=FALSE
+);
+ALTER TABLE metabib.normalized_subject_field_entry
+  OWNER TO evergreen;
+
+-- Index: metabib."fki_normalized_subject_to_ subject_field_entry_FK"
+-- NOTE(review): id is already the primary key, so this extra btree on id looks redundant -- confirm before keeping.
+-- DROP INDEX metabib."fki_normalized_subject_to_ subject_field_entry_FK";
+
+CREATE INDEX "fki_normalized_subject_to_ subject_field_entry_FK"
+  ON metabib.normalized_subject_field_entry
+  USING btree
+  (id );
+
+
+
+---------------------------------------------------------------------------------------------------------
+
+-- Table: metabib.normalized_author_field_entry
+-- Shadow of metabib.author_field_entry: same id and source, with value passed through search_normalize().
+-- DROP TABLE metabib.normalized_author_field_entry;
+-- Any existing copy is dropped first; rows are written by the normalize_field_entry trigger on the parent table.
+DROP TABLE IF EXISTS metabib.normalized_author_field_entry;
+
+CREATE TABLE metabib.normalized_author_field_entry
+(
+  id bigint NOT NULL,
+  source bigint,
+  value text,
+  CONSTRAINT normalized_author_field_entry_pkey PRIMARY KEY (id ),
+  CONSTRAINT "normalized_author_to_ author_field_entry_FK" FOREIGN KEY (id)
+      REFERENCES metabib.author_field_entry (id) MATCH SIMPLE
+      ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (
+  OIDS=FALSE
+);
+ALTER TABLE metabib.normalized_author_field_entry
+  OWNER TO evergreen;
+
+-- Index: metabib."fki_normalized_author_to_ author_field_entry_FK"
+-- NOTE(review): id is already the primary key, so this extra btree on id looks redundant -- confirm before keeping.
+-- DROP INDEX metabib."fki_normalized_author_to_ author_field_entry_FK";
+
+CREATE INDEX "fki_normalized_author_to_ author_field_entry_FK"
+  ON metabib.normalized_author_field_entry
+  USING btree
+  (id );
+
+
+ ---------------------------------------------------------------------------------------------------------
+
+-- Table: metabib.normalized_series_field_entry
+-- Shadow of metabib.series_field_entry: same id and source, with value passed through search_normalize().
+-- DROP TABLE metabib.normalized_series_field_entry;
+-- Any existing copy is dropped first; rows are written by the normalize_field_entry trigger on the parent table.
+DROP TABLE IF EXISTS metabib.normalized_series_field_entry;
+
+CREATE TABLE metabib.normalized_series_field_entry
+(
+  id bigint NOT NULL,
+  source bigint,
+  value text,
+  CONSTRAINT normalized_series_field_entry_pkey PRIMARY KEY (id ),
+  CONSTRAINT "normalized_series_to_ series_field_entry_FK" FOREIGN KEY (id)
+      REFERENCES metabib.series_field_entry (id) MATCH SIMPLE
+      ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (
+  OIDS=FALSE
+);
+ALTER TABLE metabib.normalized_series_field_entry
+  OWNER TO evergreen;
+
+-- Index: metabib."fki_normalized_series_to_ series_field_entry_FK"
+-- NOTE(review): id is already the primary key, so this extra btree on id looks redundant -- confirm before keeping.
+-- DROP INDEX metabib."fki_normalized_series_to_ series_field_entry_FK";
+
+CREATE INDEX "fki_normalized_series_to_ series_field_entry_FK"
+  ON metabib.normalized_series_field_entry
+  USING btree
+  (id );
+
+
+
+---------------------------------------------------------------------------------------------------------
+
+-- Table: metabib.normalized_keyword_field_entry
+-- Shadow of metabib.keyword_field_entry: same id and source, with value passed through search_normalize().
+-- DROP TABLE metabib.normalized_keyword_field_entry;
+-- Any existing copy is dropped first; rows are written by the normalize_field_entry trigger on the parent table.
+DROP TABLE IF EXISTS metabib.normalized_keyword_field_entry;
+
+CREATE TABLE metabib.normalized_keyword_field_entry
+(
+  id bigint NOT NULL,
+  source bigint,
+  value text,
+  CONSTRAINT normalized_keyword_field_entry_pkey PRIMARY KEY (id ),
+  CONSTRAINT "normalized_keyword_to_ keyword_field_entry_FK" FOREIGN KEY (id)
+      REFERENCES metabib.keyword_field_entry (id) MATCH SIMPLE
+      ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (
+  OIDS=FALSE
+);
+ALTER TABLE metabib.normalized_keyword_field_entry
+  OWNER TO evergreen;
+
+-- Index: metabib."fki_normalized_keyword_to_ keyword_field_entry_FK"
+-- NOTE(review): id is already the primary key, so this extra btree on id looks redundant -- confirm before keeping.
+-- DROP INDEX metabib."fki_normalized_keyword_to_ keyword_field_entry_FK";
+
+CREATE INDEX "fki_normalized_keyword_to_ keyword_field_entry_FK"
+  ON metabib.normalized_keyword_field_entry
+  USING btree
+  (id );
+
+---------------------------------------------------------------------------------------------------------------------------------------------
+---------------------------------------------------------------------------------------------------------------------------------------------
+
+
+--------------------------------------------------------------------------------------------------------------
+--Create indexes for normalized_field_entry tables, and get pg_trgm if it is not part of the public schema.--
+--------------------------------------------------------------------------------------------------------------
+-- gist_trgm_ops used below is provided by the pg_trgm extension, so the extension must exist first.
+ CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
+
+------------------------------------------------------------------------------------------------------
+-- Trigram indexes on value, presumably so the LIKE matches against norm.value can use an index -- confirm.
+-- NOTE(review): no trigram index is created for normalized_keyword_field_entry -- confirm that is intentional.
+DROP INDEX IF EXISTS metabib.normalized_identifier_field_entry_gist_trgm;
+
+-- Index: metabib.normalized_identifier_field_entry_gist_trgm
+
+-- DROP INDEX metabib.normalized_identifier_field_entry_gist_trgm;
+
+CREATE INDEX normalized_identifier_field_entry_gist_trgm
+  ON metabib.normalized_identifier_field_entry
+  USING gist
+  (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+
+  ------------------------------------------------------------------------------------------------------
+
+DROP INDEX IF EXISTS metabib.normalized_author_field_entry_gist_trgm;
+
+-- Index: metabib.normalized_author_field_entry_gist_trgm
+
+-- DROP INDEX metabib.normalized_author_field_entry_gist_trgm;
+
+CREATE INDEX normalized_author_field_entry_gist_trgm
+  ON metabib.normalized_author_field_entry
+  USING gist
+  (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+
+  ------------------------------------------------------------------------------------------------------
+
+
+DROP INDEX IF EXISTS metabib.normalized_title_field_entry_gist_trgm;
+
+-- Index: metabib.normalized_title_field_entry_gist_trgm
+
+-- DROP INDEX metabib.normalized_title_field_entry_gist_trgm;
+
+CREATE INDEX normalized_title_field_entry_gist_trgm
+  ON metabib.normalized_title_field_entry
+  USING gist
+  (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+  ------------------------------------------------------------------------------------------------------
+
+
+DROP INDEX IF EXISTS metabib.normalized_subject_field_entry_gist_trgm;
+
+-- Index: metabib.normalized_subject_field_entry_gist_trgm
+
+-- DROP INDEX metabib.normalized_subject_field_entry_gist_trgm;
+
+CREATE INDEX normalized_subject_field_entry_gist_trgm
+  ON metabib.normalized_subject_field_entry
+  USING gist
+  (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+  ------------------------------------------------------------------------------------------------------
+
+
+DROP INDEX IF EXISTS metabib.normalized_series_field_entry_gist_trgm;
+
+-- Index: metabib.normalized_series_field_entry_gist_trgm
+
+-- DROP INDEX metabib.normalized_series_field_entry_gist_trgm;
+
+CREATE INDEX normalized_series_field_entry_gist_trgm
+  ON metabib.normalized_series_field_entry
+  USING gist
+  (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+  ------------------------------------------------------------------------------------------------------
+
+-- Function: metabib.normalized_field_entry_view()
+--
+-- Row-level trigger function that mirrors a *_field_entry row into the
+-- matching normalized_* table in the same schema: id and source are copied
+-- and value is stored as search_normalize(value).
+--   INSERT -> inserts the mirror row
+--   UPDATE -> updates the mirror row with the same id
+-- Deletes are not handled here; the normalized tables remove their rows via
+-- ON DELETE CASCADE on the foreign key.
+
+-- DROP FUNCTION metabib.normalized_field_entry_view();
+
+CREATE OR REPLACE FUNCTION metabib.normalized_field_entry_view()
+  RETURNS trigger AS
+$BODY$
+
+DECLARE
+       -- schema.table becomes schema.normalized_table; both parts are quoted as identifiers
+       norm_table      text    := quote_ident(TG_TABLE_SCHEMA) || '.' || quote_ident('normalized_' || TG_TABLE_NAME);
+BEGIN
+
+-- Values are passed with USING rather than concatenated into the statement:
+-- a quote inside the normalized text can no longer break the SQL, and a NULL
+-- value or source no longer turns the whole statement string into NULL.
+IF(TG_OP = 'UPDATE') THEN
+
+       EXECUTE 'UPDATE ' || norm_table || ' SET value = $1, source = $2 WHERE id = $3'
+       USING search_normalize(NEW.value), NEW.source, NEW.id;
+
+ELSIF(TG_OP = 'INSERT') THEN
+
+       EXECUTE 'INSERT INTO ' || norm_table || ' (id, source, value) VALUES ($1, $2, $3)'
+       USING NEW.id, NEW.source, search_normalize(NEW.value);
+
+END IF;
+
+-- Used from AFTER row triggers, whose return value is ignored.
+RETURN NULL;
+
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE
+  COST 100;
+ALTER FUNCTION metabib.normalized_field_entry_view()
+  OWNER TO evergreen;
+---------------------------------------------------------------------------------------------------------------------------------------------
+-- Attach metabib.normalized_field_entry_view() to each *_field_entry table so that every
+-- inserted or updated row is mirrored into its normalized_* table. Each trigger is dropped first so the script can be re-run.
+  DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.identifier_field_entry;
+
+  CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.identifier_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+  DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.subject_field_entry;
+
+  CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.subject_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+  DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.author_field_entry;
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.author_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+  DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.title_field_entry;
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.title_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+  DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.keyword_field_entry;
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.keyword_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+  DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.series_field_entry;
+
+CREATE TRIGGER normalize_field_entry
+AFTER INSERT OR UPDATE ON metabib.series_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+
 COMMIT;
index 8805181..5cc2840 100644 (file)
@@ -4667,7 +4667,10 @@ INSERT into config.org_unit_setting_type
         'coust', 'description'
     ),
     'bool', null)
-
+,('opac.search_mod', 'opac',
+       'Modifies the way searches perform when using contains phrase, exact match, and searching for entirely blank fields after normalization.',
+       'Changes the way searches perform on contains phrase, exact match, and how searches perform when the search queried is to be entirely normalized.',
+       'bool', null)
 ,( 'sms.enable', 'sms',
     oils_i18n_gettext(
         'sms.enable',
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.normalized_field_entries b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.normalized_field_entries
new file mode 100644 (file)
index 0000000..4a96540
--- /dev/null
@@ -0,0 +1,348 @@
+BEGIN;
+
+-- Table: metabib.normalized_identifier_field_entry
+
+-- DROP TABLE metabib.normalized_identifier_field_entry;
+
+-- Companion of metabib.identifier_field_entry: same id, same source, and
+-- value holding search_normalize() of the original value. A row disappears
+-- with its parent row (ON DELETE CASCADE).
+CREATE TABLE metabib.normalized_identifier_field_entry (
+    id      BIGINT NOT NULL,
+    source  BIGINT,
+    value   TEXT,
+    CONSTRAINT normalized_identifier_field_entry_pkey PRIMARY KEY (id),
+    CONSTRAINT "normalized_identifier_to_ identifier_field_entry_FK"
+        FOREIGN KEY (id)
+        REFERENCES metabib.identifier_field_entry (id) MATCH SIMPLE
+        ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (OIDS = FALSE);
+
+-- Btree index on the foreign-key column.
+CREATE INDEX "fki_normalized_identifier_to_ identifier_field_entry_FK"
+    ON metabib.normalized_identifier_field_entry USING btree (id);
+
+-- Backfill from the rows already present in the source table.
+INSERT INTO metabib.normalized_identifier_field_entry (id, source, value)
+    SELECT id, source, search_normalize(value)
+      FROM metabib.identifier_field_entry;
+
+-- Table: metabib.normalized_title_field_entry
+
+-- DROP TABLE metabib.normalized_title_field_entry;
+
+-- Companion of metabib.title_field_entry: same id, same source, and
+-- value holding search_normalize() of the original value. A row disappears
+-- with its parent row (ON DELETE CASCADE).
+CREATE TABLE metabib.normalized_title_field_entry (
+    id      BIGINT NOT NULL,
+    source  BIGINT,
+    value   TEXT,
+    CONSTRAINT normalized_title_field_entry_pkey PRIMARY KEY (id),
+    CONSTRAINT "normalized_title_to_ title_field_entry_FK"
+        FOREIGN KEY (id)
+        REFERENCES metabib.title_field_entry (id) MATCH SIMPLE
+        ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (OIDS = FALSE);
+
+-- Btree index on the foreign-key column.
+CREATE INDEX "fki_normalized_title_to_ title_field_entry_FK"
+    ON metabib.normalized_title_field_entry USING btree (id);
+
+-- Backfill from the rows already present in the source table.
+INSERT INTO metabib.normalized_title_field_entry (id, source, value)
+    SELECT id, source, search_normalize(value)
+      FROM metabib.title_field_entry;
+
+
+---------------------------------------------------------------------------------------------------------
+-- Table: metabib.normalized_subject_field_entry
+
+-- DROP TABLE metabib.normalized_subject_field_entry;
+
+-- Companion of metabib.subject_field_entry: same id, same source, and
+-- value holding search_normalize() of the original value. A row disappears
+-- with its parent row (ON DELETE CASCADE).
+CREATE TABLE metabib.normalized_subject_field_entry (
+    id      BIGINT NOT NULL,
+    source  BIGINT,
+    value   TEXT,
+    CONSTRAINT normalized_subject_field_entry_pkey PRIMARY KEY (id),
+    CONSTRAINT "normalized_subject_to_ subject_field_entry_FK"
+        FOREIGN KEY (id)
+        REFERENCES metabib.subject_field_entry (id) MATCH SIMPLE
+        ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (OIDS = FALSE);
+
+-- Btree index on the foreign-key column.
+CREATE INDEX "fki_normalized_subject_to_ subject_field_entry_FK"
+    ON metabib.normalized_subject_field_entry USING btree (id);
+
+-- Backfill from the rows already present in the source table.
+INSERT INTO metabib.normalized_subject_field_entry (id, source, value)
+    SELECT id, source, search_normalize(value)
+      FROM metabib.subject_field_entry;
+
+
+---------------------------------------------------------------------------------------------------------
+
+-- Table: metabib.normalized_author_field_entry
+
+-- DROP TABLE metabib.normalized_author_field_entry;
+
+-- Companion of metabib.author_field_entry: same id, same source, and
+-- value holding search_normalize() of the original value. A row disappears
+-- with its parent row (ON DELETE CASCADE).
+CREATE TABLE metabib.normalized_author_field_entry (
+    id      BIGINT NOT NULL,
+    source  BIGINT,
+    value   TEXT,
+    CONSTRAINT normalized_author_field_entry_pkey PRIMARY KEY (id),
+    CONSTRAINT "normalized_author_to_ author_field_entry_FK"
+        FOREIGN KEY (id)
+        REFERENCES metabib.author_field_entry (id) MATCH SIMPLE
+        ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (OIDS = FALSE);
+
+-- Btree index on the foreign-key column.
+CREATE INDEX "fki_normalized_author_to_ author_field_entry_FK"
+    ON metabib.normalized_author_field_entry USING btree (id);
+
+-- Backfill from the rows already present in the source table.
+INSERT INTO metabib.normalized_author_field_entry (id, source, value)
+    SELECT id, source, search_normalize(value)
+      FROM metabib.author_field_entry;
+
+ ---------------------------------------------------------------------------------------------------------
+
+-- Table: metabib.normalized_series_field_entry
+
+-- DROP TABLE metabib.normalized_series_field_entry;
+
+-- Companion of metabib.series_field_entry: same id, same source, and
+-- value holding search_normalize() of the original value. A row disappears
+-- with its parent row (ON DELETE CASCADE).
+CREATE TABLE metabib.normalized_series_field_entry (
+    id      BIGINT NOT NULL,
+    source  BIGINT,
+    value   TEXT,
+    CONSTRAINT normalized_series_field_entry_pkey PRIMARY KEY (id),
+    CONSTRAINT "normalized_series_to_ series_field_entry_FK"
+        FOREIGN KEY (id)
+        REFERENCES metabib.series_field_entry (id) MATCH SIMPLE
+        ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (OIDS = FALSE);
+
+-- Btree index on the foreign-key column.
+CREATE INDEX "fki_normalized_series_to_ series_field_entry_FK"
+    ON metabib.normalized_series_field_entry USING btree (id);
+
+-- Backfill from the rows already present in the source table.
+INSERT INTO metabib.normalized_series_field_entry (id, source, value)
+    SELECT id, source, search_normalize(value)
+      FROM metabib.series_field_entry;
+
+
+---------------------------------------------------------------------------------------------------------
+
+-- Table: metabib.normalized_keyword_field_entry
+
+-- DROP TABLE metabib.normalized_keyword_field_entry;
+
+-- Companion of metabib.keyword_field_entry: same id, same source, and
+-- value holding search_normalize() of the original value. A row disappears
+-- with its parent row (ON DELETE CASCADE).
+CREATE TABLE metabib.normalized_keyword_field_entry (
+    id      BIGINT NOT NULL,
+    source  BIGINT,
+    value   TEXT,
+    CONSTRAINT normalized_keyword_field_entry_pkey PRIMARY KEY (id),
+    CONSTRAINT "normalized_keyword_to_ keyword_field_entry_FK"
+        FOREIGN KEY (id)
+        REFERENCES metabib.keyword_field_entry (id) MATCH SIMPLE
+        ON UPDATE NO ACTION ON DELETE CASCADE
+)
+WITH (OIDS = FALSE);
+
+-- Btree index on the foreign-key column.
+CREATE INDEX "fki_normalized_keyword_to_ keyword_field_entry_FK"
+    ON metabib.normalized_keyword_field_entry USING btree (id);
+
+-- Backfill from the rows already present in the source table.
+INSERT INTO metabib.normalized_keyword_field_entry (id, source, value)
+    SELECT id, source, search_normalize(value)
+      FROM metabib.keyword_field_entry;
+
+---------------------------------------------------------------------------------------------------------------------------------------------
+-- Function: metabib.normalized_field_entry_view()
+
+-- DROP FUNCTION metabib.normalized_field_entry_view();
+
+CREATE OR REPLACE FUNCTION metabib.normalized_field_entry_view()
+  RETURNS trigger AS
+$BODY$
+-- Row-level trigger function for the metabib.*_field_entry tables.
+-- Copies the row that fired the trigger into the companion table
+-- <schema>.normalized_<table>, storing search_normalize(NEW.value) as value:
+--   INSERT -> adds a row (id, source, normalized value)
+--   UPDATE -> refreshes value and source of the row with the same id
+-- Always returns NULL.
+DECLARE
+       -- Identifier-quoted name of the companion table of the firing table.
+       norm_table      text    := quote_ident(TG_TABLE_SCHEMA) || '.' || quote_ident('normalized_' || TG_TABLE_NAME);
+BEGIN
+
+-- Row values are passed as parameters (USING) rather than concatenated into
+-- the command text: a value containing a quote can no longer break or alter
+-- the statement, and a NULL column no longer turns the whole command NULL.
+IF(TG_OP = 'UPDATE') THEN
+
+       EXECUTE 'UPDATE '||norm_table||' SET value = $1, source = $2 WHERE id = $3'
+       USING search_normalize(NEW.value), NEW.source, NEW.id;
+
+ELSIF(TG_OP = 'INSERT') THEN
+
+       -- Explicit column list; the previous command text also carried a stray
+       -- closing parenthesis that made every INSERT fail to parse.
+       EXECUTE 'INSERT INTO '||norm_table||' (id, source, value) VALUES ($1, $2, $3)'
+       USING NEW.id, NEW.source, search_normalize(NEW.value);
+
+END IF;
+
+RETURN NULL;
+
+END;
+$BODY$
+  LANGUAGE plpgsql VOLATILE
+  COST 100;
+
+ -------------------------------------------------------------------------------------------------------------------------
+-- Install the row-level triggers that run
+-- metabib.normalized_field_entry_view() after every INSERT or UPDATE on the
+-- field entry tables. Each trigger is dropped first if it already exists,
+-- so a leftover trigger of the same name cannot abort the upgrade.
+
+DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.identifier_field_entry;
+CREATE TRIGGER normalize_field_entry
+    AFTER INSERT OR UPDATE ON metabib.identifier_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.subject_field_entry;
+CREATE TRIGGER normalize_field_entry
+    AFTER INSERT OR UPDATE ON metabib.subject_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.author_field_entry;
+CREATE TRIGGER normalize_field_entry
+    AFTER INSERT OR UPDATE ON metabib.author_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.title_field_entry;
+CREATE TRIGGER normalize_field_entry
+    AFTER INSERT OR UPDATE ON metabib.title_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.keyword_field_entry;
+CREATE TRIGGER normalize_field_entry
+    AFTER INSERT OR UPDATE ON metabib.keyword_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+DROP TRIGGER IF EXISTS normalize_field_entry ON metabib.series_field_entry;
+CREATE TRIGGER normalize_field_entry
+    AFTER INSERT OR UPDATE ON metabib.series_field_entry
+    FOR EACH ROW EXECUTE PROCEDURE metabib.normalized_field_entry_view();
+
+--------------------------------------------------------------------------------------------------------------
+-- Trigram indexes for the normalized_*_field_entry tables.
+-- The pg_trgm extension is installed into the public schema first, unless
+-- an extension of that name already exists.
+--------------------------------------------------------------------------------------------------------------
+
+CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
+
+------------------------------------------------------------------------------------------------------
+
+-- Index: metabib.normalized_author_field_entry_gist_trgm
+-- GiST index with trigram operators over the normalized author values,
+-- rebuilt from scratch if it already exists.
+DROP INDEX IF EXISTS metabib.normalized_author_field_entry_gist_trgm;
+
+CREATE INDEX normalized_author_field_entry_gist_trgm
+    ON metabib.normalized_author_field_entry
+    USING gist (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+
+  ------------------------------------------------------------------------------------------------------
+
+
+-- Index: metabib.normalized_title_field_entry_gist_trgm
+-- GiST index with trigram operators over the normalized title values,
+-- rebuilt from scratch if it already exists.
+DROP INDEX IF EXISTS metabib.normalized_title_field_entry_gist_trgm;
+
+CREATE INDEX normalized_title_field_entry_gist_trgm
+    ON metabib.normalized_title_field_entry
+    USING gist (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+
+  ------------------------------------------------------------------------------------------------------
+
+
+-- Index: metabib.normalized_subject_field_entry_gist_trgm
+-- GiST index with trigram operators over the normalized subject values,
+-- rebuilt from scratch if it already exists.
+DROP INDEX IF EXISTS metabib.normalized_subject_field_entry_gist_trgm;
+
+CREATE INDEX normalized_subject_field_entry_gist_trgm
+    ON metabib.normalized_subject_field_entry
+    USING gist (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+
+  ------------------------------------------------------------------------------------------------------
+
+
+-- Index: metabib.normalized_series_field_entry_gist_trgm
+-- GiST index with trigram operators over the normalized series values,
+-- rebuilt from scratch if it already exists.
+DROP INDEX IF EXISTS metabib.normalized_series_field_entry_gist_trgm;
+
+CREATE INDEX normalized_series_field_entry_gist_trgm
+    ON metabib.normalized_series_field_entry
+    USING gist (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+
+  ------------------------------------------------------------------------------------------------------
+
+
+-- Index: metabib.normalized_identifier_field_entry_gist_trgm
+-- GiST index with trigram operators over the normalized identifier values,
+-- rebuilt from scratch if it already exists.
+DROP INDEX IF EXISTS metabib.normalized_identifier_field_entry_gist_trgm;
+
+CREATE INDEX normalized_identifier_field_entry_gist_trgm
+    ON metabib.normalized_identifier_field_entry
+    USING gist (value COLLATE pg_catalog."C" gist_trgm_ops);
+
+  ------------------------------------------------------------------------------------------------------
+
+
+-- Register the boolean library setting opac.search_mod.
+-- Label and description are passed through oils_i18n_gettext() so they can
+-- be translated, the way the seed data declares org unit setting types.
+INSERT INTO config.org_unit_setting_type
+    ( name, grp, label, description, datatype, fm_class )
+VALUES (
+    'opac.search_mod',
+    'opac',
+    oils_i18n_gettext(
+        'opac.search_mod',
+        'Modifies the way searches perform when using contains phrase, exact match, and searching for entirely blank fields after normalization.',
+        'coust', 'label'
+    ),
+    oils_i18n_gettext(
+        'opac.search_mod',
+        'Changes the way searches perform on contains phrase, exact match, and how searches perform when the search queried is to be entirely normalized.',
+        'coust', 'description'
+    ),
+    'bool',
+    null
+);
+
+
+  ------------------------------------------------------------------------------------------------------
+
+COMMIT;
diff --git a/docs/RELEASE_NOTES_NEXT/search_modifications.txt b/docs/RELEASE_NOTES_NEXT/search_modifications.txt
new file mode 100644 (file)
index 0000000..abff443
--- /dev/null
@@ -0,0 +1,115 @@
+== Search Modifications
+
+Jeffrey Bond <jBond@catalystitservices.com>
+
+=== Summary
+
+`This feature consists of adding a few changes to the way
+Evergreen performs searches via the QueryParser.pm module.`
+
+`Search modifications will alter the way searches perform in the
+following ways:`
+
+* `Exact Match: Now exactly matches what is typed in under normalization
+rules.`
+    ** `Example:`
+    *** `Before: Search for: title| exact match | horses |
+            results found 44: Horses, Runway horses., Crazy over horses, etc...`
+    *** `Now: Search for: title | exact match | horses |
+            results found 1: Horses`
+
+    *** `Before: Search for: subject| exact match | art greek |
+            results found 5: Greek Art, The Development of Attic black-figure,
+            A pillage of art, etc....`
+    *** `Now: Search for: subject| exact match | art greek |
+            results found 3: Greek art, A handbook of Greek Art, and The art of
+            Crete and early Greece.`
+
+* `Contains Phrase: Will now require that the phrase appear in the index
+being searched on. (This feature appears to be in master at the moment.
+The only change, now, will be that the phrase is normalized and checked
+against a normalized table of values).`
+    ** `Example:`
+        *** `Before: subject | contains phrase | art greek |
+                results found 1: Greek Art`
+        *** `Now: subject | contains phrase | art greek |
+                results found 3: Greek art, The art of Crete..,
+                A handbook of Greek art`
+
+* `Normalized Indexes: Some indexes contain content made up entirely of
+characters that normalization removes, so the normalized value is blank.
+This will detect that anomaly and force a search without using text
+normalization.`
+    ** `Example:`
+        *** `Before: contains | !!! |
+                results found 0: No results found`
+        *** `Now: contains | !!! |
+                results found 1: !!!`
+
+`These changes will be organization specific and can be turned on or
+off. The change will be added to the Admin (.) -> Local Administration
+-> Library Settings Editor menu.`
+
+* `Configuration changes added to 950.data.seed-values.sql in the SQL build
+scripts. An upgrade script is available in the upgrade directory.`
+
+`All changes will be done to the QueryParser.pm perl module at
+/OpenILS/Application/Storage/Driver/Pg/QueryParser.pm.`
+
+* `Functions Added:`
+    ** `/Application/Storage/QueryParser.pm - search_mods`
+        *** `Creates an object for query parser to track if search
+             mods is on or off.`
+    ** `/Application/Storage/Driver/Pg/QueryParser.pm - search_mod`
+         *** `Creates additions for main SQL statement returned from
+        toSQL. Specifically modifies anything created within the flatten sub
+        routine.`
+    ** `/Application/Storage/Driver/Pg/QueryParser.pm - naco_normalize`
+        *** `Makes a call to the database to normalize a string
+        using search_normalize stored procedure.`
+    ** `/Application/Storage/Driver/Pg/QueryParser.pm - remove_search_characters`
+        *** `Removes search characters that dictate
+        what type of search is to be performed on the query.`
+    ** `/Application/Storage/Driver/Pg/QueryParser.pm - quote_value`
+        *** `Escapes all characters for SQL consumption.`
+    ** `/WWW/EGCatLoader/Search.pm - get_search_mod`
+        *** `Gets search modification setting.`
+* `Functions Modified:`
+    ** `/Application/Storage/QueryParser.pm - new`
+        *** `Added a check for search mods to either create or not create the
+        element inside queryparser.`
+    ** `/Application/Storage/Driver/Pg/QueryParser.pm - flatten`
+        *** `Appends pieces of SQL queries to the $from and $where strings.`
+    ** `/Application/Storage/Publisher/metabib.pm - query_parser_fts_wrapper`
+        *** `Passes search_mods setting on to query_parser_fts.`
+    ** `/WWW/EGCatLoader/Search.pm - load_rresults`
+        *** `Collects information about the search_mod configuration and
+        loads it into $args for getting $results.`
+
+`Remaining changes will be to the database. They include adding 6 tables
+of normalized text fields that are populated by triggers on the
+*_field_entry tables. The text fields are indexed using an extension
+called pg_trgm.`
+
+* `Added 6 new tables (identifier, title, subject, author, series and
+keyword); these tables are mapped to the *_field_entry tables. Each table
+contains an id which is one to one with the matching *_field_entry table.
+The other two columns contain the source and a value, which is a
+normalized value of the value field in the *_field_entry table.`
+    ** `Added an extension to index the normalized_*_field_entry tables. The
+    index allows searching using the LIKE operator and is optimized for
+    phrase matching as well as fuzzy matching. For more information on
+    pg_trgm visit this site:
+    http://www.postgresql.org/docs/9.1/static/pgtrgm.html[Postgres
+    pg_trgm].`
+    ** `Added a trigger to populate the normalized_*_field_entry
+    tables from the *_field_entry tables.`
+
+==== Blue Print
+* `https://blueprints.launchpad.net/evergreen/+spec/search-modifications`
+
+==== Deliverable
+* `Option to have searches perform differently.`
+* `Newly Indexed tables for more searching options. (In future releases)`
+* `Ability to use an indexed search on tables with LIKE and ILIKE`
+* `New operator to fuzzy match values, offering the ability to recognize
+spelling errors.`
+* `Relevance functions that will score results if they are spelled
+incorrectly. ("neighbor" will be scored similarly to "nieghbor")`