From: Bill Erickson Date: Fri, 2 Nov 2018 20:56:37 +0000 (-0400) Subject: Group searches OR-based; lower kw normalizer X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=32d80876956f4bfa1f6cc2d46210bca58d6feadf;p=working%2FEvergreen.git Group searches OR-based; lower kw normalizer Signed-off-by: Bill Erickson --- diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm index c2765cb4bb..902d7cf57a 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm @@ -152,12 +152,8 @@ sub translate_query_node { } elsif ($type eq 'facet') { - # Our ES filters are indexes under the .raw multi-field. - my $name = $child->{name} . '.raw'; - for my $value (@{$child->{values}}) { - # TODO $filter->{negate} - push(@$filter_nodes, {term => {$name => $value}}); + push(@$filter_nodes, {term => {$child->{name} => $value}}); } } } @@ -205,30 +201,16 @@ sub translate_query_node { my $field_class = $node->{class}; # e.g. subject my @fields = @{$node->{fields}}; # e.g. temporal (optional) + # class-level searches are OR/should searches across all + # fields in the selected class. + @fields = map {$_->name} + grep {$_->search_group eq $field_class} @$bib_search_fields + unless @fields; + # note: $joiner is always '&' for type=node my ($joiner) = keys %{$node->{children}}; my $children = $node->{children}->{$joiner}; - my $bool_nodes = []; - - # phrase match - # keyword:"piano music" - # prefix '"' - # suffix '"' - # content 'piano music' - - # exact match - # keyword:^piano music$' - # 2 children - # content '^piano' - # content 'music$' - - # negate phrase match - # keyword:-"piano music" - # prefix '-"' - # suffic '"' - # content 'piano music' - # Content is only split across children when multiple words # are part of the same query structure, e.g. kw:piano music # This equates to a match search with multiple words in ES. @@ -247,12 +229,12 @@ sub translate_query_node { $match_op = 'match_phrase' if $prefix eq '"'; # Should we use the .raw keyword field? - my $use_keyword = ''; + my $text_search = 1; # Matchiness specificiers embedded in the content override # the query node prefix. if ($first_char eq '^') { - $use_keyword = '.raw'; + $text_search = 0; $content = substr($content, 1); if ($last_char eq '$') { # "Matches Exactly" @@ -266,35 +248,49 @@ sub translate_query_node { } } - # TODO TODO - # avoid indexing "title" and instead only create field-specific - # indexes. When searching "title" perform an OR search across - # all title:* fields. - # title:^Harry potter -- this is not guaranteed to work on a - # grouped 'title' index since it may start with an unexpected - # variation of the title. - - if (@fields) { # field-level search - for my $field (@fields) { - push(@$bool_nodes, - {$match_op => {"$field_class|$field$use_keyword" => $content}}); + # for match queries, treat multi-word search as AND searches + # instead of the default ES OR searches. + $content = {query => $content, operator => 'and'} + if $match_op eq 'match'; + + my $field_nodes = []; + for my $field (@fields) { + my $key = "$field_class|$field"; + + + if ($text_search) { + # use the full-text indices + + push(@$field_nodes, + {$match_op => {"$key.text" => $content}}); + + push(@$field_nodes, + {$match_op => {"$key.text_folded" => $content}}); + + } else { + + # Use the lowercase normalized keyword index for non-text searches. + push(@$field_nodes, {$match_op => {"$key.lower" => $content}}); } - } else { # class-level search - push(@$bool_nodes, {$match_op => {$field_class => $content}}); } $logger->info("ES content = $content / bools = ". - OpenSRF::Utils::JSON->perl2JSON($bool_nodes)); - - # check for negate queries - my $bool_op = $prefix eq '-"' ? 'must_not' : 'must'; + OpenSRF::Utils::JSON->perl2JSON($field_nodes)); - # only add the bool nesting when necessary. - if (@$bool_nodes > 1 || $bool_op eq 'must_not') { - return {bool => {$bool_op => $bool_nodes}}; + my $query; + if (scalar(@$field_nodes) == 1) { + $query = {bool => {must => $field_nodes}}; } else { - return $bool_nodes->[0]; + # Query multiple fields within a search class via OR query. + $query = {bool => {should => $field_nodes}}; } + + if ($prefix eq '-"') { + # Negation query. Wrap the whole shebang in a must_not + $query = {bool => {must_not => $query}}; + } + + return $query; } } @@ -389,14 +385,7 @@ sub add_elastic_facet_aggregations { my $fgrp = $facet->search_group; $fname = "$fgrp|$fname" if $fgrp; - # Search fields have a .raw multi-field for indexing the - # raw (keyword) value for aggregation. - # Non-search fields use the base field, since it's already a - # keyword field. - my $index = $fname; - $index = "$fname.raw" if $facet->search_field eq 't'; - - $elastic_query->{aggs}{$fname} = {terms => {field => $index}}; + $elastic_query->{aggs}{$fname} = {terms => {field => $fname}}; } } diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm index a8ed65c1ea..e451259db5 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm @@ -39,6 +39,12 @@ my $BASE_INDEX_SETTINGS = { filter => ['lowercase', 'asciifolding'], tokenizer => 'standard' } + }, + normalizer => { + custom_lowercase => { + type => 'custom', + filter => ['lowercase'] + } } } }; @@ -60,50 +66,7 @@ my $BASE_PROPERTIES = { circulate => {type => 'boolean'}, opac_visible => {type => 'boolean'} } - }, - - # Combo fields for field-class level searches. - # The value for every (for example) title|* search field will be - # copied to the "title" field for searching accross all title entries. - title => { - type => 'text', - analyzer => $LANG_ANALYZER, - fields => { - folded => {type => 'text', analyzer => 'folding'} - } - }, - author => { - type => 'text', - analyzer => $LANG_ANALYZER, - fields => { - folded => {type => 'text', analyzer => 'folding'} - } - }, - subject => { - type => 'text', - analyzer => $LANG_ANALYZER, - fields => { - folded => {type => 'text', analyzer => 'folding'} - } - }, - series => { - type => 'text', - analyzer => $LANG_ANALYZER, - fields => { - folded => {type => 'text', analyzer => 'folding'} - } - }, - - keyword => { - type => 'text', - analyzer => $LANG_ANALYZER, - fields => { - folded => {type => 'text', analyzer => 'folding'} - } - }, - - # Avoid full-text analysis on identifer fields. - identifier => {type => 'keyword'} + } }; sub index_name { @@ -138,35 +101,32 @@ sub create_index { my $search_group = $field->search_group; $field_name = "$search_group|$field_name" if $search_group; - my $def; + # Every field gets a keyword index (default) for aggregation and + # a lower-case keyword index (.lower) for sorting and certain + # types of searches (exact match, starts with) + my $def = { + type => 'keyword', + fields => { + lower => { + type => 'keyword', + normalizer => 'custom_lowercase' + } + } + }; + if ($field->search_field eq 't') { - # Search fields get full text indexing and analysis + # Search fields also get full text indexing and analysis + # plus a "folded" variation for ascii folded searches. - $def = { + $def->{fields}->{text} = { type => 'text', - analyzer => $LANG_ANALYZER, - fields => { - folded => {type => 'text', analyzer => 'folding'} - } + analyzer => $LANG_ANALYZER }; - if ($field->facet_field eq 't' || $field->sorter eq 't') { - # If it's also a sort/facet field, add a keyword version - # of the field to use for sorting and aggregation - $def->{fields}{raw} = {type => 'keyword'}; - } - - if ($search_group) { - # Fields in a search group are copied to the group field - # for searching acrosss all fields of a given type. - $def->{copy_to} = $search_group; - } - - } else { - # Non-search fields -- used for sorting, aggregation, - # and "code" (raw value) searches -- are only indexed - # as (non-analyzed) keyword fields. - $def = {type => 'keyword'}; + $def->{fields}->{text_folded} = { + type => 'text', + analyzer => 'folding' + }; } # Apply field boost.