Initial support for multiple language analyzers; multi_match queries

author Bill Erickson <berickxx@gmail.com>

Thu, 5 Sep 2019 16:42:18 +0000 (12:42 -0400)

committer Bill Erickson <berickxx@gmail.com>

Fri, 17 Jan 2020 19:36:02 +0000 (14:36 -0500)
author Bill Erickson <berickxx@gmail.com>
Thu, 5 Sep 2019 16:42:18 +0000 (12:42 -0400)
committer Bill Erickson <berickxx@gmail.com>
Fri, 17 Jan 2020 19:36:02 +0000 (14:36 -0500)
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm

index 1bfedeb..1d1dcdc 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm
@@ -286,67 +286,59 @@ sub translate_query_node {
          my $last_char = substr($content, -1, 1);
          my $prefix = $children->[0]->{prefix};
  
-        my $match_op = 'match';
+        my $match_type = 'most_fields';
  
          # "Contains Phrase"
-        $match_op = 'match_phrase' if $prefix eq '"';
+        $match_type = 'phrase' if $prefix eq '"';
  
-        # Should we use the .raw keyword field?
-        my $text_search = 1;
+        my @field_nodes;
  
          # Matchiness specifiers embedded in the content override
          # the query node prefix.
          if ($first_char eq '^') {
-            $text_search = 0;
              $content = substr($content, 1);
  
              if ($last_char eq '$') { # "Matches Exactly" 
  
-                $match_op = 'term';
+                $match_type = undef;
                  $content = substr($content, 0, -1);
  
+                for my $field (@fields) {
+                    my $key = "$field_class|$field";
+                    # Use the lowercase normalized keyword index for 
+                    # exact match searches.
+                    push(@field_nodes, {term => {"$key.lower" => $content}});
+                }
+
              } else { # "Starts With"
  
-                $match_op = 'match_phrase_prefix';
+                $match_type = 'phrase_prefix';
              }
          }
  
-        # for match queries, treat multi-word search as AND searches
-        # instead of the default ES OR searches.
-        $content = {query => $content, operator => 'and'} 
-            if $match_op eq 'match';
-
-        my $field_nodes = [];
-        for my $field (@fields) {
-            my $key = "$field_class|$field";
+        if ($match_type) {
  
-            if ($text_search) {
-                # use the full-text indices
-                
-                push(@$field_nodes, 
-                    {$match_op => {"$key.text" => $content}});
-
-                push(@$field_nodes, 
-                    {$match_op => {"$key.text_folded" => $content}});
-
-            } else {
-
-                # Use the lowercase normalized keyword index for non-text searches.
-                push(@$field_nodes, {$match_op => {"$key.lower" => $content}});
-            }
+            push(@field_nodes, {
+                multi_match => {
+                    query => $content,
+                    operator => 'and',
+                    fields => ["$field_class|*.text*"],
+                    type => $match_type
+                }
+            });
          }
  
          $logger->info(
              "ES content = ". OpenSRF::Utils::JSON->perl2JSON($content) . 
-            "; bools = ". OpenSRF::Utils::JSON->perl2JSON($field_nodes)
+            "; bools = ". OpenSRF::Utils::JSON->perl2JSON(\@field_nodes)
          );
  
          my $query;
-        if (scalar(@$field_nodes) == 1) {
-            $query = {bool => {must => $field_nodes}};
+        if (scalar(@field_nodes) == 1) {
+            $query = {bool => {must => \@field_nodes}};
          } else {
              # Query multiple fields within a search class via OR query.
-            $query = {bool => {should => $field_nodes}};
+            $query = {bool => {should => \@field_nodes}};
          }
  
          if ($prefix eq '-"') {
@@ -573,13 +565,11 @@ sub compile_elastic_marc_query {
  
          # Use text searching on the value field
          my $value_query = {
-            bool => {
-                should => [
-                    {match => {'marc.value.text' => 
-                        {query => $value, operator => 'and'}}},
-                    {match => {'marc.value.text_folded' => 
-                        {query => $value, operator => 'and'}}}
-                ]
+            multi_match => {
+                query => $value,
+                fields => ['marc.value*'],
+                type => 'most_fields',
+                operator => 'and'
              }
          };
  
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm

index 181d6c0..b4d8f73 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm
@@ -60,6 +60,11 @@ sub index_name {
      die "Index name must be provided by sub-class\n";
  }
  
+sub language_analyzers {
+    # Override in subclass as needed
+    return ("english");
+}
+
  # Provide a direct DB connection so some high-volume activities,
  # like indexing bib records, can take advantage of a direct connection.
  # Returns database connection object -- connects if necessary.
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm

index 55da7d6..7c5d92c 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm
@@ -30,9 +30,6 @@ my $INDEX_NAME = 'bib-search';
  # number of bibs to index per batch.
  my $BIB_BATCH_SIZE = 500;
  
-# TODO: it's possible to apply multiple language analyzers.
-my $LANG_ANALYZER = 'english';
-
  my $BASE_INDEX_SETTINGS = {
      analysis => {
          analyzer => {
@@ -71,44 +68,36 @@ my $BASE_PROPERTIES = {
      marc => {
          type => 'nested',
          properties => {
-            # tag is assumed to be composed of numbers, so no lowercase.
-            tag => {type => 'keyword'},
+            tag => {
+                type => 'keyword',
+                normalizer => 'custom_lowercase'
+            },
              subfield => {
                  type => 'keyword',
-                fields => {
-                    lower => {
-                        type => 'keyword', 
-                        normalizer => 'custom_lowercase'
-                    }
-                }
+                normalizer => 'custom_lowercase'
              },
              value => {
-                type => 'keyword',
+                type => 'text',
                  fields => {
-                    lower => {
-                        type => 'keyword', 
-                        normalizer => 'custom_lowercase'
-                    },
-                    text => {
-                        type => 'text',
-                        analyzer => $LANG_ANALYZER
-                    },
                      text_folded => {
                          type => 'text',
                          analyzer => 'folding'
                      }
                  }
              }
- 
          }
      }
-
  };
  
  sub index_name {
      return $INDEX_NAME;
  }
  
+# TODO: add index-specific language analyzers to DB config
+sub language_analyzers {
+    return ("english");
+}
+
  sub create_index {
      my ($self) = @_;
  
@@ -122,6 +111,14 @@ sub create_index {
  
      my $mappings = $BASE_PROPERTIES;
  
+    # Add the language analyzers to the MARC mappings
+    for my $lang_analyzer ($self->language_analyzers) {
+        $mappings->{marc}->{properties}->{value}->{fields}->{"text_$lang_analyzer"} = {
+            type => 'text',
+            analyzer => $lang_analyzer
+        };
+    }
+
      my $fields = new_editor()->retrieve_all_elastic_bib_field();
  
      for my $field (@$fields) {
@@ -147,15 +144,20 @@ sub create_index {
              # Search fields also get full text indexing and analysis
              # plus a "folded" variation for ascii folded searches.
  
-            $def->{fields}->{text} = {
-                type => 'text',
-                analyzer => $LANG_ANALYZER
-            };
+            $def->{fields}->{text} = {type => 'text'};
  
              $def->{fields}->{text_folded} = {
                  type => 'text', 
                  analyzer => 'folding'
              };
+
+            # Add the language analyzers
+            for my $lang_analyzer ($self->language_analyzers) {
+                $def->{fields}->{"text_$lang_analyzer"} = {
+                    type => 'text',
+                    analyzer => $lang_analyzer
+                };
+            }
          }
  
          # Apply field boost.
diff --git a/Open-ILS/src/support-scripts/test-scripts/elastic-search.pl b/Open-ILS/src/support-scripts/test-scripts/elastic-search.pl

index 6562051..9cfdcf9 100755 (executable)
--- a/Open-ILS/src/support-scripts/test-scripts/elastic-search.pl
+++ b/Open-ILS/src/support-scripts/test-scripts/elastic-search.pl
@@ -41,7 +41,7 @@ Fieldmapper->import(
      IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
  OpenILS::Utils::CStoreEditor::init();
  
-# Title search AND author search AND MARC tag=100 search
+# Title search AND subject search AND MARC tag=100 search
  my $query = {
    _source => ['id', 'title|proper'] , # return only the ID and title fields
    from => 0,
@@ -74,7 +74,7 @@ my $query = {
                must => [{
                  multi_match => {
                    query => 'cline',
-                  fields => ['marc.value.text*'],
+                  fields => ['marc.value*'],
                    operator => 'and',
                    type => 'most_fields'
                  }
author	Bill Erickson <berickxx@gmail.com>
	Thu, 5 Sep 2019 16:42:18 +0000 (12:42 -0400)
committer	Bill Erickson <berickxx@gmail.com>
	Fri, 17 Jan 2020 19:36:02 +0000 (14:36 -0500)
Open-ILS/src/perlmods/lib/OpenILS/Application/Search/ElasticMapper.pm		patch \| blob \| history
Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm		patch \| blob \| history
Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm		patch \| blob \| history
Open-ILS/src/support-scripts/test-scripts/elastic-search.pl		patch \| blob \| history