LP1844418 Direct indexing WIP

author Bill Erickson <berickxx@gmail.com>

Fri, 21 Feb 2020 21:13:05 +0000 (16:13 -0500)

committer Bill Erickson <berickxx@gmail.com>

Fri, 21 Feb 2020 21:20:33 +0000 (16:20 -0500)
author Bill Erickson <berickxx@gmail.com>
Fri, 21 Feb 2020 21:13:05 +0000 (16:13 -0500)
committer Bill Erickson <berickxx@gmail.com>
Fri, 21 Feb 2020 21:20:33 +0000 (16:20 -0500)
diff --git a/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts b/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts

index f9630ab..4a2e7ca 100644 (file)
--- a/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts
+++ b/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts
@@ -318,10 +318,8 @@ export class CatalogService {
              return Promise.resolve();
          }
  
-        if (ctx.result.facets) {
-            // No need to fetch pre-compiled facets
-            console.debug('Showing pre-compiled facets');
-            ctx.result.facetData = this.formatFacets(ctx.result.facets);
+        if (this.elastic.enabled && ctx.result.facets) {
+            ctx.result.facetData = this.elastic.formatFacets(ctx.result.facets);
              return Promise.resolve();
          }
  
@@ -370,7 +368,12 @@ export class CatalogService {
  
      checkSearchEngine(): Promise<any> {
          return this.pcrud.retrieve('cgf', 'elastic.bib_search.enabled')
-        .toPromise().then(flag => this.elastic.enabled = flag.enabled() === 't');
+        .toPromise().then(flag => {
+            if (flag && flag.enabled() == 't') {
+                this.elastic.enabled = true;
+                return this.elastic.init();
+            }
+        });
      }
  
      fetchCcvms(): Promise<void> {
diff --git a/Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts b/Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts

index e20e984..724c671 100644 (file)
--- a/Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts
+++ b/Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts
@@ -13,6 +13,7 @@ import {RequestBodySearch, MatchQuery, MultiMatchQuery, TermsQuery, Query, Sort,
  export class ElasticService {
  
      enabled: boolean;
+    ebfMap: {[id: number]: IdlObject} = {};
  
      constructor(
          private idl: IdlService,
@@ -21,6 +22,12 @@ export class ElasticService {
          private pcrud: PcrudService
      ) {}
  
+    init(): Promise<any> {
+        // Cache every elastic.bib_field row by ID; formatFacets() reads it.
+        const ops = {select: {ebf: ["id", "name", "field_class", "label"]}};
+        return this.pcrud.retrieveAll('ebf', ops).pipe(tap(f => this.ebfMap[f.id()] = f)).toPromise();
+    }
+
      // Returns true if Elastic can provide search results.
      canSearch(ctx: CatalogSearchContext): boolean {
          if (!this.enabled) { return false; }
@@ -310,5 +317,35 @@ export class ElasticService {
                  return;
          }
      }
+
+    // Elastic facets are grouped by elastic.bib_field entries.
+    // Returns {field_class: {name: {ebfLabel, valueList}}} where valueList
+    // is sorted by count (descending), then by value (ascending).
+    formatFacets(facets: any) {
+        const facetData = {};
+        Object.keys(facets || {}).forEach(ebfId => {
+            const ebf = this.ebfMap[ebfId];
+
+            // The server may report a field that is missing from the local
+            // cache (e.g. created after init()); skip it rather than throw.
+            if (!ebf) { return; }
+
+            const facetHash = facets[ebfId] || {};
+            const valueList = Object.keys(facetHash)
+                .map(value => ({value : value, count : facetHash[value]}))
+                .sort((a, b) => {
+                    if (a.count > b.count) { return -1; }
+                    if (a.count < b.count) { return 1; }
+                    return a.value < b.value ? -1 : 1;
+                });
+
+            const fieldClass = ebf.field_class();
+            if (!facetData[fieldClass]) { facetData[fieldClass] = {}; }
+            facetData[fieldClass][ebf.name()] =
+                {ebfLabel : ebf.label(), valueList : valueList};
+        });
+
+        return facetData;
+    }
  }
  
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm

index 4c29f38..87442f0 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm
@@ -276,7 +276,7 @@ sub compile_elastic_query {
  }
  
  # Format ES search aggregations to match the API response facet structure
-# {$cmf_id => {"Value" => $count}, $cmf_id2 => {"Value Two" => $count2}, ...}
+# {$field_id => {"Value" => $count}, $field_id2 => {"Value Two" => $count2}, ...}
  sub format_facets {
      my $aggregations = shift;
      my $facets = {}; 
@@ -289,7 +289,7 @@ sub format_facets {
              $_->name eq $name && $_->field_class eq $field_class
          } @$bib_fields;
  
-        my $hash = $facets->{$bib_field->metabib_field} = {};
+        my $hash = $facets->{$bib_field->id} = {};
  
          my $values = $aggregations->{$fname}->{buckets};
          for my $bucket (@$values) {
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm

index 5f67427..c8cc731 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm
@@ -276,6 +276,26 @@ sub delete_index {
          $logger->warn("ES index '$index' ".
              "does not exist in cluster '".$self->cluster."'");
      }
+
+    my $e = new_editor(xact => 1);
+    my $conf = $self->find_index_config;
+
+    if (!$conf) {
+        $e->rollback;
+        return;
+    }
+
+    # Remove from EG database
+    $e->delete_elastic_index($conf) or return $e->die_event;
+    $e->commit;
+
+    # Remove from local cache
+    $self->indices([
+        grep { 
+            $_->name ne $self->index_name ||
+            $_->index_class ne $self->index_class
+        } @{$self->indices}
+    ]);
  }
  
  # Remove multiple documents from the index by ID.
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm

index 88bc552..fa7f439 100644 (file)
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
@@ -366,6 +366,7 @@ sub create_index_properties {
  
                  # Use the same fields and analysis as the 'grouped' field.
                  $def = clone($properties->{$field_class});
+
                  # Copy grouped fields into their group parent field.
                  $def->{copy_to} = $field_class;
  
@@ -479,6 +480,9 @@ sub create_one_field_index {
      my ($self, $field, $properties) = @_;
      my $index_name = $self->index_name;
      $logger->info("ES Creating index mapping for field $field");
+    if ($field eq 'author') {
+        $logger->info("ES Def Is: " . OpenSRF::Utils::JSON->perl2JSON($properties));
+    }
  
      eval { 
          $self->es->indices->put_mapping({
@@ -518,7 +522,7 @@ sub get_bib_field_for_data {
          ($_->search_field eq 't' && $field->{purpose} eq 'search') ||
          ($_->facet_field eq 't' && $field->{purpose} eq 'facet') ||
          ($_->filter eq 't' && $field->{purpose} eq 'filter') ||
-        ($_->sorterd eq 't' && $field->{purpose} eq 'sorter')
+        ($_->sorter eq 't' && $field->{purpose} eq 'sorter')
      } @matches;
  
      if (!$match) {
@@ -575,6 +579,10 @@ sub populate_bib_index_batch {
  
          my $first = 1;
          for my $field (@fields) {
+
+            # Ignore any data provided by the transform for which we
+            # have no field configuration.
+            next unless $self->get_bib_field_for_data($bib_fields, $field);
          
              if ($first) {
                  $first = 0;
@@ -594,8 +602,6 @@ sub populate_bib_index_batch {
  
              next unless defined $value && $value ne '';
  
-            next unless $self->get_bib_field_for_data($bib_fields, $field);
-
              $fname = "$fclass|$fname" if $fclass;
              $fname = "$fname|facet" if $field->{purpose} eq 'facet';
  
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql

index a9af281..bd9a251 100644 (file)
--- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql
@@ -45,6 +45,9 @@ CREATE TABLE elastic.index (
      CONSTRAINT    valid_index_class CHECK (index_class IN ('bib-search'))
  );
  
+CREATE UNIQUE INDEX active_index_once_per_cluster 
+    ON elastic.index (index_class, cluster) WHERE active is TRUE;
+
  -- XXX consider storing the xsl chunk directly on the field,
  -- then stitching the chunks together for indexing.  This would
  -- require a search chunk and a facet chunk.
@@ -191,11 +194,11 @@ VALUES
          FALSE, FALSE, TRUE, FALSE, 1),
      (NULL, 'vr_format', 'Video Recording Format', 
          FALSE, FALSE, TRUE, FALSE, 1),
-    (NULL, 'author', 'Author Sort', 
+    (NULL, 'authorsort', 'Author Sort', 
          FALSE, FALSE, FALSE, TRUE, 1),
      (NULL, 'pubdate', 'Pubdate Sort', 
          FALSE, FALSE, FALSE, TRUE, 1),
-    (NULL, 'title', 'Title Sort', 
+    (NULL, 'titlesort', 'Title Sort', 
          FALSE, FALSE, FALSE, TRUE, 1)
  ;
  
@@ -211,6 +214,10 @@ DELETE FROM config.global_flag WHERE name ~ 'elastic.*';
  
  /*
  
+-- Testing
+
+UPDATE config.global_flag SET enabled = TRUE WHERE name ~ '^elastic.*';
+
  -- Bill's elastic VM for testing.
  UPDATE elastic.node
      SET host = 'elastic.gamma', port = 80, path = '/elastic/node1'
diff --git a/Open-ILS/xsl/elastic-bib-transform.xsl b/Open-ILS/xsl/elastic-bib-transform.xsl

index a1466ad..62cfbcb 100644 (file)
--- a/Open-ILS/xsl/elastic-bib-transform.xsl
+++ b/Open-ILS/xsl/elastic-bib-transform.xsl
@@ -618,7 +618,7 @@
        <xsl:sort select="@tag"/>
        <xsl:if test="position() = 1">
          <xsl:call-template name="add_sorter_entry">
-          <xsl:with-param name="name">author</xsl:with-param>
+          <xsl:with-param name="name">authorsort</xsl:with-param>
            <xsl:with-param name="value">
              <xsl:call-template name="subfieldSelect"></xsl:call-template>
            </xsl:with-param>
@@ -644,7 +644,7 @@
          </xsl:choose>
        </xsl:variable>
        <xsl:call-template name="add_sorter_entry">
-        <xsl:with-param name="name">title</xsl:with-param>
+        <xsl:with-param name="name">titlesort</xsl:with-param>
          <xsl:with-param name="value" select="substring($full_title, $offset + 1)" />
        </xsl:call-template>
      </xsl:for-each>
diff --git a/docs/TechRef/elasticsearch.adoc b/docs/TechRef/elasticsearch.adoc

index 852e69a..298ce48 100644 (file)
--- a/docs/TechRef/elasticsearch.adoc
+++ b/docs/TechRef/elasticsearch.adoc
@@ -3,7 +3,7 @@
  == Goals ==
  
  Fast bib record searching without requiring significant changes to
-the Evergreen code and without requiring a brand new indexing configuration.
+existing Evergreen search and display code.
  
  Initially support integration with the Angular staff catalog, covering 
  most search features commonly used by staff.
@@ -26,12 +26,12 @@ See database tables in the 'elastic' schema.  No admin UI exists.  If a
  single elasticsearch node is running on the same server as EG, no 
  configuration changes are needed.
  
-To enable Elasticsearch for bib searching, modify the Evergreen global 
-flag in the database.
+To enable Elasticsearch for bib indexing and searching, modify the related
+global flags:
  
  [source,sql]
  ------------------------------------------------------------------------------
-UPDATE config.global_flag SET enabled = true WHERE name = 'elastic.bib_search.enabled';
+UPDATE config.global_flag SET enabled = true WHERE name ~ '^elastic.*';
  ------------------------------------------------------------------------------
  
  == Indexing Bib Records ==
@@ -42,37 +42,24 @@ Examples:
  
  [source,sh]
  ------------------------------------------------------------------------------
-./elastic-index.pl --create-index --populate
-./elastic-index.pl --delete-index --create-index --populate
-./elastic-index.pl --populate --modified-since 2019-09-17T14:45:00
+./elastic-index.pl --index-name my-bib-index --create-index --populate
+./elastic-index.pl --index-name my-bib-index --delete-index --create-index --populate
+./elastic-index.pl --index-name my-bib-index --populate --modified-since 2019-09-17T14:45:00
  ------------------------------------------------------------------------------
  
  == Bib Search Index ==
  
-A single 'bib-search' index is defined by default.  The structure of the index
-is derived from the local Evergreen index definitions.  No additional index
-definitions or modifications are required to get started.
-
-=== General Stucture ===
-
-The bib-search index contains 3 general categories of data for each 
-bib record: 
-
-1. Bib record search/filter data pulled from metabib fields and record 
-   attribute definitions
-2. MARC record data
-3. Holdings summaries for filtering by library, availability, etc.
+A single 'bib-search' index class is defined by default.  The structure
+of each index in this class is determined by entries in the
+elastic.bib_field table.  A default bib record transform, located at
+Open-ILS/xsl/elastic-bib-transform.xsl, extracts the field data from
+the MARCXML of each indexed record.
  
  === Search Fields ===
  
  Search fields are grouped by search class (title, author, etc.).  Searches
  can be performed against a specific field or across the class.
  
-Search field values are extracted from metabib.*_field_entry tables
-and reindexed in Elasticsearch using a combination of text and keyword
-analyzers: default text, language-specific text, asciifolding text
-(e.g. Grandpré => Grandpre) and lowercase keyword (for exact matches).
-
  ==== Caveats ====
  
  * Author fields are not presently indexed with language-specific analyzers, 
@@ -84,13 +71,9 @@ analyzers: default text, language-specific text, asciifolding text
  
  === Facet Fields ===
  
-Field marked as facets get an extra '.facet' property which is a raw, 
-unprocessed copy of the data used for aggregation.
-
-=== Filter Fields ===
-
-These concist of record attribute values and are indexed as simple
-'keyword' entries, lowercased for ease of searching / filtering.
+Fields marked as facets are tracked via a separate
+'$field_class|$name|facet' field, which contains only an unprocessed
+version of the facet-specific data output from the record transform.
  
  === MARC Data ===
  
@@ -138,7 +121,8 @@ It uses the elastic-builder module for creating the search structures.
  == Features Pending ==
  
  Some existing Evergreen features are not supported by the ES API, though in
-most if not all cases they can be added.
+most cases it should be possible to add them.
  
  * Popularity ranking
+* Search Highlighting
author	Bill Erickson <berickxx@gmail.com>
	Fri, 21 Feb 2020 21:13:05 +0000 (16:13 -0500)
committer	Bill Erickson <berickxx@gmail.com>
	Fri, 21 Feb 2020 21:20:33 +0000 (16:20 -0500)
Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts		patch \| blob \| history
Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts		patch \| blob \| history
Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm		patch \| blob \| history
Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm		patch \| blob \| history
Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm		patch \| blob \| history
Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql		patch \| blob \| history
Open-ILS/xsl/elastic-bib-transform.xsl		patch \| blob \| history
docs/TechRef/elasticsearch.adoc		patch \| blob \| history