From 69d3a9e5f01553a70ea4ea7505b53ac7af2a1305 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Fri, 21 Feb 2020 16:13:05 -0500 Subject: [PATCH] LP1844418 Direct indexing WIP Signed-off-by: Bill Erickson --- .../eg2/src/app/share/catalog/catalog.service.ts | 13 +++--- .../eg2/src/app/share/catalog/elastic.service.ts | 37 ++++++++++++++++ .../lib/OpenILS/Application/Search/Elastic.pm | 4 +- Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm | 20 +++++++++ .../src/perlmods/lib/OpenILS/Elastic/BibSearch.pm | 12 ++++-- .../sql/Pg/upgrade/XXXX.schema.elastic-search.sql | 11 ++++- Open-ILS/xsl/elastic-bib-transform.xsl | 4 +- docs/TechRef/elasticsearch.adoc | 50 ++++++++-------------- 8 files changed, 104 insertions(+), 47 deletions(-) diff --git a/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts b/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts index f9630abd1f..4a2e7caf9c 100644 --- a/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts +++ b/Open-ILS/src/eg2/src/app/share/catalog/catalog.service.ts @@ -318,10 +318,8 @@ export class CatalogService { return Promise.resolve(); } - if (ctx.result.facets) { - // No need to fetch pre-compiled facets - console.debug('Showing pre-compiled facets'); - ctx.result.facetData = this.formatFacets(ctx.result.facets); + if (this.elastic.enabled && ctx.result.facets) { + ctx.result.facetData = this.elastic.formatFacets(ctx.result.facets); return Promise.resolve(); } @@ -370,7 +368,12 @@ export class CatalogService { checkSearchEngine(): Promise { return this.pcrud.retrieve('cgf', 'elastic.bib_search.enabled') - .toPromise().then(flag => this.elastic.enabled = flag.enabled() === 't'); + .toPromise().then(flag => { + if (flag && flag.enabled() == 't') { + this.elastic.enabled = true; + return this.elastic.init(); + } + }); } fetchCcvms(): Promise { diff --git a/Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts b/Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts index e20e984bb6..724c671547 100644 --- 
a/Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts +++ b/Open-ILS/src/eg2/src/app/share/catalog/elastic.service.ts @@ -13,6 +13,7 @@ import {RequestBodySearch, MatchQuery, MultiMatchQuery, TermsQuery, Query, Sort, export class ElasticService { enabled: boolean; + ebfMap: {[id: number]: IdlObject} = {}; constructor( private idl: IdlService, @@ -21,6 +22,12 @@ export class ElasticService { private pcrud: PcrudService ) {} + init(): Promise { + return this.pcrud.retrieveAll('ebf', + {select: {ebf: ["id", "name", "field_class", "label"]}} + ).pipe(tap(field => this.ebfMap[field.id()] = field)).toPromise(); + } + // Returns true if Elastic can provide search results. canSearch(ctx: CatalogSearchContext): boolean { if (!this.enabled) { return false; } @@ -310,5 +317,35 @@ export class ElasticService { return; } } + + // Elastic facets are grouped by elastic.bib_field entries. + formatFacets(facets: any) { + const facetData = {}; + Object.keys(facets).forEach(ebfId => { + const facetHash = facets[ebfId]; + const ebf = this.ebfMap[ebfId]; + + const ebfData = []; + Object.keys(facetHash).forEach(value => { + const count = facetHash[value]; + ebfData.push({value : value, count : count}); + }); + + if (!facetData[ebf.field_class()]) { + facetData[ebf.field_class()] = {}; + } + + facetData[ebf.field_class()][ebf.name()] = { + ebfLabel : ebf.label(), + valueList : ebfData.sort((a, b) => { + if (a.count > b.count) { return -1; } + if (a.count < b.count) { return 1; } + return a.value < b.value ? 
-1 : 1; + }) + }; + }); + + return facetData; + } } diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm index 4c29f38471..87442f0724 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm @@ -276,7 +276,7 @@ sub compile_elastic_query { } # Format ES search aggregations to match the API response facet structure -# {$cmf_id => {"Value" => $count}, $cmf_id2 => {"Value Two" => $count2}, ...} +# {$field_id => {"Value" => $count}, $field_id2 => {"Value Two" => $count2}, ...} sub format_facets { my $aggregations = shift; my $facets = {}; @@ -289,7 +289,7 @@ sub format_facets { $_->name eq $name && $_->field_class eq $field_class } @$bib_fields; - my $hash = $facets->{$bib_field->metabib_field} = {}; + my $hash = $facets->{$bib_field->id} = {}; my $values = $aggregations->{$fname}->{buckets}; for my $bucket (@$values) { diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm index 5f67427938..c8cc731659 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm @@ -276,6 +276,26 @@ sub delete_index { $logger->warn("ES index '$index' ". "does not exist in cluster '".$self->cluster."'"); } + + my $e = new_editor(xact => 1); + my $conf = $self->find_index_config; + + if (!$conf) { + $e->rollback; + return; + } + + # Remove from EG database + $e->delete_elastic_index($conf) or return $e->die_event; + $e->commit; + + # Remove from local cache + $self->indices([ + grep { + $_->name ne $self->index_name || + $_->index_class ne $self->index_class + } @{$self->indices} + ]); } # Remove multiple documents from the index by ID. 
diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm index 88bc552340..fa7f4397cd 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm @@ -366,6 +366,7 @@ sub create_index_properties { # Use the same fields and analysis as the 'grouped' field. $def = clone($properties->{$field_class}); + # Copy grouped fields into their group parent field. $def->{copy_to} = $field_class; @@ -479,6 +480,9 @@ sub create_one_field_index { my ($self, $field, $properties) = @_; my $index_name = $self->index_name; $logger->info("ES Creating index mapping for field $field"); + if ($field eq 'author') { + $logger->info("ES Def Is: " . OpenSRF::Utils::JSON->perl2JSON($properties)); + } eval { $self->es->indices->put_mapping({ @@ -518,7 +522,7 @@ sub get_bib_field_for_data { ($_->search_field eq 't' && $field->{purpose} eq 'search') || ($_->facet_field eq 't' && $field->{purpose} eq 'facet') || ($_->filter eq 't' && $field->{purpose} eq 'filter') || - ($_->sorterd eq 't' && $field->{purpose} eq 'sorter') + ($_->sorter eq 't' && $field->{purpose} eq 'sorter') } @matches; if (!$match) { @@ -575,6 +579,10 @@ sub populate_bib_index_batch { my $first = 1; for my $field (@fields) { + + # Ignore any data provided by the transform we have + # no configuration for. 
+ next unless $self->get_bib_field_for_data($bib_fields, $field); if ($first) { $first = 0; @@ -594,8 +602,6 @@ sub populate_bib_index_batch { next unless defined $value && $value ne ''; - next unless $self->get_bib_field_for_data($bib_fields, $field); - $fname = "$fclass|$fname" if $fclass; $fname = "$fname|facet" if $field->{purpose} eq 'facet'; diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql index a9af281a4e..bd9a2514cf 100644 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql @@ -45,6 +45,9 @@ CREATE TABLE elastic.index ( CONSTRAINT valid_index_class CHECK (index_class IN ('bib-search')) ); +CREATE UNIQUE INDEX active_index_once_per_cluster + ON elastic.index (index_class, cluster) WHERE active is TRUE; + -- XXX consider storing the xsl chunk directly on the field, -- then stitching the chunks together for indexing. This would -- require a search chunk and a facet chunk. @@ -191,11 +194,11 @@ VALUES FALSE, FALSE, TRUE, FALSE, 1), (NULL, 'vr_format', 'Video Recording Format', FALSE, FALSE, TRUE, FALSE, 1), - (NULL, 'author', 'Author Sort', + (NULL, 'authorsort', 'Author Sort', FALSE, FALSE, FALSE, TRUE, 1), (NULL, 'pubdate', 'Pubdate Sort', FALSE, FALSE, FALSE, TRUE, 1), - (NULL, 'title', 'Title Sort', + (NULL, 'titlesort', 'Title Sort', FALSE, FALSE, FALSE, TRUE, 1) ; @@ -211,6 +214,10 @@ DELETE FROM config.global_flag WHERE name ~ 'elastic.*'; /* +-- Testing + +UPDATE config.global_flag SET enabled = TRUE WHERE name ~ '^elastic.*'; + -- Bill's elastic VM for testing. 
UPDATE elastic.node SET host = 'elastic.gamma', port = 80, path = '/elastic/node1' diff --git a/Open-ILS/xsl/elastic-bib-transform.xsl b/Open-ILS/xsl/elastic-bib-transform.xsl index a1466ad386..62cfbcb9bb 100644 --- a/Open-ILS/xsl/elastic-bib-transform.xsl +++ b/Open-ILS/xsl/elastic-bib-transform.xsl @@ -618,7 +618,7 @@ - author + authorsort @@ -644,7 +644,7 @@ - title + titlesort diff --git a/docs/TechRef/elasticsearch.adoc b/docs/TechRef/elasticsearch.adoc index 852e69ac5f..298ce487de 100644 --- a/docs/TechRef/elasticsearch.adoc +++ b/docs/TechRef/elasticsearch.adoc @@ -3,7 +3,7 @@ == Goals == Fast bib record searching without requiring significant changes to -the Evergreen code and without requiring a brand new indexing configuration. +existing Evergreen search and display code. Initially support integration with the Angular staff catalog, covering most search features commonly used by staff. @@ -26,12 +26,12 @@ See database tables in the 'elastic' schema. No admin UI exists. If a single elasticsearch node is running on the same server as EG, no configuration changes are needed. -To enable Elasticsearch for bib searching, modify the Evergreen global -flag in the database. 
+To enable Elasticsearch for bib indexing and searching, modify the related +global flags: [source,sql] ------------------------------------------------------------------------------ -UPDATE config.global_flag SET enabled = true WHERE name = 'elastic.bib_search.enabled'; +UPDATE config.global_flag SET enabled = true WHERE name ~ '^elastic.*'; ------------------------------------------------------------------------------ == Indexing Bib Records == @@ -42,37 +42,24 @@ Examples: [source,sh] ------------------------------------------------------------------------------ -./elastic-index.pl --create-index --populate -./elastic-index.pl --delete-index --create-index --populate -./elastic-index.pl --populate --modified-since 2019-09-17T14:45:00 +./elastic-index.pl --index-name my-bib-index --create-index --populate +./elastic-index.pl --index-name my-bib-index --delete-index --create-index --populate +./elastic-index.pl --index-name my-bib-index --populate --modified-since 2019-09-17T14:45:00 ------------------------------------------------------------------------------ == Bib Search Index == -A single 'bib-search' index is defined by default. The structure of the index -is derived from the local Evergreen index definitions. No additional index -definitions or modifications are required to get started. - -=== General Stucture === - -The bib-search index contains 3 general categories of data for each -bib record: - -1. Bib record search/filter data pulled from metabib fields and record - attribute definitions -2. MARC record data -3. Holdings summaries for filtering by library, availability, etc. +A single 'bib-search' index class is defined by default. The structure +of indexes defined for this class is defined by entries in the +elastic.bib_field table. A default bib record transform file is located +at Open-ILS/xsl/elastic-bib-transform.xsl for extracting data from the +MARCXML data for each indexed record. 
=== Search Fields === Search fields are grouped by search class (title, author, etc.). Searches can be performed against a specific field or across the class. -Search field values are extracted from metabib.*_field_entry tables -and reindexed in Elasticsearch using a combination of text and keyword -analyzers: default text, language-specific text, asciifolding text -(e.g. Grandpré => Grandpre) and lowercase keyword (for exact matches). - ==== Caveats ==== * Author fields are not presently indexed with language-specific analyzers, @@ -84,13 +71,9 @@ analyzers: default text, language-specific text, asciifolding text === Facet Fields === -Field marked as facets get an extra '.facet' property which is a raw, -unprocessed copy of the data used for aggregation. - -=== Filter Fields === - -These concist of record attribute values and are indexed as simple -'keyword' entries, lowercased for ease of searching / filtering. +Fields marked as facets are tracked via a separate '$field_class|$name|facet' +field which contains only an unprocessed version of the facet-specific data +output from the record transform. === MARC Data === @@ -138,7 +121,8 @@ It uses the elastic-builder module for creating the search structures. == Features Pending == Some existing Evergreen features are not supported by the ES API, though in -most if not all cases they can be added. +most cases it should be possible to add them. * Popularity ranking +* Search Highlighting -- 2.11.0