return Promise.resolve();
}
- if (ctx.result.facets) {
- // No need to fetch pre-compiled facets
- console.debug('Showing pre-compiled facets');
- ctx.result.facetData = this.formatFacets(ctx.result.facets);
+ if (this.elastic.enabled && ctx.result.facets) {
+ ctx.result.facetData = this.elastic.formatFacets(ctx.result.facets);
return Promise.resolve();
}
checkSearchEngine(): Promise<any> {
return this.pcrud.retrieve('cgf', 'elastic.bib_search.enabled')
- .toPromise().then(flag => this.elastic.enabled = flag.enabled() === 't');
+ .toPromise().then(flag => {
+ // Always leave elastic.enabled as a real boolean, even when the
+ // flag row is missing or disabled, so later checks never see a
+ // stale or undefined value.
+ this.elastic.enabled = Boolean(flag) && flag.enabled() === 't';
+
+ // Field definitions are only needed when Elastic is in use.
+ if (this.elastic.enabled) {
+ return this.elastic.init();
+ }
+ });
}
fetchCcvms(): Promise<void> {
export class ElasticService {
enabled: boolean;
+ ebfMap: {[id: number]: IdlObject} = {};
constructor(
private idl: IdlService,
private pcrud: PcrudService
) {}
+ // Fetches every 'ebf' row (id, name, field_class, label) and caches
+ // each one in ebfMap keyed by its id. The returned promise settles
+ // once the underlying stream completes.
+ init(): Promise<any> {
+ const fields$ = this.pcrud.retrieveAll('ebf',
+ {select: {ebf: ['id', 'name', 'field_class', 'label']}});
+ const cacheField = tap(field => this.ebfMap[field.id()] = field);
+ return fields$.pipe(cacheField).toPromise();
+ }
+
// Returns true if Elastic can provide search results.
canSearch(ctx: CatalogSearchContext): boolean {
if (!this.enabled) { return false; }
return;
}
}
+
+ // Elastic facets are grouped by elastic.bib_field entries.
+ //
+ // Converts {ebfId: {value: count, ...}, ...} into
+ // {fieldClass: {fieldName: {ebfLabel, valueList}}}, where valueList
+ // holds {value, count} entries sorted by descending count, then by
+ // ascending value.
+ formatFacets(facets: any) {
+ const facetData = {};
+ Object.keys(facets || {}).forEach(ebfId => {
+ const facetHash = facets[ebfId];
+ const ebf = this.ebfMap[ebfId];
+
+ // A facet may reference a field that is not in the local cache.
+ // Skip it rather than throwing and losing every other facet.
+ if (!ebf) { return; }
+
+ const ebfData = [];
+ Object.keys(facetHash).forEach(value => {
+ const count = facetHash[value];
+ ebfData.push({value : value, count : count});
+ });
+
+ if (!facetData[ebf.field_class()]) {
+ facetData[ebf.field_class()] = {};
+ }
+
+ facetData[ebf.field_class()][ebf.name()] = {
+ ebfLabel : ebf.label(),
+ valueList : ebfData.sort((a, b) => {
+ if (a.count > b.count) { return -1; }
+ if (a.count < b.count) { return 1; }
+ // Values are unique object keys, so a tie is impossible.
+ return a.value < b.value ? -1 : 1;
+ })
+ };
+ });
+
+ return facetData;
+ }
}
}
# Format ES search aggregations to match the API response facet structure
-# {$cmf_id => {"Value" => $count}, $cmf_id2 => {"Value Two" => $count2}, ...}
+# {$field_id => {"Value" => $count}, $field_id2 => {"Value Two" => $count2}, ...}
sub format_facets {
my $aggregations = shift;
my $facets = {};
$_->name eq $name && $_->field_class eq $field_class
} @$bib_fields;
- my $hash = $facets->{$bib_field->metabib_field} = {};
+ my $hash = $facets->{$bib_field->id} = {};
my $values = $aggregations->{$fname}->{buckets};
for my $bucket (@$values) {
$logger->warn("ES index '$index' ".
"does not exist in cluster '".$self->cluster."'");
}
+
+ my $e = new_editor(xact => 1);
+ my $conf = $self->find_index_config;
+
+ if (!$conf) {
+ $e->rollback;
+ return;
+ }
+
+ # Remove from EG database
+ $e->delete_elastic_index($conf) or return $e->die_event;
+ $e->commit;
+
+ # Remove from local cache
+ $self->indices([
+ grep {
+ $_->name ne $self->index_name ||
+ $_->index_class ne $self->index_class
+ } @{$self->indices}
+ ]);
}
# Remove multiple documents from the index by ID.
# Use the same fields and analysis as the 'grouped' field.
$def = clone($properties->{$field_class});
+
# Copy grouped fields into their group parent field.
$def->{copy_to} = $field_class;
my ($self, $field, $properties) = @_;
my $index_name = $self->index_name;
$logger->info("ES Creating index mapping for field $field");
+ if ($field eq 'author') {
+ $logger->info("ES Def Is: " . OpenSRF::Utils::JSON->perl2JSON($properties));
+ }
eval {
$self->es->indices->put_mapping({
($_->search_field eq 't' && $field->{purpose} eq 'search') ||
($_->facet_field eq 't' && $field->{purpose} eq 'facet') ||
($_->filter eq 't' && $field->{purpose} eq 'filter') ||
- ($_->sorterd eq 't' && $field->{purpose} eq 'sorter')
+ ($_->sorter eq 't' && $field->{purpose} eq 'sorter')
} @matches;
if (!$match) {
my $first = 1;
for my $field (@fields) {
+
+ # Ignore any data provided by the transform we have
+ # no configuration for.
+ next unless $self->get_bib_field_for_data($bib_fields, $field);
if ($first) {
$first = 0;
next unless defined $value && $value ne '';
- next unless $self->get_bib_field_for_data($bib_fields, $field);
-
$fname = "$fclass|$fname" if $fclass;
$fname = "$fname|facet" if $field->{purpose} eq 'facet';
CONSTRAINT valid_index_class CHECK (index_class IN ('bib-search'))
);
+CREATE UNIQUE INDEX active_index_once_per_cluster
+ ON elastic.index (index_class, cluster) WHERE active is TRUE;
+
-- XXX consider storing the xsl chunk directly on the field,
-- then stitching the chunks together for indexing. This would
-- require a search chunk and a facet chunk.
FALSE, FALSE, TRUE, FALSE, 1),
(NULL, 'vr_format', 'Video Recording Format',
FALSE, FALSE, TRUE, FALSE, 1),
- (NULL, 'author', 'Author Sort',
+ (NULL, 'authorsort', 'Author Sort',
FALSE, FALSE, FALSE, TRUE, 1),
(NULL, 'pubdate', 'Pubdate Sort',
FALSE, FALSE, FALSE, TRUE, 1),
- (NULL, 'title', 'Title Sort',
+ (NULL, 'titlesort', 'Title Sort',
FALSE, FALSE, FALSE, TRUE, 1)
;
/*
+-- Testing
+
+UPDATE config.global_flag SET enabled = TRUE WHERE name ~ '^elastic.*';
+
-- Bill's elastic VM for testing.
UPDATE elastic.node
SET host = 'elastic.gamma', port = 80, path = '/elastic/node1'
<xsl:sort select="@tag"/>
<xsl:if test="position() = 1">
<xsl:call-template name="add_sorter_entry">
- <xsl:with-param name="name">author</xsl:with-param>
+ <xsl:with-param name="name">authorsort</xsl:with-param>
<xsl:with-param name="value">
<xsl:call-template name="subfieldSelect"></xsl:call-template>
</xsl:with-param>
</xsl:choose>
</xsl:variable>
<xsl:call-template name="add_sorter_entry">
- <xsl:with-param name="name">title</xsl:with-param>
+ <xsl:with-param name="name">titlesort</xsl:with-param>
<xsl:with-param name="value" select="substring($full_title, $offset + 1)" />
</xsl:call-template>
</xsl:for-each>
== Goals ==
Fast bib record searching without requiring significant changes to
-the Evergreen code and without requiring a brand new indexing configuration.
+existing Evergreen search and display code.
Initially support integration with the Angular staff catalog, covering
most search features commonly used by staff.
single elasticsearch node is running on the same server as EG, no
configuration changes are needed.
-To enable Elasticsearch for bib searching, modify the Evergreen global
-flag in the database.
+To enable Elasticsearch for bib indexing and searching, modify the related
+global flags:
[source,sql]
------------------------------------------------------------------------------
-UPDATE config.global_flag SET enabled = true WHERE name = 'elastic.bib_search.enabled';
+UPDATE config.global_flag SET enabled = true WHERE name ~ '^elastic.*';
------------------------------------------------------------------------------
== Indexing Bib Records ==
[source,sh]
------------------------------------------------------------------------------
-./elastic-index.pl --create-index --populate
-./elastic-index.pl --delete-index --create-index --populate
-./elastic-index.pl --populate --modified-since 2019-09-17T14:45:00
+./elastic-index.pl --index-name my-bib-index --create-index --populate
+./elastic-index.pl --index-name my-bib-index --delete-index --create-index --populate
+./elastic-index.pl --index-name my-bib-index --populate --modified-since 2019-09-17T14:45:00
------------------------------------------------------------------------------
== Bib Search Index ==
-A single 'bib-search' index is defined by default. The structure of the index
-is derived from the local Evergreen index definitions. No additional index
-definitions or modifications are required to get started.
-
-=== General Stucture ===
-
-The bib-search index contains 3 general categories of data for each
-bib record:
-
-1. Bib record search/filter data pulled from metabib fields and record
- attribute definitions
-2. MARC record data
-3. Holdings summaries for filtering by library, availability, etc.
+A single 'bib-search' index class is defined by default. The structure
+of the indexes in this class is determined by entries in the
+elastic.bib_field table. A default bib record transform file, located
+at Open-ILS/xsl/elastic-bib-transform.xsl, extracts the indexed data
+from the MARCXML of each record.
=== Search Fields ===
Search fields are grouped by search class (title, author, etc.). Searches
can be performed against a specific field or across the class.
-Search field values are extracted from metabib.*_field_entry tables
-and reindexed in Elasticsearch using a combination of text and keyword
-analyzers: default text, language-specific text, asciifolding text
-(e.g. Grandpré => Grandpre) and lowercase keyword (for exact matches).
-
==== Caveats ====
* Author fields are not presently indexed with language-specific analyzers,
=== Facet Fields ===
-Field marked as facets get an extra '.facet' property which is a raw,
-unprocessed copy of the data used for aggregation.
-
-=== Filter Fields ===
-
-These concist of record attribute values and are indexed as simple
-'keyword' entries, lowercased for ease of searching / filtering.
+Fields marked as facets are tracked via a separate
+'$field_class|$name|facet' field, which contains only an unprocessed
+version of the facet-specific data output from the record transform.
=== MARC Data ===
== Features Pending ==
Some existing Evergreen features are not supported by the ES API, though in
-most if not all cases they can be added.
+most cases it should be possible to add them.
* Popularity ranking
+* Search Highlighting