From: Bill Erickson Date: Fri, 21 Feb 2020 15:32:43 +0000 (-0500) Subject: LP1844418 Direct indexing experiment WIP X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=63e154d72ca13fb3d60f85fd67625a2bad0054ac;p=working%2FEvergreen.git LP1844418 Direct indexing experiment WIP Signed-off-by: Bill Erickson --- diff --git a/Open-ILS/examples/fm_IDL.xml b/Open-ILS/examples/fm_IDL.xml index 0a62b37870..c32473a625 100644 --- a/Open-ILS/examples/fm_IDL.xml +++ b/Open-ILS/examples/fm_IDL.xml @@ -12985,16 +12985,16 @@ SELECT usr, controller="open-ils.cstore open-ils.pcrud" oils_obj:fieldmapper="elastic::bib_field" oils_persist:tablename="elastic.bib_field" - reporter:label="Elastic Bib Index field" - oils_persist:readonly="true"> - - + reporter:label="Elastic Bib Index field"> + + - - + + + diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm index 2f725e6006..4c29f38471 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Elastic.pm @@ -19,7 +19,6 @@ use warnings; use OpenSRF::Utils::JSON; use OpenSRF::Utils::Logger qw/:logger/; use OpenILS::Utils::Fieldmapper; -use OpenSRF::Utils::SettingsClient; use OpenILS::Utils::CStoreEditor q/:funcs/; use OpenILS::Elastic::BibSearch; use Digest::MD5 qw(md5_hex); @@ -49,8 +48,7 @@ sub init { my $e = new_editor(); - # no pkey - $bib_fields = $e->search_elastic_bib_field({name => {'!=' => undef}}); + $bib_fields = $e->retrieve_all_elastic_bib_field; my $stats = $e->json_query({ select => {ccs => ['id', 'opac_visible', 'is_available']}, @@ -288,7 +286,7 @@ sub format_facets { my ($field_class, $name) = split(/\|/, $fname); my ($bib_field) = grep { - $_->name eq $name && $_->search_group eq $field_class + $_->name eq $name && $_->field_class eq $field_class } @$bib_fields; my $hash = $facets->{$bib_field->metabib_field} = {}; @@ -312,7 +310,7 @@ sub add_elastic_facet_aggregations { for my $facet (@facet_fields) { my $fname = $facet->name; - my $fgrp = $facet->search_group; + my $fgrp = $facet->field_class; $fname = "$fgrp|$fname" if $fgrp; $elastic_query->{aggs}{$fname} = {terms => {field => "$fname|facet"}}; diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm index 39bbcd8350..c1d1de97ce 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm @@ -12,45 +12,6 @@ # MERCHANTABILITY or FITNESS FOR A PARTICULAR code. See the # GNU General Public License for more details. # --------------------------------------------------------------- -package OpenILS::Elastic::BibSearch::BibField; -# Models a single indexable field. -use strict; -use warnings; - -sub new { - my ($class, %args) = @_; - return bless(\%args, $class); -} -sub name { - my $self = shift; - return $self->{name}; -} -sub field_class { - my $self = shift; - return $self->{field_class}; -} -sub search_field { - my $self = shift; - return $self->{purpose} eq 'search'; -} -sub facet_field { - my $self = shift; - return $self->{purpose} eq 'facet'; -} -sub sorter { - my $self = shift; - return $self->{purpose} eq 'sorter'; -} -sub filter { - my $self = shift; - return $self->{purpose} eq 'filter'; -} -sub weight { - my $self = shift; - return $self->{weight} || 1; -} - -# --------------------------------------------------------------- package OpenILS::Elastic::BibSearch; use strict; use warnings; @@ -59,6 +20,7 @@ use Clone 'clone'; use Time::HiRes qw/time/; use OpenSRF::Utils::Logger qw/:logger/; use OpenSRF::Utils::JSON; +use OpenSRF::Utils::SettingsClient; use OpenILS::Utils::CStoreEditor qw/:funcs/; use OpenILS::Utils::DateTime qw/interval_to_seconds/; use OpenILS::Elastic; @@ -257,8 +219,31 @@ sub language_analyzers { } sub xsl_file { - my ($self, $filename) = @_; - $self->{xsl_file} = $filename if $filename; + my ($self) = @_; + + if (!$self->{xsl_file}) { + + my $client = OpenSRF::Utils::SettingsClient->new; + my $dir = $client->config_value("dirs", "xsl"); + + my $filename = new_editor()->search_config_global_flag({ + name => 'elastic.bib_search.transform_file', + enabled => 't' + })->[0]; + + if ($filename) { + $self->{xsl_file} = "$dir/" . $filename->value; + + } else { + die <<' TEXT'; + No XSL file provided for Elastic::BibSearch. Confirm + config.global_flag "elastic.bib_search.transform_file" + is enabled, contains a valid value, and the file exists + in the XSL directory. + TEXT + } + } + return $self->{xsl_file}; } @@ -280,52 +265,6 @@ sub xsl_sheet { return $self->{xsl_sheet}; } -my @seen_fields; -sub add_dynamic_field { - my ($self, $fields, $purpose, $field_class, $name, $weight) = @_; - return unless $name; - - $weight = '' if !$weight || $weight eq '_'; - $field_class = '' if !$field_class || $field_class eq '_'; - - my $tag = $purpose . $field_class . $name; - return if grep {$_ eq $tag} @seen_fields; - push(@seen_fields, $tag); - - $logger->info("ES adding dynamic field purpose=$purpose ". - "field_class=$field_class name=$name weight=$weight"); - - my $field = OpenILS::Elastic::BibSearch::BibField->new( - purpose => $purpose, - field_class => $field_class, - name => $name, - weight => $weight - ); - - push(@$fields, $field); -} - -sub get_dynamic_fields { - my $self = shift; - my $fields = []; - - @seen_fields = (); # reset with each run - - # Apply the transform in "target=index-fields" mode to extract just - # the field definitions. - my $null_doc = XML::LibXML->load_xml(string => ''); - my $result = $self->xsl_sheet->transform($null_doc, target => '"index-fields"'); - my $output = $self->xsl_sheet->output_as_chars($result); - - my @rows = split(/\n/, $output); - for my $row (@rows) { - my @parts = split(/ /, $row); - $self->add_dynamic_field($fields, @parts); - } - - return $fields; -} - sub get_bib_data { my ($self, $record_ids) = @_; @@ -341,8 +280,7 @@ sub get_bib_data { } my $marc_doc = XML::LibXML->load_xml(string => $db_rec->{marc}); - my $result = - $self->xsl_sheet->transform($marc_doc, target => '"index-values"'); + my $result = $self->xsl_sheet->transform($marc_doc, target => '"index-values"'); my $output = $self->xsl_sheet->output_as_chars($result); my @rows = split(/\n/, $output); @@ -413,7 +351,7 @@ sub create_index_properties { } foreach qw/title subject series keyword/; } - my $fields = $self->get_dynamic_fields; + my $fields = new_editor()->retrieve_all_elastic_bib_field; for my $field (@$fields) { @@ -424,7 +362,7 @@ sub create_index_properties { my $def; if ($field_class) { - if ($field->search_field) { + if ($field->search_field eq 't') { # Use the same fields and analysis as the 'grouped' field. $def = clone($properties->{$field_class}); @@ -448,7 +386,7 @@ sub create_index_properties { # Long sorter values are not necessarily unexpected, # e.g. long titles. - $def->{ignore_above} = $IGNORE_ABOVE unless $field->sorter; + $def->{ignore_above} = $IGNORE_ABOVE unless $field->sorter eq 't'; } if ($def) { @@ -461,7 +399,7 @@ sub create_index_properties { # Search and facet fields can have the same name/group pair, # but are stored as separate fields in ES since the content # may vary between the two. - if ($field->facet_field) { + if ($field->facet_field eq 't') { # Facet fields are stored as separate fields, because their # content may differ from the matching search field. @@ -527,7 +465,7 @@ sub create_index { $self->create_one_field_index($field, $properties->{$field}); } - # Now that we've added the static (and dynamic) fields, + # Now that we've added the configured fields, # add the shortened field_class aliases. while (my ($alias, $field) = each %SEARCH_CLASS_ALIAS_MAP) { return 0 unless $self->create_one_field_index( diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql index 077101f769..947b787c38 100644 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql @@ -3,10 +3,14 @@ DROP SCHEMA IF EXISTS elastic CASCADE; BEGIN; -INSERT INTO config.global_flag (name, enabled, label) +INSERT INTO config.global_flag (name, enabled, label, value) VALUES ( 'elastic.bib_search.enabled', FALSE, - 'Elasticsearch Enable Bib Searching' + 'Elasticsearch Enable Bib Searching', NULL +), ( + 'elastic.bib_search.transform_file', FALSE, + 'Elasticsearch Bib Transform File [Relative to xsl directory]', + 'elastic-bib-transform.xsl' ); CREATE SCHEMA elastic; @@ -41,6 +45,22 @@ CREATE TABLE elastic.index ( CONSTRAINT valid_index_class CHECK (index_class IN ('bib-search')) ); +-- XXX consider storing the xsl chunk directly on the field, +-- then stitching the chunks together for indexing. This would +-- require a search chunk and a facet chunk. +CREATE TABLE elastic.bib_field ( + id SERIAL PRIMARY KEY, + name TEXT NOT NULL, + field_class TEXT REFERENCES config.metabib_class(name) ON DELETE CASCADE, + label TEXT NOT NULL UNIQUE, + search_field BOOLEAN NOT NULL DEFAULT FALSE, + facet_field BOOLEAN NOT NULL DEFAULT FALSE, + filter BOOLEAN NOT NULL DEFAULT FALSE, + sorter BOOLEAN NOT NULL DEFAULT FALSE, + weight INTEGER NOT NULL DEFAULT 1, + CONSTRAINT name_class_once_per_field UNIQUE (name, field_class) +); + CREATE OR REPLACE VIEW elastic.bib_last_mod_date AS /** * Last update date for each bib, which is taken from most recent @@ -72,6 +92,70 @@ INSERT INTO elastic.cluster (code, label) INSERT INTO elastic.node (label, host, proto, port, active, cluster) VALUES ('Localhost', 'localhost', 'http', 9200, TRUE, 'main'); +INSERT INTO elastic.bib_field + (field_class, name, label, search_field, facet_field, filter, sorter, weight) +VALUES ( + 'author', 'conference', '', FALSE, TRUE, FALSE, FALSE, 1), + 'author', 'corporate', '', FALSE, TRUE, FALSE, FALSE, 1), + 'author', 'personal', '', FALSE, TRUE, FALSE, FALSE, 1), + 'series', 'seriestitle', '', FALSE, TRUE, FALSE, FALSE, 1), + 'subject', 'geographic', '', FALSE, TRUE, FALSE, FALSE, 1), + 'subject', 'name', '', FALSE, TRUE, FALSE, FALSE, 1), + 'subject', 'topic', '', FALSE, TRUE, FALSE, FALSE, 1), + 'title', 'seriestitle', '', FALSE, TRUE, FALSE, FALSE, 1), + +filter _ audience _ +filter _ bib_level _ +filter _ date1 _ +filter _ date2 _ +filter _ item_form _ +filter _ item_lang _ +filter _ item_type _ +filter _ lit_form _ +filter _ search_format _ +filter _ sr_format _ +filter _ vr_format _ +search author added_personal +search author conference +search author conference_series +search author corporate +search author corporate_series +search author meeting +search author personal +search author personal_series +search author responsibility +search identifier bibcn +search identifier isbn +search identifier issn +search identifier lccn +search identifier match_isbn +search identifier sudoc +search identifier tech_number +search identifier upc +search keyword keyword _ +search keyword publisher +search series seriestitle +search subject corpname +search subject genre +search subject geographic +search subject meeting +search subject name +search subject topic +search subject uniftitle +search title abbreviated +search title added +search title alternative +search title former +search title magazine +search title maintitle 10 +search title previous +search title proper +search title seriestitle +search title succeeding +search title uniform +sorter _ author _ +sorter _ pubdate _ +sorter _ title _ COMMIT; /* UNDO diff --git a/Open-ILS/src/support-scripts/elastic-index.pl b/Open-ILS/src/support-scripts/elastic-index.pl index 8649c6dd73..64faff7804 100755 --- a/Open-ILS/src/support-scripts/elastic-index.pl +++ b/Open-ILS/src/support-scripts/elastic-index.pl @@ -9,7 +9,7 @@ use OpenILS::Elastic::BibSearch; my $help; my $osrf_config = '/openils/conf/opensrf_core.xml'; -my $bib_transform = '/openils/var/xsl/elastic-bib-transform.xsl'; +my $bib_transform; my $cluster; my $create_index; my $delete_index; @@ -79,6 +79,10 @@ sub help { Values default to their PG* environment variable equivalent. + --bib-transform + Override the configured global config value for + 'elastic.bib_search.transform_file' + --cluster Specify a cluster name. Defaults to 'main'. diff --git a/Open-ILS/xsl/elastic-bib-transform.xsl b/Open-ILS/xsl/elastic-bib-transform.xsl index abed33b632..b72f37a8f5 100644 --- a/Open-ILS/xsl/elastic-bib-transform.xsl +++ b/Open-ILS/xsl/elastic-bib-transform.xsl @@ -5,32 +5,6 @@