From 3a17e1fc403322b256fc3ca852ace2fcab873e9d Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Thu, 3 Oct 2019 10:46:39 -0400 Subject: [PATCH] Move to in-db field configs Signed-off-by: Bill Erickson --- .../src/perlmods/lib/OpenILS/Elastic/BibSearch.pm | 26 +- .../sql/Pg/upgrade/XXXX.schema.elastic-search.sql | 71 +- Open-ILS/src/support-scripts/elastic-index.pl | 15 +- .../support-scripts/elastic-mappings.example.json | 818 --------------------- 4 files changed, 65 insertions(+), 865 deletions(-) delete mode 100644 Open-ILS/src/support-scripts/elastic-mappings.example.json diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm index 341d345a42..d45d62a318 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm @@ -184,27 +184,7 @@ sub language_analyzers { } sub create_index_properties { - my ($self, $custom_properties) = @_; - - if ($custom_properties) { - $logger->info("ES generating index mappings from custom file $custom_properties"); - - my $json; - { - local $/=undef; - - if (!open(MAPPING_FILE, $custom_properties)) { - $logger->error("ES cannot open mappings file: $!"); - return undef; - } - - $json = <MAPPING_FILE>; - close MAPPING_FILE; - } - - my $struct = OpenSRF::Utils::JSON->JSON2perl($json); - return $struct->{'bib-search'}->{mappings}->{record}->{properties}; - } + my ($self) = @_; my $properties = $BASE_PROPERTIES; @@ -280,7 +260,7 @@ sub create_index_properties { } sub create_index { - my ($self, $custom_properties) = @_; + my ($self) = @_; if ($self->es->indices->exists(index => $INDEX_NAME)) { $logger->warn("ES index '$INDEX_NAME' already exists"); @@ -290,7 +270,7 @@ sub create_index { $logger->info( "ES creating index '$INDEX_NAME' on cluster '".$self->cluster."'"); - my $properties = $self->create_index_properties($custom_properties); + my $properties = $self->create_index_properties; my $settings = 
$BASE_INDEX_SETTINGS; $settings->{number_of_replicas} = scalar(@{$self->nodes}); diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql index d890540ced..a8890f1839 100644 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.elastic-search.sql @@ -3,6 +3,23 @@ DROP SCHEMA IF EXISTS elastic CASCADE; BEGIN; +ALTER TABLE config.record_attr_definition + ADD COLUMN elastic_field BOOLEAN NOT NULL DEFAULT FALSE; + +ALTER TABLE config.metabib_field + ADD COLUMN elastic_field BOOLEAN NOT NULL DEFAULT FALSE; + +-- Provide a sweeping set of default elastic fields. +-- Likely this set of fields can be trimmed significantly for most sites, +-- since many of these fields will never be searched from the catalog. +-- Reducing the number of elastic_field's will improve indexing time, +-- search time, and reduce Elastic disk space requirements. +UPDATE config.record_attr_definition + SET elastic_field = TRUE WHERE name NOT LIKE 'marc21_%'; + +UPDATE config.metabib_field + SET elastic_field = TRUE WHERE search_field OR facet_field; + CREATE SCHEMA elastic; CREATE TABLE elastic.cluster ( @@ -45,7 +62,7 @@ CREATE OR REPLACE VIEW elastic.bib_field AS FALSE AS facet_field, 1 AS weight FROM config.record_attr_definition crad - WHERE crad.name NOT LIKE '%_ind_%' + WHERE crad.elastic_field UNION SELECT cmf.id AS metabib_field, @@ -57,8 +74,8 @@ CREATE OR REPLACE VIEW elastic.bib_field AS (cmf.field_class <> 'identifier' AND cmf.search_field) AS search_field, cmf.facet_field, cmf.weight - FROM config.metabib_field cmf - WHERE cmf.search_field OR cmf.facet_field + FROM config.metabib_field cmf + WHERE cmf.elastic_field ) fields; -- Note this could be done with a view, but pushing the bib ID @@ -85,6 +102,7 @@ BEGIN FROM metabib.record_sorter mrs JOIN config.record_attr_definition crad ON (crad.name = mrs.attr) WHERE mrs.source = $$ || QUOTE_LITERAL(bre_id) || 
$$ + AND crad.elastic_field UNION -- record attributes @@ -96,6 +114,7 @@ BEGIN FROM metabib.record_attr_flat mraf JOIN config.record_attr_definition crad ON (crad.name = mraf.attr) WHERE mraf.id = $$ || QUOTE_LITERAL(bre_id) || $$ + AND crad.elastic_field UNION -- metabib field search/facet entries @@ -119,8 +138,8 @@ BEGIN -- longer be used by EG). SELECT * FROM biblio.extract_metabib_field_entry( $$ || QUOTE_LITERAL(bre_id) || $$, ' ', '{facet,search}', - (SELECT ARRAY_AGG(id) FROM config.metabib_field - WHERE search_field OR facet_field) + (SELECT ARRAY_AGG(id) + FROM config.metabib_field WHERE elastic_field) ) ) compiled JOIN config.metabib_field cmf ON (cmf.id = compiled.field) @@ -155,14 +174,14 @@ CREATE OR REPLACE VIEW elastic.bib_last_mod_date AS /* SEED DATA ------------------------------------------------------------ */ -INSERT INTO elastic.cluster (code, label) VALUES ('main', 'Main Cluster'); +INSERT INTO elastic.cluster (code, label) + VALUES ('main', 'Main Cluster'); -INSERT INTO elastic.node - (label, host, proto, port, active, cluster) -VALUES ('Localhost', 'localhost', 'http', 9200, TRUE, 'main'); +INSERT INTO elastic.node (label, host, proto, port, active, cluster) + VALUES ('Localhost', 'localhost', 'http', 9200, TRUE, 'main'); INSERT INTO elastic.index (code, active, cluster) -VALUES ('bib-search', TRUE, 'main'); + VALUES ('bib-search', TRUE, 'main'); COMMIT; @@ -170,5 +189,37 @@ COMMIT; DROP SCHEMA IF EXISTS elastic CASCADE; +ALTER TABLE config.record_attr_definition DROP COLUMN elastic_field; + +ALTER TABLE config.metabib_field DROP COLUMN elastic_field; + */ +/* +-- Sample narrower set of elastic fields to avoid duplication and +-- indexing data that will likely never be searched. 
+ +UPDATE config.metabib_field SET elastic_field = FALSE +WHERE + (field_class = 'keyword' AND name <> 'keyword') OR + (field_class = 'subject' AND name = 'complete') OR + (field_class = 'author' AND name = 'first_author') +; + +UPDATE config.record_attr_definition SET elastic_field = FALSE +WHERE name NOT IN ( + 'authorsort', + 'date1', + 'date2', + 'bib_level', + 'icon_format', + 'item_form', + 'item_lang', + 'item_type', + 'lit_form', + 'search_format', + 'titlesort', + 'sr_format', + 'vr_format' +); +*/ diff --git a/Open-ILS/src/support-scripts/elastic-index.pl b/Open-ILS/src/support-scripts/elastic-index.pl index 5cc495bf8c..05ebc6b9dc 100755 --- a/Open-ILS/src/support-scripts/elastic-index.pl +++ b/Open-ILS/src/support-scripts/elastic-index.pl @@ -20,7 +20,6 @@ my $stop_record; my $modified_since; my $max_duration; my $batch_size = 500; -my $custom_mappings; # Database settings read from ENV by default. my $db_host = $ENV{PGHOST} || 'localhost'; @@ -43,7 +42,6 @@ GetOptions( 'modified-since=s' => \$modified_since, 'max-duration=s' => \$max_duration, 'batch-size=s' => \$batch_size, - 'custom-mappings=s' => \$custom_mappings, 'db-name=s' => \$db_name, 'db-host=s' => \$db_host, 'db-port=s' => \$db_port, @@ -114,17 +112,6 @@ sub help { are provided (e.g. --index-start-record) then all applicable values will be indexed. - --custom-mappings - Path to a JSON file continaining custom index mapping - definitions. The mapppings must match the stock mapping - structure, fields may only be removed. Added fields will - be ignored at data population time (barring code changes). - - For example: - - curl http://ELASTIC_HOST/bib-search?pretty > mappings.json - # edit mappings.json and remove stuff you don't want. 
- $0 --create-index --custom-mappings mappings.json HELP exit(0); } @@ -154,7 +141,7 @@ if ($delete_index) { } if ($create_index) { - $es->create_index($custom_mappings) or die "Index create failed.\n"; + $es->create_index or die "Index create failed.\n"; } if ($populate) { diff --git a/Open-ILS/src/support-scripts/elastic-mappings.example.json b/Open-ILS/src/support-scripts/elastic-mappings.example.json deleted file mode 100644 index 75d90d6a68..0000000000 --- a/Open-ILS/src/support-scripts/elastic-mappings.example.json +++ /dev/null @@ -1,818 +0,0 @@ -{ - "//": "File initially generated from a stock Evergreen Elastic index and trimmed to reduce duplication and remove fields that are not typically searched via the catalog. See --custom-mappings documentation in elastic-index.pl", - "bib-search": { - "aliases": {}, - "mappings": { - "record": { - "dynamic": "false", - "properties": { - "au": { - "type": "text" - }, - "audience": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "author": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "authorsort": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "author|conference": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - "type": "text" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "author", - "au" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "author|corporate": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - "type": "text" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "author", - "au" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, 
- "author|creator": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "author", - "au" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "author|first_author": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "author", - "au" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "author|other": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - "type": "text" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "author", - "au" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "author|personal": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - "type": "text" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "author", - "au" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "bib_level": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "bib_source": { - "type": "integer" - }, - "cat_form": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "create_date": { - "type": "date" - }, - "date1": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "date2": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "edit_date": { - "type": "date" - }, - "holdings": { - "type": "nested", - "properties": { - "circ_lib": { - "type": "integer" - }, - "circulate": { - "type": "boolean" - }, - "location": { - "type": "integer" - }, - "opac_visible": { - "type": "boolean" - }, - "status": { - "type": "integer" - } - } - }, - "icon_format": { - "type": 
"keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "id": { - "type": "keyword", - "ignore_above": 256 - }, - "identifier": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|accession": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|authority_id": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|bibcn": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|bibid": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|ean": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|edition": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|genre": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|isbn": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|ismn": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|isrc": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|issn": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|lccn": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - 
"ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|publisher": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|scn": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|sici": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|tcn": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "identifier|upc": { - "type": "keyword", - "copy_to": [ - "identifier", - "id" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "item_form": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "item_lang": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "item_type": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "keyword": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "ignore_above": 256 - }, - "keyword|keyword": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "keyword", - "kw" - ], - "ignore_above": 256 - }, - "kw": { - "type": "text" - }, - "lit_form": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "marc": { - "type": "nested", - "properties": { - "subfield": { - "type": "keyword", - "normalizer": "custom_lowercase" - }, - "tag": { - "type": "keyword", - "normalizer": "custom_lowercase" - }, - 
"value": { - "type": "text", - "fields": { - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - } - } - } - }, - "metarecord": { - "type": "integer" - }, - "se": { - "type": "text" - }, - "search_format": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "series": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "series|seriestitle": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "series", - "se" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "sr_format": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "su": { - "type": "text" - }, - "subject": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "subject|geographic": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "subject", - "su" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "subject|name": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - 
"type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "subject", - "su" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "subject|temporal": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "subject", - "su" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "subject|topic": { - "type": "keyword", - "fields": { - "facet": { - "type": "keyword", - "ignore_above": 256 - }, - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "subject", - "su" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "ti": { - "type": "text" - }, - "title": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "titlesort": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "title|abbreviated": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "title", - "ti" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "title|alternative": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - 
"analyzer": "folding" - } - }, - "copy_to": [ - "title", - "ti" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "title|maintitle": { - "type": "keyword", - "fields": { - "text": { - "type": "text", - "boost": 10 - }, - "text_english": { - "type": "text", - "boost": 10, - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "boost": 10, - "analyzer": "folding" - } - }, - "copy_to": [ - "title", - "ti" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "title|proper": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "title", - "ti" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "title|translated": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "title", - "ti" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "title|uniform": { - "type": "keyword", - "fields": { - "text": { - "type": "text" - }, - "text_english": { - "type": "text", - "analyzer": "english" - }, - "text_folded": { - "type": "text", - "analyzer": "folding" - } - }, - "copy_to": [ - "title", - "ti" - ], - "ignore_above": 256, - "normalizer": "custom_lowercase" - }, - "vr_format": { - "type": "keyword", - "ignore_above": 256, - "normalizer": "custom_lowercase" - } - } - } - } - } -} -- 2.11.0