}
sub create_index_properties {
- my ($self, $custom_properties) = @_;
-
- if ($custom_properties) {
- $logger->info("ES generating index mappings from custom file $custom_properties");
-
- my $json;
- {
- local $/=undef;
-
- if (!open(MAPPING_FILE, $custom_properties)) {
- $logger->error("ES cannot open mappings file: $!");
- return undef;
- }
-
- $json = <MAPPING_FILE>;
- close MAPPING_FILE;
- }
-
- my $struct = OpenSRF::Utils::JSON->JSON2perl($json);
- return $struct->{'bib-search'}->{mappings}->{record}->{properties};
- }
+ my ($self) = @_;
my $properties = $BASE_PROPERTIES;
}
sub create_index {
- my ($self, $custom_properties) = @_;
+ my ($self) = @_;
if ($self->es->indices->exists(index => $INDEX_NAME)) {
$logger->warn("ES index '$INDEX_NAME' already exists");
$logger->info(
"ES creating index '$INDEX_NAME' on cluster '".$self->cluster."'");
- my $properties = $self->create_index_properties($custom_properties);
+ my $properties = $self->create_index_properties;
my $settings = $BASE_INDEX_SETTINGS;
$settings->{number_of_replicas} = scalar(@{$self->nodes});
BEGIN;
+ALTER TABLE config.record_attr_definition
+ ADD COLUMN elastic_field BOOLEAN NOT NULL DEFAULT FALSE;
+
+ALTER TABLE config.metabib_field
+ ADD COLUMN elastic_field BOOLEAN NOT NULL DEFAULT FALSE;
+
+-- Provide a sweeping set of default elastic fields.
+-- This set can likely be trimmed significantly at most sites, since many
+-- of these fields will never be searched from the catalog.
+-- Reducing the number of fields flagged as elastic_field will improve
+-- indexing and search time and reduce Elastic disk space requirements.
+UPDATE config.record_attr_definition
+ SET elastic_field = TRUE WHERE name NOT LIKE 'marc21_%';
+
+UPDATE config.metabib_field
+ SET elastic_field = TRUE WHERE search_field OR facet_field;
+
CREATE SCHEMA elastic;
CREATE TABLE elastic.cluster (
FALSE AS facet_field,
1 AS weight
FROM config.record_attr_definition crad
- WHERE crad.name NOT LIKE '%_ind_%'
+ WHERE crad.elastic_field
UNION
SELECT
cmf.id AS metabib_field,
(cmf.field_class <> 'identifier' AND cmf.search_field) AS search_field,
cmf.facet_field,
cmf.weight
- FROM config.metabib_field cmf
- WHERE cmf.search_field OR cmf.facet_field
+ FROM config.metabib_field cmf
+ WHERE cmf.elastic_field
) fields;
-- Note this could be done with a view, but pushing the bib ID
FROM metabib.record_sorter mrs
JOIN config.record_attr_definition crad ON (crad.name = mrs.attr)
WHERE mrs.source = $$ || QUOTE_LITERAL(bre_id) || $$
+ AND crad.elastic_field
UNION
-- record attributes
FROM metabib.record_attr_flat mraf
JOIN config.record_attr_definition crad ON (crad.name = mraf.attr)
WHERE mraf.id = $$ || QUOTE_LITERAL(bre_id) || $$
+ AND crad.elastic_field
UNION
-- metabib field search/facet entries
-- longer be used by EG).
SELECT * FROM biblio.extract_metabib_field_entry(
$$ || QUOTE_LITERAL(bre_id) || $$, ' ', '{facet,search}',
- (SELECT ARRAY_AGG(id) FROM config.metabib_field
- WHERE search_field OR facet_field)
+ (SELECT ARRAY_AGG(id)
+ FROM config.metabib_field WHERE elastic_field)
)
) compiled
JOIN config.metabib_field cmf ON (cmf.id = compiled.field)
/* SEED DATA ------------------------------------------------------------ */
-INSERT INTO elastic.cluster (code, label) VALUES ('main', 'Main Cluster');
+INSERT INTO elastic.cluster (code, label)
+ VALUES ('main', 'Main Cluster');
-INSERT INTO elastic.node
- (label, host, proto, port, active, cluster)
-VALUES ('Localhost', 'localhost', 'http', 9200, TRUE, 'main');
+INSERT INTO elastic.node (label, host, proto, port, active, cluster)
+ VALUES ('Localhost', 'localhost', 'http', 9200, TRUE, 'main');
INSERT INTO elastic.index (code, active, cluster)
-VALUES ('bib-search', TRUE, 'main');
+ VALUES ('bib-search', TRUE, 'main');
COMMIT;
DROP SCHEMA IF EXISTS elastic CASCADE;
+ALTER TABLE config.record_attr_definition DROP COLUMN elastic_field;
+
+ALTER TABLE config.metabib_field DROP COLUMN elastic_field;
+
*/
+/*
+-- Sample of a narrower set of elastic fields, to avoid duplication and to
+-- avoid indexing data that will likely never be searched.
+
+UPDATE config.metabib_field SET elastic_field = FALSE
+WHERE
+ (field_class = 'keyword' AND name <> 'keyword') OR
+ (field_class = 'subject' AND name = 'complete') OR
+ (field_class = 'author' AND name = 'first_author')
+;
+
+UPDATE config.record_attr_definition SET elastic_field = FALSE
+WHERE name NOT IN (
+ 'authorsort',
+ 'date1',
+ 'date2',
+ 'bib_level',
+ 'icon_format',
+ 'item_form',
+ 'item_lang',
+ 'item_type',
+ 'lit_form',
+ 'search_format',
+ 'titlesort',
+ 'sr_format',
+ 'vr_format'
+);
+
my $modified_since;
my $max_duration;
my $batch_size = 500;
-my $custom_mappings;
# Database settings read from ENV by default.
my $db_host = $ENV{PGHOST} || 'localhost';
'modified-since=s' => \$modified_since,
'max-duration=s' => \$max_duration,
'batch-size=s' => \$batch_size,
- 'custom-mappings=s' => \$custom_mappings,
'db-name=s' => \$db_name,
'db-host=s' => \$db_host,
'db-port=s' => \$db_port,
are provided (e.g. --index-start-record) then all
applicable values will be indexed.
- --custom-mappings
- Path to a JSON file continaining custom index mapping
- definitions. The mapppings must match the stock mapping
- structure, fields may only be removed. Added fields will
- be ignored at data population time (barring code changes).
-
- For example:
-
- curl http://ELASTIC_HOST/bib-search?pretty > mappings.json
- # edit mappings.json and remove stuff you don't want.
- $0 --create-index --custom-mappings mappings.json
HELP
exit(0);
}
}
if ($create_index) {
- $es->create_index($custom_mappings) or die "Index create failed.\n";
+ $es->create_index or die "Index create failed.\n";
}
if ($populate) {
+++ /dev/null
-{
- "//": "File initially generated from a stock Evergreen Elastic index and trimmed to reduce duplication and remove fields that are not typically searched via the catalog. See --custom-mappings documentation in elastic-index.pl",
- "bib-search": {
- "aliases": {},
- "mappings": {
- "record": {
- "dynamic": "false",
- "properties": {
- "au": {
- "type": "text"
- },
- "audience": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "author": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "authorsort": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "author|conference": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "author",
- "au"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "author|corporate": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "author",
- "au"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "author|creator": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "author",
- "au"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "author|first_author": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "author",
- "au"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "author|other": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "author",
- "au"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "author|personal": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "author",
- "au"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "bib_level": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "bib_source": {
- "type": "integer"
- },
- "cat_form": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "create_date": {
- "type": "date"
- },
- "date1": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "date2": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "edit_date": {
- "type": "date"
- },
- "holdings": {
- "type": "nested",
- "properties": {
- "circ_lib": {
- "type": "integer"
- },
- "circulate": {
- "type": "boolean"
- },
- "location": {
- "type": "integer"
- },
- "opac_visible": {
- "type": "boolean"
- },
- "status": {
- "type": "integer"
- }
- }
- },
- "icon_format": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "id": {
- "type": "keyword",
- "ignore_above": 256
- },
- "identifier": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|accession": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|authority_id": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|bibcn": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|bibid": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|ean": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|edition": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|genre": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|isbn": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|ismn": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|isrc": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|issn": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|lccn": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|publisher": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|scn": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|sici": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|tcn": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "identifier|upc": {
- "type": "keyword",
- "copy_to": [
- "identifier",
- "id"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "item_form": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "item_lang": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "item_type": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "keyword": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "ignore_above": 256
- },
- "keyword|keyword": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "keyword",
- "kw"
- ],
- "ignore_above": 256
- },
- "kw": {
- "type": "text"
- },
- "lit_form": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "marc": {
- "type": "nested",
- "properties": {
- "subfield": {
- "type": "keyword",
- "normalizer": "custom_lowercase"
- },
- "tag": {
- "type": "keyword",
- "normalizer": "custom_lowercase"
- },
- "value": {
- "type": "text",
- "fields": {
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- }
- }
- }
- },
- "metarecord": {
- "type": "integer"
- },
- "se": {
- "type": "text"
- },
- "search_format": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "series": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "series|seriestitle": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "series",
- "se"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "sr_format": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "su": {
- "type": "text"
- },
- "subject": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "subject|geographic": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "subject",
- "su"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "subject|name": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "subject",
- "su"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "subject|temporal": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "subject",
- "su"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "subject|topic": {
- "type": "keyword",
- "fields": {
- "facet": {
- "type": "keyword",
- "ignore_above": 256
- },
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "subject",
- "su"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "ti": {
- "type": "text"
- },
- "title": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "titlesort": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "title|abbreviated": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "title",
- "ti"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "title|alternative": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "title",
- "ti"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "title|maintitle": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text",
- "boost": 10
- },
- "text_english": {
- "type": "text",
- "boost": 10,
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "boost": 10,
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "title",
- "ti"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "title|proper": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "title",
- "ti"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "title|translated": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "title",
- "ti"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "title|uniform": {
- "type": "keyword",
- "fields": {
- "text": {
- "type": "text"
- },
- "text_english": {
- "type": "text",
- "analyzer": "english"
- },
- "text_folded": {
- "type": "text",
- "analyzer": "folding"
- }
- },
- "copy_to": [
- "title",
- "ti"
- ],
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- },
- "vr_format": {
- "type": "keyword",
- "ignore_above": 256,
- "normalizer": "custom_lowercase"
- }
- }
- }
- }
- }
-}