# NOTE(review): presumably the number of bib records handled per batch
# when the caller does not specify one; not used in the visible code -- confirm.
my $DEFAULT_BIB_BATCH_SIZE = 500;
# NOTE(review): presumably the index class name for the bib search index;
# not used in the visible code -- confirm.
my $INDEX_CLASS = 'bib-search';
+# https://www.elastic.co/guide/en/elasticsearch/reference/current/ignore-above.html
+# Length cap used as the 'ignore_above' value on the 'keyword' field
+# definitions, so excessively long filter, sorter, and facet values are
+# ignored. It is never set on the 'text' sub-fields, which are unaffected.
+my $IGNORE_ABOVE = 256;
+
my $BASE_INDEX_SETTINGS = {
analysis => {
analyzer => {
# searched via 'text' indexes.
title => {
type => 'keyword',
- ignore_above => 256,
+ ignore_above => $IGNORE_ABOVE,
normalizer => 'custom_lowercase',
fields => {
text => {type => 'text'},
},
author => {
type => 'keyword',
- ignore_above => 256,
+ ignore_above => $IGNORE_ABOVE,
normalizer => 'custom_lowercase',
fields => {
text => {type => 'text'},
},
subject => {
type => 'keyword',
- ignore_above => 256,
+ ignore_above => $IGNORE_ABOVE,
normalizer => 'custom_lowercase',
fields => {
text => {type => 'text'},
},
series => {
type => 'keyword',
- ignore_above => 256,
+ ignore_above => $IGNORE_ABOVE,
normalizer => 'custom_lowercase',
fields => {
text => {type => 'text'},
# keyword field, but we index it just the same (sans lowercase)
# for structural consistency with other group fields.
type => 'keyword',
- ignore_above => 256,
+ ignore_above => $IGNORE_ABOVE,
fields => {
text => {type => 'text'},
text_folded => {type => 'text', analyzer => 'folding'},
identifier => {
# Avoid full-text indexing on identifier fields.
type => 'keyword',
- ignore_above => 256,
+ ignore_above => $IGNORE_ABOVE,
normalizer => 'custom_lowercase',
},
kw => {type => 'text'},
id => {
type => 'keyword',
- ignore_above => 256
+ ignore_above => $IGNORE_ABOVE
}
};
}
} else {
-
- # Non-grouped fields are used for filtering and sorting, so
- # they don't need as much processing.
+ # Filters and sorters
$def = {
type => 'keyword',
- ignore_above => 256,
+ ignore_above => $IGNORE_ABOVE,
normalizer => 'custom_lowercase'
};
}
+ if ($def) {
+ $logger->debug("ES adding field $field_name: ".
+ OpenSRF::Utils::JSON->perl2JSON($def));
+
+ $properties->{$field_name} = $def;
+ }
+
+ # Search and facet fields can have the same name/group pair,
+ # but are stored as separate fields in ES since the content
+ # may vary between the two.
if ($field->facet_field eq 't') {
- $def->{fields} = {} unless $def->{fields}; # facet only?
- # Facet fields are used for aggregation which requires
- # an additional unaltered keyword field.
- $def->{fields}->{facet} = {
+
+ # Facet fields are stored as separate fields, because their
+ # content may differ from the matching search field.
+ $field_name = "$field_name|facet";
+
+ $def = {
type => 'keyword',
- ignore_above => 256
+ ignore_above => $IGNORE_ABOVE
};
- }
- $logger->debug("ES adding field $field_name: ".
- OpenSRF::Utils::JSON->perl2JSON($def));
+ $logger->debug("ES adding field $field_name: ".
+ OpenSRF::Utils::JSON->perl2JSON($def));
- $properties->{$field_name} = $def;
+ $properties->{$field_name} = $def;
+ }
}
return $properties;
return 1;
}
+# TODO: elastic.bib_record_properties needs to also pull values
+# from metabib.facet_entry
+# TODO: stamp each field with a 'purpose' (search, facet, filter, sorter)
sub get_bib_data {
my ($self, $record_ids) = @_;
next unless defined $value && $value ne '';
$fname = "$fclass|$fname" if $fclass;
+ $fname = "$fname|facet" if $field->{purpose} eq 'facet';
$value = $self->truncate_value($value);
if ($fname eq 'identifier|isbn') {
return $self->{sorter} ? 't' : 'f';
}
+# Returns this field's weight. Falls back to 1 when the stored weight is
+# unset or otherwise false (undef, 0, or the empty string).
+sub weight {
+    # Unpack the invocant; without this, $self is undeclared here.
+    my $self = shift;
+    return $self->{weight} || 1;
+}
+
package OpenILS::Elastic::BibSearch::XSLT;
use strict;
use warnings;
my ($self) = @_;
$self->{xsl_doc} = XML::LibXML->load_xml(location => $self->xsl_file);
- unless ($self->{xsl_doc});
+ unless $self->{xsl_doc};
return $self->{xsl_doc};
}
push(@$fields, $field);
}
-# TODO: what to do about fields that have the same class/name
-# and are both search and facet fields, but the facet values
-# are different than the searched value?
-
sub get_dynamic_fields {
my $self = shift;
my $fields = [];
my $result = $self->xsl_sheet->transform($marc_doc);
my $output = $stylesheet->output_as_chars($result);
- my @fields = split(/\n/, $output);
- for my $field (@fields) {
- my @parts = split(/ /, $field);
- my $field_type = $parts[0];
+ my @rows = split(/\n/, $output);
+ my $first = 1;
+ for my $row (@rows) {
+ my @parts = split(/ /, $row);
+ my $purpose = $parts[0];
+
+ my $field = {purpose => $purpose};
+
+ if ($first) {
+ # Stamp the first field with the additional bib metadata.
+ $field->{$_} = $db_rec->{$_} for
+ qw/id bib_source metarecord create_date edit_date/;
+ $first = 0;
+ }
+
+ if ($purpose eq 'search') {
+ $field->{search_group} = @parts[1];
+ $field->{name} = @parts[2];
+ $field->{weight} = @parts[3];
+ $field->{value} = join(' ', @parts[4..$#parts]);
+
+ } elsif ($purpose eq 'facet') {
+ $field->{search_group} = @parts[1];
+ $field->{name} = @parts[2];
+ $field->{value} = join(' ', @parts[3..$#parts]);
- if ($field_type eq 'search') {
- } elsif ($field_type eq 'facet') {
- } elsif ($field_type eq 'filter') {
- } elsif ($field_type eq 'sorter') {
+ } elsif ($purpose eq 'filter' || $purpose eq 'sorter') {
+ $field->{name} = @parts[1];
+ $field->{value} = join(' ', @parts[2..$#parts]);
}
}
+
+ push(@$bib_data, $field);
}
+
+ return $bib_data;
}
sub get_bib_db_data {