From: Bill Erickson Date: Fri, 27 Sep 2019 19:44:34 +0000 (-0400) Subject: Avoid passing non-indexed fields to save disk space X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=1c00e23a8cfccc1fd4c4e0e3d2985ebc784b7ffd;p=working%2FEvergreen.git Avoid passing non-indexed fields to save disk space Signed-off-by: Bill Erickson --- diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm index d049630d4a..be430e5e4b 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic.pm @@ -263,6 +263,10 @@ sub truncate_value { return $value; } +sub get_index_def { + my ($self) = @_; + return $self->es->indices->get(index => $self->index_name); +} diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm index 8e15361abf..d026360861 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/Bib/Search.pm @@ -392,6 +392,13 @@ sub populate_bib_index_batch { my $holdings = $self->load_holdings($bib_ids); my $marc = $self->load_marc($bib_ids); + my $def = $self->get_index_def; + + # Ask ES what the index properties are so we can avoid passing data + # that will not be indexed, since ES will store the data on the source + # object even if it's not indexed. This reduces bulk. + my $properties = # nestier than expected, not sure why. + $def->{$self->index_name}->{mappings}->{record}->{properties}->{record}->{properties}; for my $bib_id (@$bib_ids) { @@ -429,7 +436,7 @@ sub populate_bib_index_batch { } elsif ($fname eq 'identifier|issn') { index_issns($body, $value); } else { - append_field_value($body, $fname, $value); + append_field_value($body, $fname, $value, $properties); } } @@ -495,7 +502,11 @@ sub index_issns { } sub append_field_value { - my ($body, $fname, $value) = @_; + my ($body, $fname, $value, $properties) = @_; + + # Confirm the data is wanted in the index before passing to ES to + # reduce the overall data footprint. + return unless $properties->{$fname}; if ($body->{$fname}) { if (ref $body->{$fname}) {