From: Bill Erickson Date: Tue, 12 Feb 2019 17:02:36 +0000 (+0000) Subject: Avoid huge index values X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=6aea27cc3741634fd4a484c760b1f5a0a4ebf805;p=working%2FEvergreen.git Avoid huge index values Signed-off-by: Bill Erickson --- diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm index 52c0e02e6a..8c6aeb2063 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm @@ -15,6 +15,7 @@ package OpenILS::Elastic::BibSearch; # --------------------------------------------------------------- use strict; use warnings; +use Encode; use DateTime; use Time::HiRes qw/time/; use OpenSRF::Utils::Logger qw/:logger/; @@ -302,25 +303,30 @@ sub populate_bib_index_batch { my $fclass = $field->{search_group}; my $fname = $field->{name}; + my $value = $field->{value}; $fname = "$fclass|$fname" if $fclass; + # Lucene has a hard limit on the size of an indexable chunk. + # Avoid trying to index such data by lazily chopping it off + # at 1/4 the limit to accommodate all UTF-8 chars. + if (length(Encode::encode('UTF-8', $value)) > 32760) { + $value = substr($value, 0, 8190); + } + if ($body->{$fname}) { if (ref $body->{$fname}) { # Three or more values encountered for field. # Add to the list. - push(@{$body->{$fname}}, $field->{value}); + push(@{$body->{$fname}}, $value); } else { # Second value encountered for field. # Upgrade to array storage. - $body->{$fname} = [ - $body->{$fname}, - $field->{value} - ] + $body->{$fname} = [$body->{$fname}, $value]; } } else { # First value encountered for field. # Assume for now there will only be one value. - $body->{$fname} = $field->{value} + $body->{$fname} = $value } }