Avoid huge index values
author Bill Erickson <berickxx@gmail.com>
Tue, 12 Feb 2019 17:02:36 +0000 (17:02 +0000)
committer Bill Erickson <berickxx@gmail.com>
Wed, 28 Aug 2019 21:41:55 +0000 (17:41 -0400)
Signed-off-by: Bill Erickson <berickxx@gmail.com>
Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm

index 52c0e02..8c6aeb2 100644 (file)
@@ -15,6 +15,7 @@ package OpenILS::Elastic::BibSearch;
 # ---------------------------------------------------------------
 use strict;
 use warnings;
+use Encode;
 use DateTime;
 use Time::HiRes qw/time/;
 use OpenSRF::Utils::Logger qw/:logger/;
@@ -302,25 +303,30 @@ sub populate_bib_index_batch {
 
             my $fclass = $field->{search_group};
             my $fname = $field->{name};
+            my $value = $field->{value};
             $fname = "$fclass|$fname" if $fclass;
 
+            # Lucene has a hard limit on the size of an indexable chunk.
+            # Avoid trying to index such data by lazily chopping it off
+            # at 1/4 the limit to accommodate all UTF-8 chars.
+            if (length(Encode::encode('UTF-8', $value)) > 32760) {
+                $value = substr($value, 0, 8190);
+            }
+
             if ($body->{$fname}) {
                 if (ref $body->{$fname}) {
                     # Three or more values encountered for field.
                     # Add to the list.
-                    push(@{$body->{$fname}}, $field->{value});
+                    push(@{$body->{$fname}}, $value);
                 } else {
                     # Second value encountered for field.
                     # Upgrade to array storage.
-                    $body->{$fname} = [
-                        $body->{$fname},
-                        $field->{value}
-                    ]
+                    $body->{$fname} = [$body->{$fname}, $value];
                 }
             } else {
                 # First value encountered for field.
                 # Assume for now there will only be one value.
-                $body->{$fname} = $field->{value}
+                $body->{$fname} = $value
             }
         }