From: Bill Erickson <berickxx@gmail.com>
Date: Mon, 11 Feb 2019 17:11:10 +0000 (-0500)
Subject: Indexer orders by ID for batch consistency
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=af88693e7596aa831188f3ecaf8d36b3507c1013;p=working%2FEvergreen.git

Indexer orders by ID for batch consistency

Signed-off-by: Bill Erickson <berickxx@gmail.com>
---

diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
index 86abb412a6..b9a7e1f390 100644
--- a/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
+++ b/Open-ILS/src/perlmods/lib/OpenILS/Elastic/BibSearch.pm
@@ -218,22 +218,26 @@ sub get_bib_ids {
     my $stop_id = $state->{stop_record}; # TODO
     my $start_date = $state->{start_date};
 
-    my ($select, $from, $where, $order);
+    my ($select, $from, $where);
     if ($start_date) {
         $select = "SELECT id";
         $from   = "FROM elastic.bib_last_mod_date";
         $where  = "WHERE last_mod_date > '$start_date'";
-        $order  = "ORDER BY last_mod_date";
     } else {
         $select = "SELECT id";
         $from   = "FROM biblio.record_entry";
         $where  = "WHERE NOT deleted AND active";
-        $order  = "ORDER BY edit_date, id";
     }
 
     $where .= " AND id >= $start_id" if $start_id;
     $where .= " AND id <= $stop_id" if $stop_id;
 
+    # Ordering by ID is the simplest way to guarantee all requested
+    # records are processed, given that edit dates may not be unique
+    # and that we're using start_id/stop_id instead of OFFSET to
+    # define the batches.
+    my $order = "ORDER BY id";
+
     my $sql = "$select $from $where $order LIMIT $BIB_BATCH_SIZE";
 
     my $ids = $self->get_db_rows($sql);