LP#1953057: improve reset-after-crash handling for Perl apps

author Galen Charlton <gmc@equinoxOLI.org>

Thu, 2 Dec 2021 16:48:48 +0000 (11:48 -0500)

committer Chris Sharp <csharp@georgialibraries.org>

Tue, 18 Jan 2022 18:10:09 +0000 (13:10 -0500)
author Galen Charlton <gmc@equinoxOLI.org>
Thu, 2 Dec 2021 16:48:48 +0000 (11:48 -0500)
committer Chris Sharp <csharp@georgialibraries.org>
Tue, 18 Jan 2022 18:10:09 +0000 (13:10 -0500)
diff --git a/src/perl/lib/OpenSRF/Server.pm b/src/perl/lib/OpenSRF/Server.pm

index db18302..4645a5c 100644 (file)
--- a/src/perl/lib/OpenSRF/Server.pm
+++ b/src/perl/lib/OpenSRF/Server.pm
@@ -100,7 +100,17 @@ sub cleanup {
      $self->{osrf_handle}->disconnect;
  
      # clean up our dead children
-    $self->reap_children(1);
+    if ($no_exit) {
+        # reap children in a way that anticipates keeping
+        # the listener going
+        $self->forcibly_reap_children();
+    } else {
+        # we're not sticking around, so if setting
+        # $SIG{CHLD} to ignore means that reap_children()
+        # doesn't detect the auto-reaped ones, we don't
+        # care
+        $self->reap_children(1);
+    }
  
      exit(0) unless $no_exit;
  }
@@ -545,6 +555,44 @@ sub reap_children {
  }
  
  # ----------------------------------------------------------------
+# Cleans up any child processes that (hopefully) have  exited in
+# the event that a no-exit cleanup was requested (i.e., if the listener
+# hit an exception but we're restarting things rather than letting
+# the listener die). Note that it's possible that (effectively)
+# orphans could be left over.
+# ----------------------------------------------------------------
+sub forcibly_reap_children {
+    my ($self) = @_;
+
+    $chatty and $logger->warn('reaping after a listener crash');
+
+    # try a normal reap...
+    $self->reap_children(1);
+
+    # ...then apply the hammer
+    foreach my $pid (keys %{ $self->{pid_map} }) {
+        my $child = $self->{pid_map}->{$pid};
+
+        close($child->{pipe_to_parent});
+        close($child->{pipe_to_child});
+
+        delete $self->{pid_map}->{$pid};
+        # unlike the normal reaping process, we don't need
+        # to put this on a zombie list to clean up later; if we
+        # get here, we're not in the main listener run loop
+        delete $child->{$_} for keys %$child; # destroy with a vengeance
+    }
+
+    # Reset the lists. It's possible that leftover children from the
+    # previous ->run() invocation are still around, but there's not much
+    # we can do about those leaks at this point. 
+    $self->{num_children} = 0;
+    $self->{active_list} = [];
+    $self->{idle_list} = [];
+
+}
+
+# ----------------------------------------------------------------
  # Spawn up to max_children processes
  # ----------------------------------------------------------------
  sub spawn_children {
author	Galen Charlton <gmc@equinoxOLI.org>
	Thu, 2 Dec 2021 16:48:48 +0000 (11:48 -0500)
committer	Chris Sharp <csharp@georgialibraries.org>
	Tue, 18 Jan 2022 18:10:09 +0000 (13:10 -0500)