From f44ef93ff860a43f49a8dc54da8d452afad7620d Mon Sep 17 00:00:00 2001
From: erickson <erickson@9efc2488-bf62-4759-914b-345cdb29e865>
Date: Mon, 25 Jan 2010 16:29:19 +0000
Subject: [PATCH] Added some fault tolerance to the inbound-to-unixserver
 message handoff process.  In some conditions, writing to the unix socket
 will result in SIGPIPE (anecdotally, seen more often in virtualized
 environments).  These changes add a call to select() before writing to the
 socket to check for socket availability and add a SIGPIPE handler that forces
 the inbound process to wait a brief period of time before trying the delivery
 again.

git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@1901 9efc2488-bf62-4759-914b-345cdb29e865
---
 .../lib/OpenSRF/Transport/SlimJabber/Inbound.pm    | 65 ++++++++++++++++------
 1 file changed, 47 insertions(+), 18 deletions(-)

diff --git a/src/perl/lib/OpenSRF/Transport/SlimJabber/Inbound.pm b/src/perl/lib/OpenSRF/Transport/SlimJabber/Inbound.pm
index 5b72fe5..898a528 100644
--- a/src/perl/lib/OpenSRF/Transport/SlimJabber/Inbound.pm
+++ b/src/perl/lib/OpenSRF/Transport/SlimJabber/Inbound.pm
@@ -82,6 +82,8 @@ sub DESTROY {
 		}
 	}
 }
+
+my $sig_pipe = 0;
 	
 sub listen {
 	my $self = shift;
@@ -125,47 +127,74 @@ sub listen {
 		# no routers defined
 	};
 
+    my $app = $self->{app};
 
-	
-			
-	$logger->transport( $self->{app} . " going into listen loop", INFO );
+	$logger->info("$app inbound: going into listen loop" );
 
 	while(1) {
 	
 		my $sock = $self->unix_sock();
 		my $o;
 
-		$logger->debug("Inbound listener calling process()");
-
 		try {
 			$o = $self->process(-1);
 
 			if(!$o){
-				$logger->error(
-					"Inbound received no data from the Jabber socket in process()");
+				$logger->error("$app inbound: received no data from the Jabber socket in process()");
 				usleep(100000); # otherwise we loop and pound syslog logger with errors
 			}
 
 		} catch OpenSRF::EX::JabberDisconnected with {
 
-			$logger->error("Inbound process lost its ".
-				"jabber connection.  Attempting to reconnect...");
+			$logger->error("$app inbound: process lost its jabber connection.  Attempting to reconnect...");
 			$self->initialize;
 			$o = undef;
 		};
 
+        next unless $o;
+
+        while(1) {
+            # keep trying to deliver the message until we succeed
+
+            my $socket = IO::Socket::UNIX->new( Peer => $sock  );
+
+            unless($socket and $socket->connected) {
+                $logger->error("$app inbound: unable to connect to inbound socket $sock: $!");
+                usleep(50000); # 50 msec
+                next;
+            }
+
+            # block until the pipe is ready for writing
+            my $outfile = ''; 
+            vec($outfile, $socket->fileno, 1) = 1;
+            my $nfound = select(undef, $outfile, undef, undef);
+
+            next unless $nfound; # should not happen since we're blocking
 
-		if($o) {
-			my $socket = IO::Socket::UNIX->new( Peer => $sock  );
-			throw OpenSRF::EX::Socket( 
-				"Unable to connect to UnixServer: socket-file: $sock \n :=> $! " )
-				unless ($socket->connected);
-			print $socket freeze($o);
-			$socket->close;
-		} 
+            if($nfound == -1) { # select failed
+                $logger->error("$app inbound: unable to write to socket: $!");
+                usleep(50000); # 50 msec
+                next;
+            }
+
+            $sig_pipe = 0;
+            local $SIG{'PIPE'} = sub { $sig_pipe = 1; };
+            print $socket freeze($o);
+
+            if($sig_pipe) {
+                # The attempt to write to the socket failed.  Wait a short time then try again.
+                # Don't bother closing the socket, it will only cause grief
+                $logger->error("$app inbound: got SIGPIPE, will retry after a short wait..."); 
+                usleep(50000); # 50 msec
+                next;
+            } 
+                
+            $socket->close;
+            last;
+        }
 	}
 
-	throw OpenSRF::EX::Socket( "How did we get here?!?!" );
+    $logger->error("$app inbound: exited process loop");
 }
 
 1;
-- 
2.11.0