1. In the parent router process: wait for all of the immediate
author: scottmk <scottmk@9efc2488-bf62-4759-914b-345cdb29e865>
Sun, 20 Dec 2009 06:37:02 +0000 (06:37 +0000)
committer: scottmk <scottmk@9efc2488-bf62-4759-914b-345cdb29e865>
Sun, 20 Dec 2009 06:37:02 +0000 (06:37 +0000)
child processes to terminate before exiting.

This change eliminates the need for the shell script invoking
the router to sleep before running a ps to identify the effective
router processes (which are grandchildren of the parent). By the
time the parent exits, the children will have launched the
grandchildren and exited.

2. If any of the immediate child processes terminates abnormally
(either a non-zero return code or termination by a signal), issue
a warning message to that effect. This message goes to standard
error, since the parent process never opens a log file.

3. Apply the volatile qualifier to a couple of variables that
are updated asynchronously by a signal handler.

M    src/router/osrf_router.c
M    src/router/osrf_router_main.c

git-svn-id: svn://svn.open-ils.org/OpenSRF/trunk@1878 9efc2488-bf62-4759-914b-345cdb29e865

src/router/osrf_router.c
src/router/osrf_router_main.c

index b847807..ede2ee3 100644 (file)
@@ -40,7 +40,7 @@ struct osrfRouterStruct {
        char* resource;       /**< Router's resource name for the Jabber logon. */
        char* password;       /**< Router's password for the Jabber logon. */
        int port;             /**< Jabber's port number. */
-       sig_atomic_t stop;    /**< To be set by signal handler to interrupt main loop. */
+       volatile sig_atomic_t stop; /**< To be set by signal handler to interrupt main loop. */
 
        /** Array of client domains that we allow to send requests through us. */
        osrfStringArray* trustedClients;
index bf3b387..0983991 100644 (file)
@@ -16,6 +16,9 @@
 */
 
 #include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
 #include "opensrf/utils.h"
 #include "opensrf/log.h"
 #include "opensrf/osrf_list.h"
@@ -25,7 +28,7 @@
 
 static osrfRouter* router = NULL;
 
-static sig_atomic_t stop_signal = 0;
+static volatile sig_atomic_t stop_signal = 0;
 
 static void setupRouter(jsonObject* configChunk);
 
@@ -86,6 +89,8 @@ int main( int argc, char* argv[] ) {
 
        /* Spawn child process(es) */
 
+       int rc = EXIT_SUCCESS;
+       int parent = 1;    // boolean
        int i;
        for(i = 0; i < configInfo->size; i++) {
                jsonObject* configChunk = jsonObjectGetIndex(configInfo, i);
@@ -104,18 +109,59 @@ int main( int argc, char* argv[] ) {
                }
                if(fork() == 0) { /* create a new child to run this router instance */
                        setupRouter(configChunk);
+                       parent = 0;
                        break;  /* We're a child; don't spawn any more children here */
                }
        }
 
+       if( parent ) {
+               // Wait for all child processes to terminate.
+               // If any ended abnormally, report it.
+               while( 1 ) {  // Loop until all children terminate
+                       int status;
+                       errno = 0;
+                       pid_t child_pid = wait( &status );
+                       if( -1 == child_pid ) {
+                               // ECHILD means no children are left.  Anything else we ignore.
+                               if( ECHILD == errno )
+                                       break;
+                       } else if( WIFEXITED( status ) ) {
+                               // Relatively normal exit, i.e. via calling exit()
+                               // or _exit(), or by returning from main()
+                               int child_rc = WEXITSTATUS( status );
+                               if( child_rc ) {
+                                       osrfLogWarning( OSRF_LOG_MARK,
+                                               "Child router process %ld exited with return status %d",
+                                               (long) child_pid, child_rc );
+                                       rc = EXIT_FAILURE;
+                               } else {
+                                       ;    // Terminated successfully; silently ignore
+                               }
+                       } else if( WIFSIGNALED( status ) ) {
+                               // Killed by a signal
+                               int signo = WTERMSIG( status );
+                               const char* extra = "";
+#ifdef WCOREDUMP
+                               if( WCOREDUMP( status ) )
+                                       extra = "with core dump ";
+#endif
+                               osrfLogWarning( OSRF_LOG_MARK, "Child router process %ld killed %sby signal %d",
+                                       (long) child_pid, extra, signo );
+
+                               rc = EXIT_FAILURE;
+                       }
+               }
+       }
+
        if( stop_signal ) {
-               // Interrupted by a signal?  Re raise so the parent can see it.
+               // Interrupted by a signal?  Re-raise so the parent can see it.
                osrfLogWarning( OSRF_LOG_MARK, "Interrupted by signal %d; re-raising",
                                (int) stop_signal );
+               signal( stop_signal, SIG_DFL );
                raise( stop_signal );
        }
 
-       return EXIT_SUCCESS;
+       return rc;
 }
 
 /**
@@ -144,7 +190,7 @@ static void setupRouter(jsonObject* configChunk) {
 
        if(!log_file)
        {
-               fprintf(stderr, "Log file name not specified for router\n");
+               osrfLogError( OSRF_LOG_MARK, "Log file name not specified for router" );
                return;
        }
 
@@ -196,7 +242,6 @@ static void setupRouter(jsonObject* configChunk) {
                osrfStringArrayAdd(tclients, clientDomain);
        }
 
-
        if( tclients->size == 0 || tservers->size == 0 ) {
                osrfLogError( OSRF_LOG_MARK,
                                "We need trusted servers and trusted client to run the router...");
@@ -213,7 +258,8 @@ static void setupRouter(jsonObject* configChunk) {
        signal(SIGTERM,routerSignalHandler);
 
        if( (osrfRouterConnect(router)) != 0 ) {
-               fprintf(stderr, "Unable to connect router to jabber server %s... exiting\n", server );
+               osrfLogError( OSRF_LOG_MARK, "Unable to connect router to jabber server %s... exiting",
+                       server );
                osrfRouterFree(router);
                return;
        }