Code

ECHILD error at waitpid on Red Hat systems (Peter Pramberger and
authorTon Voon <tonvoon@users.sourceforge.net>
Mon, 12 Sep 2005 10:31:29 +0000 (10:31 +0000)
committerTon Voon <tonvoon@users.sourceforge.net>
Mon, 12 Sep 2005 10:31:29 +0000 (10:31 +0000)
Sascha Runschke - 1250191)

git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@1213 f882894a-f735-0410-b71e-b25c423dba1c

Makefile.am
config_test/Makefile [new file with mode: 0644]
config_test/child_test.c [new file with mode: 0644]
config_test/run_tests [new file with mode: 0755]
configure.in
plugins/popen.c

index a7c5ffc6053f0fed74d8886b2e836d9795deea09..cc46a465f0471ea976515957be0eb847f7225de7 100644 (file)
@@ -5,7 +5,8 @@ SUBDIRS = intl lib plugins plugins-scripts m4 po
 EXTRA_DIST = config.rpath \
        ABOUT-NLS ACKNOWLEDGEMENTS AUTHORS BUGS CHANGES CODING FAQ LEGAL \
        REQUIREMENTS SUPPORT THANKS \
-       NPTest.pm contrib pkg nagios-plugins.spec
+       NPTest.pm contrib pkg nagios-plugins.spec \
+       config_test/Makefile config_test/run_tests config_test/child_test.c
 
 ACLOCAL_AMFLAGS = -I m4
 
diff --git a/config_test/Makefile b/config_test/Makefile
new file mode 100644 (file)
index 0000000..295696e
--- /dev/null
@@ -0,0 +1,10 @@
+
+all: child_test.c   # build the fork/pipe/wait reproducer from child_test.c
+       gcc -o child_test child_test.c
+
+test:   # run the reproducer 10 times via run_tests; the exit status of run_tests is the result of this target
+       ./run_tests 10
+
+clean:   # remove the built reproducer binary
+       rm -f child_test
+
diff --git a/config_test/child_test.c b/config_test/child_test.c
new file mode 100644 (file)
index 0000000..e7d8210
--- /dev/null
@@ -0,0 +1,77 @@
+// Base code taken from http://www-h.eng.cam.ac.uk/help/tpl/unix/fork.html
+// Fix for redhat suggested by Peter Pramberger, peter@pramberger.at
+#include <unistd.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <signal.h>
+void popen_sigchld_handler (int);
+int childtermd;
+
+int main(){ // Reproducer: fork a child that execs nslookup, read its output through a pipe, then wait() for it
+ char str[1024];   // receives the first line read from the child
+ int pipefd[2];    // pipefd[0] is the read end, pipefd[1] is the write end
+ pid_t pid;
+ int status, died; // died holds the return value of wait()
+ // Install the SIGCHLD handler before forking so childtermd is set when the child terminates
+        if (signal (SIGCHLD, popen_sigchld_handler) == SIG_ERR) {
+                printf ("Cannot catch SIGCHLD\n");
+               _exit(-1);
+        }
+
+  pipe (pipefd); // NOTE: return value is not checked
+  switch(pid=fork()){
+   case -1: // fork failed
+           printf("can't fork\n");
+            _exit(-1);
+   
+   case 0 : // this is the code the child runs 
+            close(1);      // close stdout
+            // pipefd[1] is for writing to the pipe. We want the output
+            // that used to go to the standard output (file descriptor 1)
+            // to be written to the pipe. The following command does this,
+            // creating a new file descriptor 1 (the lowest available)
+            // that writes where pipefd[1] goes.
+            dup (pipefd[1]); // fd 1 (stdout) now refers to the write end of the pipe
+            // the child isn't going to read from the pipe, so
+            // pipefd[0] can be closed
+            close (pipefd[0]);
+
+           //These are the commands to run, with success commented. dig and nslookup only problems
+            //execl ("/bin/date","date",0);                    // 100%
+           //execl ("/bin/cat", "cat", "/etc/hosts", 0);       // 100%
+           //execl ("/usr/bin/dig", "dig", "redhat.com", 0);   // 69%
+           //execl("/bin/sleep", "sleep", "1", 0);             // 100%
+            execl ("/usr/bin/nslookup","nslookup","redhat.com",0); // 90% (after 100 tests), 40% (after 10 tests)
+            //execl ("/bin/ping","ping","-c","1","localhost",0);       // 100%
+            //execl ("/bin/ping","ping","-c","1","192.168.10.32",0);   // 100%
+           _exit(0); // reached only if execl() returns, i.e. the exec failed
+
+   default: // this is the code the parent runs 
+
+            close(0); // close stdin
+            // Set file descriptor 0 (stdin) to read from the pipe
+            dup (pipefd[0]);
+            // the parent isn't going to write to the pipe
+            close (pipefd[1]);
+            // Now read from the pipe
+            fgets(str, 1023, stdin); // reads at most one line; str is only used by the disabled printf below
+            //printf("1st line output is %s\n", str);
+
+           //while (!childtermd);  // Uncomment this line to fix
+
+            died= wait(&status); // the call under test: on affected Red Hat kernels it was reported to fail with ECHILD
+           //printf("died=%d status=%d\n", died, status);
+           if (died > 0) _exit(0); // wait() returned a child pid: this run counts as a success
+           else          _exit(1); // wait() failed: this run counts as a failure
+   }
+}
+
+void
+popen_sigchld_handler (int signo) // SIGCHLD handler installed by main(); signo is the delivered signal number
+{
+        if (signo == SIGCHLD) {
+                //printf("Caught sigchld\n");
+                childtermd = 1; // flag polled by the (commented-out) busy-wait in main() before wait()
+        }
+}
diff --git a/config_test/run_tests b/config_test/run_tests
new file mode 100755 (executable)
index 0000000..e41db23
--- /dev/null
@@ -0,0 +1,16 @@
+#!/bin/ksh
+
+i=0
+success=0
+fail=0
+while [[ $i -lt $1 ]] ; do
+       ./child_test
+       if [[ $? -eq 0 ]] ; then
+               success=$(($success+1))
+       else
+               fail=$((fail+1))
+       fi
+       i=$(($i+1))
+done
+print "Success=$success Fail=$fail"
+[[ $fail -gt 0 ]] && exit 1
index 5453fe1dee87a92377eeb20c145f0d431886b945..4f9dc1c9f0e5fcfc0f44d69f61948c4cf3062cb3 100644 (file)
@@ -1520,6 +1520,22 @@ AC_SUBST(DEPLIBS)
 AM_GNU_GETTEXT([no-libtool], [need-ngettext])
 AM_GNU_GETTEXT_VERSION(0.11.5)
 
+dnl Check for Redhat spopen problem
+dnl Weird problem where ECHILD is returned from a wait call in error
+dnl Only appears to affect nslookup and dig calls. Only affects redhat around
+dnl 2.6.9-11 (okay in 2.6.9-5). Redhat investigating root cause
+dnl We patch plugins/popen.c
+if echo $ac_cv_uname_r | egrep "\.EL$" >/dev/null 2>&1 ; then
+       AC_MSG_CHECKING(for redhat spopen problem)
+       ( cd config_test && make && make test ) > /dev/null 2>&1
+       if test $? -eq 0 ; then
+               AC_MSG_RESULT(okay)
+       else
+               AC_MSG_RESULT(error)
+               AC_DEFINE(REDHAT_SPOPEN_ERROR, 1, "Problem on redhat with spopen")
+       fi
+fi
+
 dnl External libraries - see ACKNOWLEDGEMENTS
 np_COREUTILS
 np_CURL
index 062cf274a476fe8a775c2c562c777747a046f9f3..f6810691460c8e2fa26ec33359533f6a140c7b5f 100644 (file)
@@ -30,6 +30,9 @@ extern FILE *child_process;
 
 FILE *spopen (const char *);
 int spclose (FILE *);
+#ifdef REDHAT_SPOPEN_ERROR
+RETSIGTYPE popen_sigchld_handler (int);
+#endif
 RETSIGTYPE popen_timeout_alarm_handler (int);
 
 #include <stdarg.h>                                                    /* ANSI C header file */
@@ -67,6 +70,10 @@ char *pname = NULL;                                                  /* caller can set this from argv[0] */
 /*extern pid_t *childpid = NULL; *//* ptr to array allocated at run-time */
 static int maxfd;                                                              /* from our open_max(), {Prog openmax} */
 
+#ifdef REDHAT_SPOPEN_ERROR
+static volatile int childtermd = 0;
+#endif
+
 FILE *
 spopen (const char *cmdstring)
 {
@@ -171,6 +178,12 @@ spopen (const char *cmdstring)
        if (pipe (pfderr) < 0)
                return (NULL);                                                  /* errno set by pipe() */
 
+#ifdef REDHAT_SPOPEN_ERROR
+       if (signal (SIGCHLD, popen_sigchld_handler) == SIG_ERR) {
+               usage4 (_("Cannot catch SIGCHLD"));
+       }
+#endif
+
        if ((pid = fork ()) < 0)
                return (NULL);                                                  /* errno set by fork() */
        else if (pid == 0) {                                    /* child */
@@ -220,6 +233,10 @@ spclose (FILE * fp)
        if (fclose (fp) == EOF)
                return (1);
 
+#ifdef REDHAT_SPOPEN_ERROR
+       while (!childtermd);                                                            /* wait until SIGCHLD */
+#endif
+
        while (waitpid (pid, &status, 0) < 0)
                if (errno != EINTR)
                        return (1);                                                     /* error other than EINTR from waitpid() */
@@ -239,8 +256,16 @@ static int openmax = 0;
 #define        OPEN_MAX_GUESS  256                     /* if OPEN_MAX is indeterminate */
                                /* no guarantee this is adequate */
 
+#ifdef REDHAT_SPOPEN_ERROR                                     /* defined by configure when the config_test reproducer fails */
+RETSIGTYPE                                                     /* SIGCHLD handler installed by spopen() */
+popen_sigchld_handler (int signo)
+{
+       if (signo == SIGCHLD)
+               childtermd = 1;                                 /* ends the "wait until SIGCHLD" loop in spclose() */
+}
+#endif                                                         /* REDHAT_SPOPEN_ERROR */
 
-void
+RETSIGTYPE
 popen_timeout_alarm_handler (int signo)
 {
        int fh;