Code

Add timeout to plugin execution
author Sven Velt <sven@velt.de>
Tue, 27 Jul 2010 11:57:42 +0000 (13:57 +0200)
committer Sven Velt <sven@velt.de>
Tue, 27 Jul 2010 11:57:42 +0000 (13:57 +0200)
It's now possible to terminate plugins that run longer than a configured
number of seconds. Set "plugin_timeout" (in seconds; default: no timeout)
and, optionally, "plugin_timeout_returncode" (default: 2, CRITICAL) in the
"[nagixsc]" section of the conf file. A sample config "timeout.conf" is
included in "sample-configs/conf/".

Signed-off-by: Sven Velt <sven@velt.de>
nagixsc/__init__.py
sample-configs/conf/timeout.conf [new file with mode: 0644]

index 321407bc3abf7a84659065410afd50b0600ff083..644c9ab624b78cc35b748f5a46637cd3d67e5e31 100644 (file)
@@ -8,6 +8,7 @@ import mimetools
 import os
 import random
 import shlex
+import signal
 import socket
 import string
 import subprocess
@@ -20,6 +21,11 @@ def debug(level, verb, string):
 
 ##############################################################################
 
+class ExecTimeoutError(Exception):
+       pass
+
+##############################################################################
+
 def available_encodings():
        return ['base64', 'plain',]
 
@@ -60,7 +66,10 @@ def read_inifile(inifile):
 
 ##############################################################################
 
-def exec_check(host_name, service_descr, cmdline):
+def exec_timeout_handler(signum, frame):
+       raise ExecTimeoutError
+
+def exec_check(host_name, service_descr, cmdline, timeout=None, timeout_returncode=2):
        cmdarray = shlex.split(cmdline)
 
        check = {}
@@ -72,13 +81,27 @@ def exec_check(host_name, service_descr, cmdline):
                check['returncode'] = 127
                return check
 
+       if timeout:
+               signal.signal(signal.SIGALRM, exec_timeout_handler)
+               signal.alarm(timeout)
+
        try:
-               cmd     = subprocess.Popen(cmdarray, stdout=subprocess.PIPE)
+               cmd = subprocess.Popen(cmdarray, stdout=subprocess.PIPE)
                check['output'] = cmd.communicate()[0].rstrip()
                check['returncode'] = cmd.returncode
        except OSError:
                check['output'] = 'Could not execute "%s"' % cmdline
                check['returncode'] = 127
+       except ExecTimeoutError:
+               check['output'] = 'Plugin timed out after %s seconds' % timeout
+               check['returncode'] = timeout_returncode
+
+       if timeout:
+               signal.alarm(0)
+               try:
+                       cmd.terminate()
+               except OSError:
+                       pass
 
        check['timestamp'] = datetime.datetime.now().strftime('%s')
        return check
@@ -89,6 +112,18 @@ def exec_check(host_name, service_descr, cmdline):
 def conf2dict(config, opt_host=None, opt_service=None):
        checks = []
 
+       # Read "plugin_timeout" from "[nagixsc]", default "None" (no timeout)
+       try:
+               timeout = config.getint('nagixsc','plugin_timeout')
+       except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
+               timeout = None
+
+       # Read "plugin_timeout_returncode" from "[nagixsc]", default "2" (CRITICAL)
+       try:
+               timeout_returncode = config.getint('nagixsc','plugin_timeout_returncode')
+       except (ConfigParser.NoSectionError, ConfigParser.NoOptionError):
+               timeout_returncode = 2
+
        # Sections are Hosts (not 'nagixsc'), options in sections are Services
        hosts = config.sections()
        if 'nagixsc' in hosts:
@@ -113,7 +148,7 @@ def conf2dict(config, opt_host=None, opt_service=None):
                # Look for host check
                if '_host_check' in services and not opt_service:
                        cmdline = config.get(host, '_host_check')
-                       check = exec_check(host_name, None, cmdline)
+                       check = exec_check(host_name, None, cmdline, timeout, timeout_returncode)
                        checks.append(check)
 
 
@@ -129,7 +164,7 @@ def conf2dict(config, opt_host=None, opt_service=None):
                        if service[0] != '_':
                                cmdline = config.get(host, service)
 
-                               check = exec_check(host_name, service, cmdline)
+                               check = exec_check(host_name, service, cmdline, timeout, timeout_returncode)
                                checks.append(check)
 
        return checks
diff --git a/sample-configs/conf/timeout.conf b/sample-configs/conf/timeout.conf
new file mode 100644 (file)
index 0000000..8d158e0
--- /dev/null
@@ -0,0 +1,19 @@
+[nagixsc]
+# plugin_timeout
+#
+# Timeout of plugin execution in seconds
+# Default: No timeout
+plugin_timeout: 2
+
+# plugin_timeout_returncode
+#
+# Set exit/return code of timed out plugins
+# Default: 2 (CRITICAL)
+plugin_timeout_returncode: 3
+
+[timeout]
+Timeout-1s: /bin/bash -c "sleep 1; echo No or bigger than 1 seconds timeout"
+Timeout-2s: /bin/bash -c "sleep 2; echo No or bigger than 2 seconds timeout"
+Timeout-3s: /bin/bash -c "sleep 3; echo No or bigger than 3 seconds timeout"
+Timeout-4s: /bin/bash -c "sleep 4; echo No or bigger than 4 seconds timeout"
+