Code

Jerome Tytgat - accumulated/percentage CPU/MEM per process - bash plugin
authorSubhendu Ghosh <sghosh@users.sourceforge.net>
Thu, 4 Jul 2002 22:02:18 +0000 (22:02 +0000)
committerSubhendu Ghosh <sghosh@users.sourceforge.net>
Thu, 4 Jul 2002 22:02:18 +0000 (22:02 +0000)
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@63 f882894a-f735-0410-b71e-b25c423dba1c

contrib/check_procl.sh [new file with mode: 0644]

diff --git a/contrib/check_procl.sh b/contrib/check_procl.sh
new file mode 100644 (file)
index 0000000..b1793ad
--- /dev/null
@@ -0,0 +1,400 @@
+#!/bin/bash
+
+#
+# Check_procl.sh 
+# 
+# Program: Process load check plugin for Nagios
+# License : GPL
+# Copyright (c) 2002 Jerome Tytgat (j.tytgat@sioban.net)
+#
+# check_procl.sh,v 1.1 2002/07/04 09:35 
+#
+# Description :
+#   
+#  This plugin is for check the %cpu, %mem or cputime of one or more process
+#
+# Usage :
+#
+#  check_procl.sh -p process1,process2,... -w a.b -c c.d --cpu 
+#  check_procl.sh -p process1,process2,... -w a.b -c c.d --mem
+#  check_procl.sh -p process1,process2,... -w a:b:c -c d:e:f --cputime
+#
+#  check_procl.sh -p %all% -e process1,process2,... -w <a.b | a:b:c> -c <c.d | d:e:f> <--cpu | --mem | --cputime>
+#  check_procl.sh -p %max% -e process1,process2,... -w <a.b | a:b:c> -c <c.d | d:e:f> <--cpu | --mem | --cputime>
+#
+# Example :
+#   
+#  To know the memory eaten by HTTPD processes, be warned when it reach 50% and be critical when it reach 75%
+#      check_procl.sh -p httpd -w 50.0 -c 75.0 --mem
+#      > OK - total %MEM for process httpd : 46.1
+#
+#  To know the process which eat the more cpu time, but as we are under linux and are using kapm we do :
+#      check_procl.sh -p %max% -e kapmd-idle,kapmd -w 0:1:0 -c 0:2:0 --cputime
+#      > CRITICAL - total CPUTIME for process named : 02:32:10
+#
+# Tested on solaris 7/8, Linux Redhat 7.3 and Linux Suse 7.1
+#
+# BUGS : problems with handling time on solaris...
+
+
+help_usage() {
+        echo "Usage:"
+        echo " $0 -p <process_name1,process_name2,... | %all% | %max%>"
+        echo "   [-e <process_name1,process_name2,...>] -w warning -c critical < --cpu | --mem | --cputime>"
+        echo " $0 (-v | --version)"
+        echo " $0 (-h | --help)"
+}
+
+help_version() {
+        echo "check_procl.sh (nagios-plugins) 1.1"
+        echo "The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute"
+        echo "copies of the plugins under the terms of the GNU General Public License."
+       echo "For more information about these matters, see the file named COPYING."
+        echo "Copyright (c) 2002 Jerome Tytgat - j.tytgat@sioban.net"
+       echo "Greetings goes to Websurg which kindly let me took time to develop this"
+        echo "                  Manu Feig and Jacques Kern who were my beta testers, thanks to them !"
+}
+
+verify_dep() {
+       needed="bash cut egrep expr grep let ps sed sort tail test tr wc"
+       for i in `echo $needed`
+       do
+               type $i > /dev/null 2>&1 /dev/null
+               if [ $? -eq 1 ]
+               then
+                       echo "I am missing an important component : $i"
+                       echo "Cannot continue, sorry, try to find the missing one..."
+                       exit 3
+               fi
+       done
+}
+
+myself=$0
+
+verify_dep
+
+if [ "$1" = "-h" -o "$1" = "--help" ]
+then 
+       help_version    
+       echo ""
+       echo "This plugin will check either the cumulutative %cpu, %mem or cputime"
+       echo "of a process."
+       echo ""
+       help_usage
+       echo ""
+       echo "Required Arguments:"
+        echo " -p, --process STRING1,STRING2,..."
+        echo "    names of the processes we want to monitor,"
+        echo "    you can add as much as process as you want, separated by comma,"
+        echo "    hey will be cumulated"
+        echo " -p, --process %all%"
+        echo "    The special keyword %all% will check the cumulative cpu/mem/time of all process"
+       echo "    WARNING : Can be very slow on heavy loaded servers, watch your timeout !"
+        echo " -p, --process %max%"
+        echo "    The special keyword %max% will check the process which eat the most"
+       echo "    WARNING : only select the process which eat the more, not the cumulative,"
+       echo "              but return the cumulative"
+       echo " -w, --warning INTEGER.INTEGER or INTERGER:INTEGER:INTEGER"
+       echo "    generate warning state if process count is outside this range"
+       echo " -c, --critical INTEGER.INTEGER or INTERGER:INTEGER:INTEGER"
+       echo "    generate critical state if process count is outside this range"
+        echo " --cpu"
+        echo "    return the current cpu usage for the given process"
+        echo " --mem"
+        echo "    return the current memory usage for the given process"
+        echo " --cputime"
+        echo "    return the total cputime usage for the given process"
+       echo ""
+        echo "Optional Argument:"
+        echo " -e, --exclude-process STRING1,STRING2,..."
+        echo "    names of the processes we want don't want to monitor"
+        echo "    only useful when associated with %all% or %max% keywords, else ignored"
+        echo "    ex : kapm-idled on linux is a process which eat memory / cputime but not really... ;-)"
+       echo ""
+       exit 3
+fi
+
+if [ "$1" = "-v" -o "$1" = "--version" ]
+then
+       help_version
+        exit 3
+fi
+
+if [ `echo $@|tr "=" " "|wc -w` -lt 7 ]
+then 
+       echo "Bad arguments number (need at least 7)!"
+       help_usage
+       exit 3
+fi
+
+tt=0
+process_name=""
+exclude_process_name=""
+wt=""
+ct=""
+
+# Test of the command lines arguments
+while test $# -gt 0
+do
+       
+       case "$1" in
+               -p|--process)
+                       if [ -n "$process_name" ]
+                       then
+                               echo "Only one --process argument is useful..."
+                                help_usage
+                                exit 3
+                       fi
+                       shift
+                       process_name="`echo $1|tr \",\" \"|\"`"
+                       ;;
+                -e|--exclude-process)
+                        if [ -n "$exclude_process_name" ]
+                        then
+                                echo "Only one --exclude-process argument is useful..."
+                                help_usage
+                                exit 3
+                        fi
+                        shift
+                        exclude_process_name="`echo $1|tr \",\" \"|\"`"
+                        ;;
+                -w|--warning)
+                        if [ -n "$wt" ]
+                        then
+                                echo "Only one --warning argument needed... Trying to test bad things ? :-)"
+                                help_usage
+                                exit 3
+                        fi
+                       shift
+                       wt=$1
+                       ;;
+                -c|--critical)
+                        if [ -n "$ct" ]
+                        then
+                                echo "Only one --critical argument needed... Trying to test bad things ? :-)"
+                                help_usage
+                                exit 3
+                        fi
+                       shift
+                       ct=$1
+                       ;;
+               --cpu)
+                       if [ $tt -eq 0 ]
+                       then
+                                       tt=1
+                       else
+                                echo "Only one of the arguments --cpu/--mem/--cputime can be used at a time !"
+                               help_usage
+                               exit 3
+                       fi
+                       type_arg_aff="%CPU"             
+                       type_arg="pcpu"         
+                       delim="."
+                       ;;
+               --mem)
+                       if [ $tt -eq 0 ]
+                       then
+                               tt=2
+                       else
+                                echo "Only one of the arguments --cpu/--mem/--cputime can be used at a time !"
+                               help_usage
+                               exit 3
+                       fi
+                       type_arg_aff="%MEM"
+                       type_arg="pmem"
+                       delim="."
+                       ;;
+               --cputime)
+                        if [ $tt -eq 0 ]
+                        then
+                                tt=3
+                        else
+                                echo "Only one of the arguments --cpu/--mem/--cputime can be used at a time !"
+                                help_usage
+                                exit 3
+                        fi
+                       type_arg_aff="TIME"
+                       type_arg="time"
+                       delim=":"
+                       ;;
+               *)
+                       echo "Unknown argument $1"
+                       help_usage
+                       exit 3
+                       ;;
+       esac
+       shift
+done
+
+# Is the process running ?
+if [ -z "`ps -e | egrep \"$process_name?\"`" -a "$process_name" != "%all%" -a "$process_name" != "%max%" ]
+then
+       echo "WARNING: process $process_name not running !"
+       exit 3
+fi
+
+# Cut of warning and critical values
+wt_value1=`echo $wt|cut -d"$delim" -f1`
+wt_value2=`echo $wt|cut -d"$delim" -f2`
+ct_value1=`echo $ct|cut -d"$delim" -f1`
+ct_value2=`echo $ct|cut -d"$delim" -f2`
+
+if [ $tt -eq 3 ]
+then
+       wt_value3=`echo $wt|cut -d"$delim" -f3`
+       ct_value3=`echo $ct|cut -d"$delim" -f3`
+else
+       wt_value3=0
+       ct_value3=0
+fi
+
+# Integrity check of warning and critical values
+if [ -z "$wt_value1" -o -z "$wt_value2" -o -z "$wt_value3" ]
+then
+        echo "Bad expression in the WARNING field : $wt"
+       help_usage
+        exit 3
+fi
+
+if [ "`echo $wt_value1|tr -d \"[:digit:]\"`" != "" -o "`echo $wt_value2|tr -d \"[:digit:]\"`" != "" -o "`echo $wt_value3|tr -d \"[:digit:]\"`" != "" ]
+then
+        echo "Bad expression in the WARNING field : $wt"
+       help_usage
+        exit 3
+fi
+
+if [ -z "$ct_value1" -o -z "$ct_value2" -o -z "$ct_value3" ]
+then
+        echo "Bad expression in the CRITICAL field : $ct"
+        help_usage
+        exit 3
+fi
+
+
+if [ "`echo $ct_value1|tr -d \"[:digit:]\"`" != "" -o "`echo $ct_value2|tr -d \"[:digit:]\"`" != "" -o "`echo $ct_value3|tr -d \"[:digit:]\"`" != "" ]
+then
+        echo "Bad expression in the CRITICAL field : $ct"
+       help_usage
+        exit 3
+fi
+
+# ps line construction set...
+case "$process_name" in 
+       %all%)
+               if [ -z "$exclude_process_name" ]
+               then
+                       psline=`ps -eo $type_arg,comm|egrep -v "$myself|$type_arg_aff?"|sed "s/^ *\([0-9]\)/\1/"|cut -d" " -f1`
+               else
+                       psline=`ps -eo $type_arg,comm|egrep -v "$myself|$type_arg_aff|$exclude_process_name?"|sed "s/^ *\([0-9]\)/\1/"|cut -d" " -f1`
+               fi
+               ;;
+       %max%)
+                if [ -z "$exclude_process_name" ]
+                then
+                       pstmp=`ps -eo $type_arg,comm|egrep -v "$myself|$type_arg_aff?"|sort|tail -1|sed "s/^ *\([0-9]\)/\1/"|cut -d" " -f2`
+               else
+                       pstmp=`ps -eo $type_arg,comm|egrep -v "$myself|$type_arg_aff|$exclude_process_name?"|sort|tail -1|sed "s/^ *\([0-9]\)/\1/"|cut -d" " -f2`
+               fi
+               psline=`ps -eo $type_arg,comm|grep $pstmp|sed "s/^ *\([0-9]\)/\1/"|cut -d" " -f1`
+               process_name=$pstmp
+               ;;
+       *)
+               psline=`ps -eo $type_arg,comm|egrep "$process_name?"|sed "s/^ *\([0-9]\)/\1/"|cut -d" " -f1`
+               ;;
+esac
+
+total1=0
+total2=0
+total3=0
+
+
+# fetching the values
+for i in $psline
+do
+       # Special case for solaris - several format exist for the time function...
+       if [ ${#i} -le 6 -a "$tt" -eq 3 ]
+       then
+               i="00:$i"
+       fi 
+       value1=`echo $i|cut -d$delim -f1`
+       value2=`echo $i|cut -d$delim -f2`
+       value3=`echo $i|cut -d$delim -f3`
+       value3=`test -z "$value3" && echo 0 || echo $value3`
+       total1=`expr $total1 + $value1`
+       total2=`expr $total2 + $value2`
+       total3=`expr $total3 + $value3`
+       if [ $tt -eq 3 ]
+       then
+               if [ $total3 -ge 60 ]
+                then
+                       let total2+=1
+                        let total3-=60
+                fi
+                if [ $total2 -ge 60 ]
+                then
+                        let total1+=1
+                        let total2-=60
+                fi
+       else
+               if [ $total2 -ge 10 ]
+               then
+                       let total1+=1
+                       let total2=total2-10
+               fi
+       fi
+done
+
+warn=0
+crit=0
+
+# evaluation of the cumulative values vs warning and critical values
+case "$tt" in
+       1)
+               return_total="$total1.$total2"
+               test $total1 -gt $ct_value1 && crit=1
+               test $total1 -eq $ct_value1 -a $total2 -ge $ct_value2 && crit=1
+               test $total1 -gt $wt_value1 && warn=1
+               test $total1 -eq $wt_value1 -a $total2 -ge $wt_value2 && warn=1
+               ;;
+       2)
+               return_total="$total1.$total2"
+                test $total1 -gt $ct_value1 && crit=1
+                test $total1 -eq $ct_value1 -a $total2 -ge $ct_value2 && crit=1
+                test $total1 -gt $wt_value1 && warn=1
+                test $total1 -eq $wt_value1 -a $total2 -ge $wt_value2 && warn=1
+               ;;
+       3)
+               return_total="`test ${#total1} -eq 1 && echo 0`$total1:`test ${#total2} -eq 1 && echo 0`$total2:`test ${#total3} -eq 1 && echo 0`$total3"
+                test $total1 -gt $ct_value1 && crit=1
+                test $total1 -eq $ct_value1 -a $total2 -gt $ct_value2 && crit=1
+                test $total1 -eq $ct_value1 -a $total2 -eq $ct_value2 -a $total3 -ge $ct_value3 && crit=1
+                test $total1 -gt $wt_value1 && warn=1
+                test $total1 -eq $wt_value1 -a $total2 -gt $wt_value2 && warn=1
+                test $total1 -eq $wt_value1 -a $total2 -eq $wt_value2 -a $total3 -ge $wt_value3 && warn=1
+               ;;
+esac
+
+# last check ...
+if [ $crit -eq 1 -a $warn -eq 0 ]
+then
+       echo "Critical value must be greater than warning value !"
+       help_usage
+       exit 3
+fi
+
+# Finally Inform Nagios of what we found...
+if [ $crit -eq 1 ]
+then
+       echo "CRITICAL - total $type_arg_aff for process `echo $process_name|tr \"|\" \",\"` : $return_total"
+       exit 2
+elif [ $warn -eq 1 ]
+then
+       echo "WARNING - total $type_arg_aff for process `echo $process_name|tr \"|\" \",\"` : $return_total"
+       exit 1
+else
+       echo "OK - total $type_arg_aff for process `echo $process_name|tr \"|\" \",\"` : $return_total"
+       exit 0
+fi
+
+# Hey what are we doing here ???
+exit 3
\ No newline at end of file