1 /******************************************************************************
2 *
3 * CHECK_PROCS.C
4 *
5 * Program: Process plugin for Nagios
6 * License: GPL
7 * Copyright (c) 1999 Ethan Galstad (nagios@nagios.org)
8 *
9 * $Id$
10 *
11 * Description:
12 *
13 * This plugin checks the number of currently running processes and
14 * generates WARNING or CRITICAL states if the process count is outside
15 * the specified threshold ranges. The process count can be filtered by
16 * process owner, parent process PID, current state (e.g., 'Z'), or may
17 * be the total number of running processes
18 *
19 * License Information:
20 *
21 * This program is free software; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This program is distributed in the hope that it will be useful, but
27 * WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 * General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program; if not, write to the Free Software
33 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
34 *
35 ******************************************************************************/
/* Plugin name; used in messages and to skip the plugin's own `ps` entry. */
const char *progname = "check_procs";
#define REVISION "$Revision$"
#define COPYRIGHT "1999-2002"
#define AUTHOR "Ethan Galstad"
#define EMAIL "nagios@nagios.org"

/* One-paragraph description printed at the top of --help. */
#define SUMMARY "\
Checks all processes and generates WARNING or CRITICAL states if the specified\n\
metric is outside the required threshold ranges. The metric defaults to number\n\
of processes. Search filters can be applied to limit the processes to check.\n"

/* Short option synopsis; passed to printf as a "%s" argument (it contains
   a literal '%', so it must never be used as a format string). */
#define OPTIONS "\
-w <range> -c <range> [-m metric] [-t timeout]\n\
[-s state] [-p ppid] [-u user] [-r rss] [-z vsz] [-P %cpu]\n\
[-a argument-array] [-C command] [-v]"

/* Detailed option help. The vsz/rss/pcpu filters are described as
   "at least" because the filter comparisons are inclusive (>=). */
#define LONGOPTIONS "\
Required Arguments:\n\
-w, --warning=RANGE\n\
Generate warning state if metric is outside this range\n\
-c, --critical=RANGE\n\
Generate critical state if metric is outside this range\n\
Optional Arguments:\n\
-m, --metric=TYPE\n\
Check thresholds against metric. Valid types:\n\
PROCS - number of processes (default)\n\
VSZ - virtual memory size\n\
RSS - resident set memory size\n\
CPU - percentage cpu\n\
-t, --timeout=INTEGER\n\
Timeout interval; must be an integer\n\
-v, --verbose\n\
Extra information. Up to 3 verbosity levels\n\
Optional Filters:\n\
-s, --state=STATUSFLAGS\n\
Only scan for processes that have, in the output of `ps`, one or\n\
more of the status flags you specify (for example R, Z, S, RS,\n\
RSZDT, plus others based on the output of your 'ps' command).\n\
-p, --ppid=PPID\n\
Only scan for children of the parent process ID indicated.\n\
-z, --vsz=VSZ\n\
Only scan for processes with vsz of at least the value indicated.\n\
-r, --rss=RSS\n\
Only scan for processes with rss of at least the value indicated.\n\
-P, --pcpu=PCPU\n\
Only scan for processes with pcpu of at least the value indicated.\n\
-u, --user=USER\n\
Only scan for processes with user name or ID indicated.\n\
-a, --argument-array=STRING\n\
Only scan for processes with args that contain STRING.\n\
-C, --command=COMMAND\n\
Only scan for exact matches to the named COMMAND.\n\
\n\
RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
specified 'max:min', a warning status will be generated if the\n\
count is inside the specified range\n"

/* Worked examples appended to --help. */
#define EXAMPLES "\
check_procs -w 2:2 -c 2:1024 -C portsentry\n\
Warning if not two processes with command name portsentry. Critical\n\
if < 2 or > 1024 processes\n\
check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
Warning alert if > 10 processes with command arguments containing \n\
'/usr/local/bin/perl' and owned by root\n\
check_procs -w 50000 -c 100000 --metric=VSZ\n\
Alert if vsz of any processes over 50K or 100K\n"
101 #include "config.h"
102 #include <pwd.h>
103 #include "common.h"
104 #include "popen.h"
105 #include "utils.h"
/* Forward declarations for the functions defined after main(). */
int check_thresholds (int value);
int process_arguments (int argc, char **argv);
int validate_arguments (void);
void print_help (void);
void print_usage (void);
113 int wmax = -1;
114 int cmax = -1;
115 int wmin = -1;
116 int cmin = -1;
118 int options = 0; /* bitmask of filter criteria to test against */
119 #define ALL 1
120 #define STAT 2
121 #define PPID 4
122 #define USER 8
123 #define PROG 16
124 #define ARGS 32
125 #define VSZ 64
126 #define RSS 128
127 #define PCPU 256
129 /* Different metrics */
130 int metric = 0;
131 #define METRIC_PROCS 0
132 #define METRIC_VSZ 1
133 #define METRIC_RSS 2
134 #define METRIC_CPU 3
135 char *metric_name = "";
137 int verbose = 0;
138 int uid;
139 int ppid;
140 int vsz;
141 int rss;
142 float pcpu;
143 char *statopts = "";
144 char *prog = "";
145 char *args = "";
146 char *fmt = "";
147 char *fails = "";
148 char tmp[MAX_INPUT_BUFFER];
150 int
151 main (int argc, char **argv)
152 {
153 char input_buffer[MAX_INPUT_BUFFER];
155 int procuid = 0;
156 int procppid = 0;
157 int procvsz = 0;
158 int procrss = 0;
159 float procpcpu = 0;
160 char procstat[8];
161 char procprog[MAX_INPUT_BUFFER];
162 char *procargs;
164 const char *zombie = "Z";
166 int resultsum = 0; /* bitmask of the filter criteria met by a process */
167 int found = 0; /* counter for number of lines returned in `ps` output */
168 int procs = 0; /* counter for number of processes meeting filter criteria */
169 int pos; /* number of spaces before 'args' in `ps` output */
170 int cols; /* number of columns in ps output */
171 int warn = 0; /* number of processes in warn state */
172 int crit = 0; /* number of processes in crit state */
173 int i;
175 int result = STATE_UNKNOWN;
177 asprintf (&metric_name, "PROCS");
178 metric = METRIC_PROCS;
180 if (process_arguments (argc, argv) == ERROR)
181 usage ("Unable to parse command line\n");
183 if (verbose >= 2)
184 printf ("CMD: %s\n", PS_COMMAND);
186 child_process = spopen (PS_COMMAND);
187 if (child_process == NULL) {
188 printf ("Could not open pipe: %s\n", PS_COMMAND);
189 return STATE_UNKNOWN;
190 }
192 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
193 if (child_stderr == NULL)
194 printf ("Could not open stderr for %s\n", PS_COMMAND);
196 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
198 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
199 strcpy(procprog,"");
200 asprintf(&procargs,"");
202 cols = sscanf (input_buffer, PS_FORMAT, PS_VARLIST);
204 /* Zombie processes do not give a procprog command */
205 if ( cols == 6 && strstr(procstat, zombie) ) {
206 cols = 7;
207 /* Set some value for procargs for the strip command further below
208 Seen to be a problem on some Solaris 7 and 8 systems */
209 input_buffer[pos] = '\n';
210 input_buffer[pos+1] = 0x0;
211 }
212 if ( cols >= 7 ) {
213 resultsum = 0;
214 asprintf (&procargs, "%s", input_buffer + pos);
215 strip (procargs);
217 if ((options & STAT) && (strstr (statopts, procstat)))
218 resultsum |= STAT;
219 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
220 resultsum |= ARGS;
221 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
222 resultsum |= PROG;
223 if ((options & PPID) && (procppid == ppid))
224 resultsum |= PPID;
225 if ((options & USER) && (procuid == uid))
226 resultsum |= USER;
227 if ((options & VSZ) && (procvsz >= vsz))
228 resultsum |= VSZ;
229 if ((options & RSS) && (procrss >= rss))
230 resultsum |= RSS;
231 if ((options & PCPU) && (procpcpu >= pcpu))
232 resultsum |= PCPU;
234 if (verbose >= 3)
235 printf ("%d %d %d %d %d %.2f %s %s %s\n",
236 procs, procuid, procvsz, procrss,
237 procppid, procpcpu, procstat, procprog, procargs);
239 /* Ignore self */
240 if (strcmp (procprog, progname) == 0)
241 continue;
243 found++;
245 /* Next line if filters not matched */
246 if (!(options == resultsum || options == ALL))
247 continue;
249 procs++;
251 if (metric == METRIC_VSZ)
252 i = check_thresholds (procvsz);
253 else if (metric == METRIC_RSS)
254 i = check_thresholds (procrss);
255 /* TODO? float thresholds for --metric=CPU */
256 else if (metric == METRIC_CPU)
257 i = check_thresholds ((int)procpcpu);
259 if (metric != METRIC_PROCS) {
260 if (i == STATE_WARNING) {
261 warn++;
262 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
263 }
264 if (i == STATE_CRITICAL) {
265 crit++;
266 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
267 }
268 result = max_state (result, i);
269 }
270 }
271 /* This should not happen */
272 else if (verbose) {
273 printf("Not parseable: %s", input_buffer);
274 }
275 }
277 /* If we get anything on STDERR, at least set warning */
278 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
279 if (verbose)
280 printf ("STDERR: %s", input_buffer);
281 result = max_state (result, STATE_WARNING);
282 printf ("System call sent warnings to stderr\n");
283 }
285 (void) fclose (child_stderr);
287 /* close the pipe */
288 if (spclose (child_process)) {
289 printf ("System call returned nonzero status\n");
290 result = max_state (result, STATE_WARNING);
291 }
293 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
294 printf ("Unable to read output\n");
295 return result;
296 }
298 if ( result == STATE_UNKNOWN )
299 result = STATE_OK;
301 /* Needed if procs found, but none match filter */
302 if ( metric == METRIC_PROCS ) {
303 result = max_state (result, check_thresholds (procs) );
304 }
306 if ( result == STATE_OK ) {
307 printf ("%s OK: %d process%s",
308 metric_name, procs, ( procs != 1 ? "es" : "") );
309 } else if (result == STATE_WARNING) {
310 if ( metric == METRIC_PROCS ) {
311 printf ("PROCS WARNING: %d process%s", procs,
312 ( procs != 1 ? "es" : ""));
313 } else {
314 printf ("%s WARNING: %d warn out of %d process%s",
315 metric_name, warn, procs,
316 ( procs != 1 ? "es" : ""));
317 }
318 } else if (result == STATE_CRITICAL) {
319 if (metric == METRIC_PROCS) {
320 printf ("PROCS CRITICAL: %d process%s", procs,
321 ( procs != 1 ? "es" : ""));
322 } else {
323 printf ("%s CRITICAL: %d crit, %d warn out of %d process%s",
324 metric_name, crit, warn, procs,
325 ( procs != 1 ? "es" : ""));
326 }
327 }
329 if (fmt != "") {
330 printf (" with %s", fmt);
331 }
333 if ( verbose >= 1 && fails != "" )
334 printf (" [%s]", fails);
336 printf ("\n");
337 return result;
338 }
340 /* Check thresholds against value */
341 int
342 check_thresholds (int value)
343 {
344 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
345 return OK;
346 }
347 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
348 if (value > cmax && value < cmin)
349 return STATE_CRITICAL;
350 }
351 else if (cmax >= 0 && value > cmax) {
352 return STATE_CRITICAL;
353 }
354 else if (cmin >= 0 && value < cmin) {
355 return STATE_CRITICAL;
356 }
358 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
359 if (value > wmax && value < wmin) {
360 return STATE_WARNING;
361 }
362 }
363 else if (wmax >= 0 && value > wmax) {
364 return STATE_WARNING;
365 }
366 else if (wmin >= 0 && value < wmin) {
367 return STATE_WARNING;
368 }
369 return STATE_OK;
370 }
372 /* process command-line arguments */
373 int
374 process_arguments (int argc, char **argv)
375 {
376 int c = 1;
377 char *user;
378 struct passwd *pw;
379 int option_index = 0;
380 static struct option long_options[] = {
381 {"warning", required_argument, 0, 'w'},
382 {"critical", required_argument, 0, 'c'},
383 {"metric", required_argument, 0, 'm'},
384 {"timeout", required_argument, 0, 't'},
385 {"status", required_argument, 0, 's'},
386 {"ppid", required_argument, 0, 'p'},
387 {"command", required_argument, 0, 'C'},
388 {"vsz", required_argument, 0, 'z'},
389 {"rss", required_argument, 0, 'r'},
390 {"pcpu", required_argument, 0, 'P'},
391 {"argument-array", required_argument, 0, 'a'},
392 {"help", no_argument, 0, 'h'},
393 {"version", no_argument, 0, 'V'},
394 {"verbose", no_argument, 0, 'v'},
395 {0, 0, 0, 0}
396 };
398 for (c = 1; c < argc; c++)
399 if (strcmp ("-to", argv[c]) == 0)
400 strcpy (argv[c], "-t");
402 while (1) {
403 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
404 long_options, &option_index);
406 if (c == -1 || c == EOF)
407 break;
409 switch (c) {
410 case '?': /* help */
411 print_usage ();
412 exit (STATE_UNKNOWN);
413 case 'h': /* help */
414 print_help ();
415 exit (STATE_OK);
416 case 'V': /* version */
417 print_revision (progname, REVISION);
418 exit (STATE_OK);
419 case 't': /* timeout period */
420 if (!is_integer (optarg)) {
421 printf ("%s: Timeout Interval must be an integer!\n\n",
422 progname);
423 print_usage ();
424 exit (STATE_UNKNOWN);
425 }
426 timeout_interval = atoi (optarg);
427 break;
428 case 'c': /* critical threshold */
429 if (is_integer (optarg)) {
430 cmax = atoi (optarg);
431 break;
432 }
433 else if (sscanf (optarg, ":%d", &cmax) == 1) {
434 break;
435 }
436 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2) {
437 break;
438 }
439 else if (sscanf (optarg, "%d:", &cmin) == 1) {
440 break;
441 }
442 else {
443 printf ("%s: Critical Process Count must be an integer!\n\n",
444 progname);
445 print_usage ();
446 exit (STATE_UNKNOWN);
447 }
448 case 'w': /* warning time threshold */
449 if (is_integer (optarg)) {
450 wmax = atoi (optarg);
451 break;
452 }
453 else if (sscanf (optarg, ":%d", &wmax) == 1) {
454 break;
455 }
456 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2) {
457 break;
458 }
459 else if (sscanf (optarg, "%d:", &wmin) == 1) {
460 break;
461 }
462 else {
463 printf ("%s: Warning Process Count must be an integer!\n\n",
464 progname);
465 print_usage ();
466 exit (STATE_UNKNOWN);
467 }
468 case 'p': /* process id */
469 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
470 asprintf (&fmt, "%s%sPPID = %d", fmt, (options ? ", " : ""), ppid);
471 options |= PPID;
472 break;
473 }
474 printf ("%s: Parent Process ID must be an integer!\n\n",
475 progname);
476 print_usage ();
477 exit (STATE_UNKNOWN);
478 case 's': /* status */
479 asprintf (&statopts, "%s", optarg);
480 asprintf (&fmt, "%s%sSTATE = %s", fmt, (options ? ", " : ""), statopts);
481 options |= STAT;
482 break;
483 case 'u': /* user or user id */
484 if (is_integer (optarg)) {
485 uid = atoi (optarg);
486 pw = getpwuid ((uid_t) uid);
487 /* check to be sure user exists */
488 if (pw == NULL) {
489 printf ("UID %d was not found\n", uid);
490 print_usage ();
491 exit (STATE_UNKNOWN);
492 }
493 }
494 else {
495 pw = getpwnam (optarg);
496 /* check to be sure user exists */
497 if (pw == NULL) {
498 printf ("User name %s was not found\n", optarg);
499 print_usage ();
500 exit (STATE_UNKNOWN);
501 }
502 /* then get uid */
503 uid = pw->pw_uid;
504 }
505 user = pw->pw_name;
506 asprintf (&fmt, "%s%sUID = %d (%s)", fmt, (options ? ", " : ""),
507 uid, user);
508 options |= USER;
509 break;
510 case 'C': /* command */
511 asprintf (&prog, "%s", optarg);
512 asprintf (&fmt, "%s%scommand name '%s'", fmt, (options ? ", " : ""),
513 prog);
514 options |= PROG;
515 break;
516 case 'a': /* args (full path name with args) */
517 asprintf (&args, "%s", optarg);
518 asprintf (&fmt, "%s%sargs '%s'", fmt, (options ? ", " : ""), args);
519 options |= ARGS;
520 break;
521 case 'r': /* RSS */
522 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
523 asprintf (&fmt, "%s%sRSS >= %d", fmt, (options ? ", " : ""), rss);
524 options |= RSS;
525 break;
526 }
527 printf ("%s: RSS must be an integer!\n\n",
528 progname);
529 print_usage ();
530 exit (STATE_UNKNOWN);
531 case 'z': /* VSZ */
532 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
533 asprintf (&fmt, "%s%sVSZ >= %d", fmt, (options ? ", " : ""), vsz);
534 options |= VSZ;
535 break;
536 }
537 printf ("%s: VSZ must be an integer!\n\n",
538 progname);
539 print_usage ();
540 exit (STATE_UNKNOWN);
541 case 'P': /* PCPU */
542 /* TODO: -P 1.5.5 is accepted */
543 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
544 asprintf (&fmt, "%s%sPCPU >= %.2f", fmt, (options ? ", " : ""), pcpu);
545 options |= PCPU;
546 break;
547 }
548 printf ("%s: PCPU must be a float!\n\n",
549 progname);
550 print_usage ();
551 exit (STATE_UNKNOWN);
552 case 'm':
553 asprintf (&metric_name, "%s", optarg);
554 if ( strcmp(optarg, "PROCS") == 0) {
555 metric = METRIC_PROCS;
556 break;
557 }
558 else if ( strcmp(optarg, "VSZ") == 0) {
559 metric = METRIC_VSZ;
560 break;
561 }
562 else if ( strcmp(optarg, "RSS") == 0 ) {
563 metric = METRIC_RSS;
564 break;
565 }
566 else if ( strcmp(optarg, "CPU") == 0 ) {
567 metric = METRIC_CPU;
568 break;
569 }
570 printf ("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n",
571 progname);
572 print_usage ();
573 exit (STATE_UNKNOWN);
574 case 'v': /* command */
575 verbose++;
576 break;
577 }
578 }
580 c = optind;
581 if (wmax == -1 && argv[c])
582 wmax = atoi (argv[c++]);
583 if (cmax == -1 && argv[c])
584 cmax = atoi (argv[c++]);
585 if (statopts == NULL && argv[c]) {
586 asprintf (&statopts, "%s", argv[c++]);
587 asprintf (&fmt, "%s%sSTATE = %s", fmt, (options ? ", " : ""), statopts);
588 options |= STAT;
589 }
591 return validate_arguments ();
592 }
595 int
596 validate_arguments ()
597 {
599 if (wmax >= 0 && wmin == -1)
600 wmin = 0;
601 if (cmax >= 0 && cmin == -1)
602 cmin = 0;
603 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
604 if (wmax > cmax && cmax != -1) {
605 printf ("wmax (%d) cannot be greater than cmax (%d)\n", wmax, cmax);
606 return ERROR;
607 }
608 if (cmin > wmin && wmin != -1) {
609 printf ("wmin (%d) cannot be less than cmin (%d)\n", wmin, cmin);
610 return ERROR;
611 }
612 }
614 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
615 /* printf ("At least one threshold must be set\n"); */
616 /* return ERROR; */
617 /* } */
619 if (options == 0)
620 options = ALL;
622 return options;
623 }
626 void
627 print_help (void)
628 {
629 print_revision (progname, REVISION);
630 printf
631 ("Copyright (c) %s %s <%s>\n\n%s\n",
632 COPYRIGHT, AUTHOR, EMAIL, SUMMARY);
633 print_usage ();
634 printf ("\nOptions:\n" LONGOPTIONS "\nExamples:\n" EXAMPLES "\n");
635 }
637 void
638 print_usage (void)
639 {
640 printf ("Usage:\n" " %s %s\n"
641 " %s (-h | --help) for detailed help\n"
642 " %s (-V | --version) for version information\n",
643 progname, OPTIONS, progname, progname);
644 }