1 /******************************************************************************
2 *
3 * CHECK_PROCS.C
4 *
5 * Program: Process plugin for Nagios
6 * License: GPL
7 * Copyright (c) 1999 Ethan Galstad (nagios@nagios.org)
8 *
9 * $Id$
10 *
11 * Description:
12 *
13 * This plugin checks the number of currently running processes and
14 * generates WARNING or CRITICAL states if the process count is outside
15 * the specified threshold ranges. The process count can be filtered by
16 * process owner, parent process PID, current state (e.g., 'Z'), or may
17 * be the total number of running processes
18 *
19 * License Information:
20 *
21 * This program is free software; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This program is distributed in the hope that it will be useful, but
27 * WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 * General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program; if not, write to the Free Software
33 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
34 *
35 ******************************************************************************/
/* Plugin name as shown in usage, help and version output. */
const char *progname = "check_procs";

/* Version-control revision keyword; passed to print_revision(). */
#define REVISION "$Revision$"

/* Attribution fields printed in the copyright line of print_help(). */
#define COPYRIGHT "1999-2002"
#define AUTHOR "Ethan Galstad"
#define EMAIL "nagios@nagios.org"

/* Short description of the plugin, printed by print_help(). */
#define SUMMARY "\
Checks all processes and generates WARNING or CRITICAL states if the specified\n\
metric is outside the required threshold ranges. The metric defaults to number\n\
of processes. Search filters can be applied to limit the processes to check.\n"

/* Command-line synopsis; print_usage() prints it right after the program name. */
#define OPTIONS "\
-w <range> -c <range> [-m metric]\n\
[-s state] [-p ppid] [-u user] [-r rss] [-z vsz] [-P %cpu]\n\
[-a argument-array] [-C command] [-v]"
/*
 * Detailed per-option help text. print_help() embeds it in its format string
 * after the "Options:" heading, so it must not contain '%' characters.
 */
#define LONGOPTIONS "\
Required Arguments:\n\
-w, --warning=RANGE\n\
Generate warning state if metric is outside this range\n\
-c, --critical=RANGE\n\
Generate critical state if metric is outside this range\n\
Optional Arguments:\n\
-m, --metric=TYPE\n\
Check thresholds against metric. Valid types:\n\
PROCS - number of processes (default)\n\
VSZ - virtual memory size\n\
RSS - resident set memory size\n\
CPU - percentage cpu\n\
-v, --verbose\n\
Extra information. Up to 3 verbosity levels\n\
Optional Filters:\n\
-s, --state=STATUSFLAGS\n\
Only scan for processes that have, in the output of `ps`, one or\n\
more of the status flags you specify (for example R, Z, S, RS,\n\
RSZDT, plus others based on the output of your 'ps' command).\n\
-p, --ppid=PPID\n\
Only scan for children of the parent process ID indicated.\n\
-z, --vsz=VSZ\n\
Only scan for processes with vsz higher than indicated.\n\
-r, --rss=RSS\n\
Only scan for processes with rss higher than indicated.\n\
-P, --pcpu=PCPU\n\
Only scan for processes with pcpu higher than indicated.\n\
-u, --user=USER\n\
Only scan for processes with user name or ID indicated.\n\
-a, --argument-array=STRING\n\
Only scan for ARGS that match up to the length of the given STRING.\n\
-C, --command=COMMAND\n\
Only scan for exact matches to the named COMMAND.\n\
\n\
RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
specified 'max:min', a warning status will be generated if the\n\
count is inside the specified range\n"
/*
 * Worked invocation examples. print_help() embeds this text in its format
 * string after the "Examples:" heading, so it must not contain '%' characters.
 */
#define EXAMPLES "\
check_procs -w 2:2 -c 2:1024 -C portsentry\n\
Warning if not two processes with command name portsentry. Critical\n\
if < 2 or > 1024 processes\n\
check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
Warning alert if > 10 processes with command arguments containing \n\
'/usr/local/bin/perl' and owned by root\n\
check_procs -w 50000 -c 100000 --metric=VSZ\n\
Alert if vsz of any processes over 50K or 100K\n"
101 #include "config.h"
102 #include <pwd.h>
103 #include "common.h"
104 #include "popen.h"
105 #include "utils.h"
/* Forward declarations for the functions defined later in this file. */
int process_arguments (int, char **);
int validate_arguments (void);
void print_usage (void);
void print_help (void);
int check_thresholds (int);

/*
 * Warning (w*) and critical (c*) threshold bounds. A value of -1 means the
 * bound has not been set; check_thresholds() only applies bounds that are >= 0.
 */
int wmax = -1;
int cmax = -1;
int wmin = -1;
int cmin = -1;

int options = 0; /* bitmask of filter criteria to test against */
/* Filter bits. validate_arguments() substitutes ALL when no filter was requested. */
#define ALL 1
#define STAT 2
#define PPID 4
#define USER 8
#define PROG 16
#define ARGS 32
#define VSZ 64
#define RSS 128
#define PCPU 256

/* Metric the thresholds are checked against (selected with -m). */
int metric = 0;
#define METRIC_PROCS 0
#define METRIC_VSZ 1
#define METRIC_RSS 2
#define METRIC_CPU 3
/* Metric label printed in front of "OK"/"WARNING"/"CRITICAL" in the status line. */
char *metric_name = "";

int verbose = 0; /* incremented once per -v on the command line */

/* Filter values supplied on the command line (-u, -p, -z, -r, -P). */
int uid;
int ppid;
int vsz;
int rss;
float pcpu;

char *statopts = ""; /* status flags to match (-s) */
char *prog = ""; /* exact command name to match (-C) */
char *args = ""; /* argument-string prefix to match (-a) */
char *fmt = ""; /* description of the active filters, printed after " with " */
char *fails = ""; /* comma-separated command names that breached a threshold */
char tmp[MAX_INPUT_BUFFER]; /* general-purpose scratch buffer */
150 int
151 main (int argc, char **argv)
152 {
153 char input_buffer[MAX_INPUT_BUFFER];
155 int procuid = 0;
156 int procppid = 0;
157 int procvsz = 0;
158 int procrss = 0;
159 float procpcpu = 0;
160 char procstat[8];
161 char procprog[MAX_INPUT_BUFFER];
162 char *procargs;
164 const char *zombie = "Z";
166 int resultsum = 0; /* bitmask of the filter criteria met by a process */
167 int found = 0; /* counter for number of lines returned in `ps` output */
168 int procs = 0; /* counter for number of processes meeting filter criteria */
169 int pos; /* number of spaces before 'args' in `ps` output */
170 int cols; /* number of columns in ps output */
171 int warn = 0; /* number of processes in warn state */
172 int crit = 0; /* number of processes in crit state */
173 int i;
175 int result = STATE_UNKNOWN;
177 asprintf (&metric_name, "PROCS");
178 metric = METRIC_PROCS;
180 if (process_arguments (argc, argv) == ERROR)
181 usage ("Unable to parse command line\n");
183 if (verbose >= 2)
184 printf ("CMD: %s\n", PS_COMMAND);
186 child_process = spopen (PS_COMMAND);
187 if (child_process == NULL) {
188 printf ("Could not open pipe: %s\n", PS_COMMAND);
189 return STATE_UNKNOWN;
190 }
192 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
193 if (child_stderr == NULL)
194 printf ("Could not open stderr for %s\n", PS_COMMAND);
196 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
198 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
199 strcpy(procprog,"");
200 asprintf(&procargs,"");
202 cols = sscanf (input_buffer, PS_FORMAT, PS_VARLIST);
204 /* Zombie processes do not give a procprog command */
205 /* - would they give other columns? */
206 if ( cols == 3 && strstr(procstat, zombie) ) {
207 cols = 4;
208 }
209 if ( cols >= 7 ) {
210 found++;
211 resultsum = 0;
212 asprintf (&procargs, "%s", input_buffer + pos);
213 strip (procargs);
215 if ((options & STAT) && (strstr (statopts, procstat)))
216 resultsum |= STAT;
217 if ((options & ARGS) && procargs && (strstr (procargs, args) == procargs))
218 resultsum |= ARGS;
219 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
220 resultsum |= PROG;
221 if ((options & PPID) && (procppid == ppid))
222 resultsum |= PPID;
223 if ((options & USER) && (procuid == uid))
224 resultsum |= USER;
225 if ((options & VSZ) && (procvsz >= vsz))
226 resultsum |= VSZ;
227 if ((options & RSS) && (procrss >= rss))
228 resultsum |= RSS;
229 if ((options & PCPU) && (procpcpu >= pcpu))
230 resultsum |= PCPU;
232 if (verbose >= 3)
233 printf ("%d %d %d %d %d %.2f %s %s %s\n",
234 procs, procuid, procvsz, procrss,
235 procppid, procpcpu, procstat, procprog, procargs);
237 /* Next line if filters not matched */
238 if (!(options == resultsum || options == ALL))
239 continue;
241 procs++;
243 if (metric == METRIC_VSZ)
244 i = check_thresholds (procvsz);
245 else if (metric == METRIC_RSS)
246 i = check_thresholds (procrss);
247 /* TODO? float thresholds for --metric=CPU */
248 else if (metric == METRIC_CPU)
249 i = check_thresholds ((int)procpcpu);
251 if (metric != METRIC_PROCS) {
252 if (i == STATE_WARNING) {
253 warn++;
254 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
255 }
256 if (i == STATE_CRITICAL) {
257 crit++;
258 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
259 }
260 result = max_state (result, i);
261 }
262 }
263 /* This should not happen */
264 else if (verbose) {
265 printf("Not parseable: %s", input_buffer);
266 }
267 }
269 /* If we get anything on STDERR, at least set warning */
270 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
271 if (verbose)
272 printf ("STDERR: %s", input_buffer);
273 result = max_state (result, STATE_WARNING);
274 printf ("System call sent warnings to stderr\n");
275 }
277 (void) fclose (child_stderr);
279 /* close the pipe */
280 if (spclose (child_process)) {
281 printf ("System call returned nonzero status\n");
282 result = max_state (result, STATE_WARNING);
283 }
285 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
286 printf ("Unable to read output\n");
287 return result;
288 }
290 if ( result == STATE_UNKNOWN )
291 result = STATE_OK;
293 /* Needed if procs found, but none match filter */
294 if ( metric == METRIC_PROCS ) {
295 result = max_state (result, check_thresholds (procs) );
296 }
298 if ( result == STATE_OK ) {
299 printf ("%s OK: %d process%s",
300 metric_name, procs, ( procs != 1 ? "es" : "") );
301 } else if (result == STATE_WARNING) {
302 if ( metric == METRIC_PROCS ) {
303 printf ("PROCS WARNING: %d process%s", procs,
304 ( procs != 1 ? "es" : ""));
305 } else {
306 printf ("%s WARNING: %d warn out of %d process%s",
307 metric_name, warn, procs,
308 ( procs != 1 ? "es" : ""));
309 }
310 } else if (result == STATE_CRITICAL) {
311 if (metric == METRIC_PROCS) {
312 printf ("PROCS CRITICAL: %d process%s", procs,
313 ( procs != 1 ? "es" : ""));
314 } else {
315 printf ("%s CRITICAL: %d crit, %d warn out of %d process%s",
316 metric_name, crit, warn, procs,
317 ( procs != 1 ? "es" : ""));
318 }
319 }
321 if (fmt != "") {
322 printf (" with %s", fmt);
323 }
325 if ( verbose >= 1 && fails != "" )
326 printf (" [%s]", fails);
328 printf ("\n");
329 return result;
330 }
332 /* Check thresholds against value */
333 int
334 check_thresholds (int value)
335 {
336 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
337 return OK;
338 }
339 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
340 if (value > cmax && value < cmin)
341 return STATE_CRITICAL;
342 }
343 else if (cmax >= 0 && value > cmax) {
344 return STATE_CRITICAL;
345 }
346 else if (cmin >= 0 && value < cmin) {
347 return STATE_CRITICAL;
348 }
350 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
351 if (value > wmax && value < wmin) {
352 return STATE_WARNING;
353 }
354 }
355 else if (wmax >= 0 && value > wmax) {
356 return STATE_WARNING;
357 }
358 else if (wmin >= 0 && value < wmin) {
359 return STATE_WARNING;
360 }
361 return STATE_OK;
362 }
364 /* process command-line arguments */
365 int
366 process_arguments (int argc, char **argv)
367 {
368 int c = 1;
369 char *user;
370 struct passwd *pw;
371 int option_index = 0;
372 static struct option long_options[] = {
373 {"warning", required_argument, 0, 'w'},
374 {"critical", required_argument, 0, 'c'},
375 {"metric", required_argument, 0, 'm'},
376 {"timeout", required_argument, 0, 't'},
377 {"status", required_argument, 0, 's'},
378 {"ppid", required_argument, 0, 'p'},
379 {"command", required_argument, 0, 'C'},
380 {"vsz", required_argument, 0, 'z'},
381 {"rss", required_argument, 0, 'r'},
382 {"pcpu", required_argument, 0, 'P'},
383 {"argument-array", required_argument, 0, 'a'},
384 {"help", no_argument, 0, 'h'},
385 {"version", no_argument, 0, 'V'},
386 {"verbose", no_argument, 0, 'v'},
387 {0, 0, 0, 0}
388 };
390 for (c = 1; c < argc; c++)
391 if (strcmp ("-to", argv[c]) == 0)
392 strcpy (argv[c], "-t");
394 while (1) {
395 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
396 long_options, &option_index);
398 if (c == -1 || c == EOF)
399 break;
401 switch (c) {
402 case '?': /* help */
403 print_usage ();
404 exit (STATE_UNKNOWN);
405 case 'h': /* help */
406 print_help ();
407 exit (STATE_OK);
408 case 'V': /* version */
409 print_revision (progname, REVISION);
410 exit (STATE_OK);
411 case 't': /* timeout period */
412 if (!is_integer (optarg)) {
413 printf ("%s: Timeout Interval must be an integer!\n\n",
414 progname);
415 print_usage ();
416 exit (STATE_UNKNOWN);
417 }
418 timeout_interval = atoi (optarg);
419 break;
420 case 'c': /* critical threshold */
421 if (is_integer (optarg)) {
422 cmax = atoi (optarg);
423 break;
424 }
425 else if (sscanf (optarg, ":%d", &cmax) == 1) {
426 break;
427 }
428 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2) {
429 break;
430 }
431 else if (sscanf (optarg, "%d:", &cmin) == 1) {
432 break;
433 }
434 else {
435 printf ("%s: Critical Process Count must be an integer!\n\n",
436 progname);
437 print_usage ();
438 exit (STATE_UNKNOWN);
439 }
440 case 'w': /* warning time threshold */
441 if (is_integer (optarg)) {
442 wmax = atoi (optarg);
443 break;
444 }
445 else if (sscanf (optarg, ":%d", &wmax) == 1) {
446 break;
447 }
448 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2) {
449 break;
450 }
451 else if (sscanf (optarg, "%d:", &wmin) == 1) {
452 break;
453 }
454 else {
455 printf ("%s: Warning Process Count must be an integer!\n\n",
456 progname);
457 print_usage ();
458 exit (STATE_UNKNOWN);
459 }
460 case 'p': /* process id */
461 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
462 asprintf (&fmt, "%s%sPPID = %d", fmt, (options ? ", " : ""), ppid);
463 options |= PPID;
464 break;
465 }
466 printf ("%s: Parent Process ID must be an integer!\n\n",
467 progname);
468 print_usage ();
469 exit (STATE_UNKNOWN);
470 case 's': /* status */
471 asprintf (&statopts, "%s", optarg);
472 asprintf (&fmt, "%s%sSTATE = %s", fmt, (options ? ", " : ""), statopts);
473 options |= STAT;
474 break;
475 case 'u': /* user or user id */
476 if (is_integer (optarg)) {
477 uid = atoi (optarg);
478 pw = getpwuid ((uid_t) uid);
479 /* check to be sure user exists */
480 if (pw == NULL) {
481 printf ("UID %d was not found\n", uid);
482 print_usage ();
483 exit (STATE_UNKNOWN);
484 }
485 }
486 else {
487 pw = getpwnam (optarg);
488 /* check to be sure user exists */
489 if (pw == NULL) {
490 printf ("User name %s was not found\n", optarg);
491 print_usage ();
492 exit (STATE_UNKNOWN);
493 }
494 /* then get uid */
495 uid = pw->pw_uid;
496 }
497 user = pw->pw_name;
498 asprintf (&fmt, "%s%sUID = %d (%s)", fmt, (options ? ", " : ""),
499 uid, user);
500 options |= USER;
501 break;
502 case 'C': /* command */
503 asprintf (&prog, "%s", optarg);
504 asprintf (&fmt, "%s%scommand name '%s'", fmt, (options ? ", " : ""),
505 prog);
506 options |= PROG;
507 break;
508 case 'a': /* args (full path name with args) */
509 asprintf (&args, "%s", optarg);
510 asprintf (&fmt, "%s%sargs '%s'", fmt, (options ? ", " : ""), args);
511 options |= ARGS;
512 break;
513 case 'r': /* RSS */
514 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
515 asprintf (&fmt, "%s%sRSS >= %d", fmt, (options ? ", " : ""), rss);
516 options |= RSS;
517 break;
518 }
519 printf ("%s: RSS must be an integer!\n\n",
520 progname);
521 print_usage ();
522 exit (STATE_UNKNOWN);
523 case 'z': /* VSZ */
524 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
525 asprintf (&fmt, "%s%sVSZ >= %d", fmt, (options ? ", " : ""), vsz);
526 options |= VSZ;
527 break;
528 }
529 printf ("%s: VSZ must be an integer!\n\n",
530 progname);
531 print_usage ();
532 exit (STATE_UNKNOWN);
533 case 'P': /* PCPU */
534 /* TODO: -P 1.5.5 is accepted */
535 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
536 asprintf (&fmt, "%s%sPCPU >= %.2f", fmt, (options ? ", " : ""), pcpu);
537 options |= PCPU;
538 break;
539 }
540 printf ("%s: PCPU must be a float!\n\n",
541 progname);
542 print_usage ();
543 exit (STATE_UNKNOWN);
544 case 'm':
545 asprintf (&metric_name, "%s", optarg);
546 if ( strcmp(optarg, "PROCS") == 0) {
547 metric = METRIC_PROCS;
548 break;
549 }
550 else if ( strcmp(optarg, "VSZ") == 0) {
551 metric = METRIC_VSZ;
552 break;
553 }
554 else if ( strcmp(optarg, "RSS") == 0 ) {
555 metric = METRIC_RSS;
556 break;
557 }
558 else if ( strcmp(optarg, "CPU") == 0 ) {
559 metric = METRIC_CPU;
560 break;
561 }
562 printf ("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n",
563 progname);
564 print_usage ();
565 exit (STATE_UNKNOWN);
566 case 'v': /* command */
567 verbose++;
568 break;
569 }
570 }
572 c = optind;
573 if (wmax == -1 && argv[c])
574 wmax = atoi (argv[c++]);
575 if (cmax == -1 && argv[c])
576 cmax = atoi (argv[c++]);
577 if (statopts == NULL && argv[c]) {
578 asprintf (&statopts, "%s", argv[c++]);
579 asprintf (&fmt, "%s%sSTATE = %s", fmt, (options ? ", " : ""), statopts);
580 options |= STAT;
581 }
583 return validate_arguments ();
584 }
587 int
588 validate_arguments ()
589 {
591 if (wmax >= 0 && wmin == -1)
592 wmin = 0;
593 if (cmax >= 0 && cmin == -1)
594 cmin = 0;
595 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
596 if (wmax > cmax && cmax != -1) {
597 printf ("wmax (%d) cannot be greater than cmax (%d)\n", wmax, cmax);
598 return ERROR;
599 }
600 if (cmin > wmin && wmin != -1) {
601 printf ("wmin (%d) cannot be less than cmin (%d)\n", wmin, cmin);
602 return ERROR;
603 }
604 }
606 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
607 /* printf ("At least one threshold must be set\n"); */
608 /* return ERROR; */
609 /* } */
611 if (options == 0)
612 options = ALL;
614 return options;
615 }
618 void
619 print_help (void)
620 {
621 print_revision (progname, REVISION);
622 printf
623 ("Copyright (c) %s %s <%s>\n\n%s\n",
624 COPYRIGHT, AUTHOR, EMAIL, SUMMARY);
625 print_usage ();
626 printf ("\nOptions:\n" LONGOPTIONS "\nExamples:\n" EXAMPLES "\n");
627 }
629 void
630 print_usage (void)
631 {
632 printf ("Usage:\n" " %s %s\n"
633 " %s (-h | --help) for detailed help\n"
634 " %s (-V | --version) for version information\n",
635 progname, OPTIONS, progname, progname);
636 }