1 /******************************************************************************
2 *
3 * CHECK_PROCS.C
4 *
5 * Program: Process plugin for Nagios
6 * License: GPL
7 * Copyright (c) 1999 Ethan Galstad (nagios@nagios.org)
8 *
9 * $Id$
10 *
11 * Description:
12 *
13 * This plugin checks the number of currently running processes and
14 * generates WARNING or CRITICAL states if the process count is outside
15 * the specified threshold ranges. The process count can be filtered by
16 * process owner, parent process PID, current state (e.g., 'Z'), or may
17 * be the total number of running processes
18 *
19 * License Information:
20 *
21 * This program is free software; you can redistribute it and/or modify
22 * it under the terms of the GNU General Public License as published by
23 * the Free Software Foundation; either version 2 of the License, or
24 * (at your option) any later version.
25 *
26 * This program is distributed in the hope that it will be useful, but
27 * WITHOUT ANY WARRANTY; without even the implied warranty of
28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 * General Public License for more details.
30 *
31 * You should have received a copy of the GNU General Public License
32 * along with this program; if not, write to the Free Software
33 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
34 *
35 ******************************************************************************/
/* Plugin name: printed in usage/version/error output and compared against
 * each command name from `ps` so the plugin does not count itself. */
const char *progname = "check_procs";
/* Revision keyword handed to print_revision(). */
#define REVISION "$Revision$"
/* Copyright years, author and contact address printed by print_help(). */
#define COPYRIGHT "1999-2002"
#define AUTHOR "Ethan Galstad"
#define EMAIL "nagios@nagios.org"
/* Short description printed by print_help() (passed as a %s argument). */
#define SUMMARY "\
Checks all processes and generates WARNING or CRITICAL states if the specified\n\
metric is outside the required threshold ranges. The metric defaults to number\n\
of processes. Search filters can be applied to limit the processes to check.\n"
/* Option synopsis printed by print_usage() (passed as a %s argument, so the
 * literal '%' in "%cpu" is safe here). */
#define OPTIONS "\
-w <range> -c <range> [-m metric]\n\
[-s state] [-p ppid] [-u user] [-r rss] [-z vsz] [-P %cpu]\n\
[-a argument-array] [-C command] [-v]"
/* Per-option help text. print_help() splices this directly into a printf
 * format string, so any literal '%' added here must be written as '%%'. */
#define LONGOPTIONS "\
Required Arguments:\n\
-w, --warning=RANGE\n\
Generate warning state if metric is outside this range\n\
-c, --critical=RANGE\n\
Generate critical state if metric is outside this range\n\
Optional Arguments:\n\
-m, --metric=TYPE\n\
Check thresholds against metric. Valid types:\n\
PROCS - number of processes (default)\n\
VSZ - virtual memory size\n\
RSS - resident set memory size\n\
CPU - percentage cpu\n\
-v, --verbose\n\
Extra information. Up to 3 verbosity levels\n\
Optional Filters:\n\
-s, --state=STATUSFLAGS\n\
Only scan for processes that have, in the output of `ps`, one or\n\
more of the status flags you specify (for example R, Z, S, RS,\n\
RSZDT, plus others based on the output of your 'ps' command).\n\
-p, --ppid=PPID\n\
Only scan for children of the parent process ID indicated.\n\
-z, --vsz=VSZ\n\
Only scan for processes with vsz higher than indicated.\n\
-r, --rss=RSS\n\
Only scan for processes with rss higher than indicated.\n\
-P, --pcpu=PCPU\n\
Only scan for processes with pcpu higher than indicated.\n\
-u, --user=USER\n\
Only scan for processes with user name or ID indicated.\n\
-a, --argument-array=STRING\n\
Only scan for processes with args that contain STRING.\n\
-C, --command=COMMAND\n\
Only scan for exact matches to the named COMMAND.\n\
\n\
RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
specified 'max:min', a warning status will be generated if the\n\
count is inside the specified range\n"
/* Example invocations. Like LONGOPTIONS, this is spliced into a printf
 * format string by print_help(), so a literal '%' must be written '%%'. */
#define EXAMPLES "\
check_procs -w 2:2 -c 2:1024 -C portsentry\n\
Warning if not two processes with command name portsentry. Critical\n\
if < 2 or > 1024 processes\n\
check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
Warning alert if > 10 processes with command arguments containing \n\
'/usr/local/bin/perl' and owned by root\n\
check_procs -w 50000 -c 100000 --metric=VSZ\n\
Alert if vsz of any processes over 50K or 100K\n"
#include "config.h"
#include <pwd.h>
#include <stdlib.h>
#include <string.h>
#include "common.h"
#include "popen.h"
#include "utils.h"
/* Forward declarations for the functions defined below main(). */
int process_arguments (int, char **);
int validate_arguments (void);
void print_usage (void);
void print_help (void);
int check_thresholds (int);
/* Warning/critical threshold bounds; -1 means the bound was not given. */
int wmax = -1;
int cmax = -1;
int wmin = -1;
int cmin = -1;
int options = 0; /* bitmask of filter criteria to test against */
/* Bits for `options`. ALL is the value validate_arguments() stores when no
 * filter was requested; the others each select one filter. */
#define ALL 1
#define STAT 2
#define PPID 4
#define USER 8
#define PROG 16
#define ARGS 32
#define VSZ 64
#define RSS 128
#define PCPU 256
/* Different metrics */
int metric = 0; /* one of the METRIC_* values below */
#define METRIC_PROCS 0
#define METRIC_VSZ 1
#define METRIC_RSS 2
#define METRIC_CPU 3
char *metric_name = ""; /* label printed at the start of the result line */
int verbose = 0; /* incremented once per -v */
int uid; /* -u: user ID a process must be owned by */
int ppid; /* -p: parent PID a process must have */
int vsz; /* -z: minimum vsz a process must have */
int rss; /* -r: minimum rss a process must have */
float pcpu; /* -P: minimum %cpu a process must have */
char *statopts = ""; /* -s: status flags to match */
char *prog = ""; /* -C: exact command name to match */
char *args = ""; /* -a: substring to look for in the argument string */
char *fmt = ""; /* human-readable list of active filters, printed after " with " */
char *fails = ""; /* comma-separated commands that hit a warn/crit threshold */
char tmp[MAX_INPUT_BUFFER]; /* scratch target for the trailing scanset in sscanf() option checks */
150 int
151 main (int argc, char **argv)
152 {
153 char input_buffer[MAX_INPUT_BUFFER];
155 int procuid = 0;
156 int procppid = 0;
157 int procvsz = 0;
158 int procrss = 0;
159 float procpcpu = 0;
160 char procstat[8];
161 char procprog[MAX_INPUT_BUFFER];
162 char *procargs;
164 const char *zombie = "Z";
166 int resultsum = 0; /* bitmask of the filter criteria met by a process */
167 int found = 0; /* counter for number of lines returned in `ps` output */
168 int procs = 0; /* counter for number of processes meeting filter criteria */
169 int pos; /* number of spaces before 'args' in `ps` output */
170 int cols; /* number of columns in ps output */
171 int warn = 0; /* number of processes in warn state */
172 int crit = 0; /* number of processes in crit state */
173 int i;
175 int result = STATE_UNKNOWN;
177 asprintf (&metric_name, "PROCS");
178 metric = METRIC_PROCS;
180 if (process_arguments (argc, argv) == ERROR)
181 usage ("Unable to parse command line\n");
183 if (verbose >= 2)
184 printf ("CMD: %s\n", PS_COMMAND);
186 child_process = spopen (PS_COMMAND);
187 if (child_process == NULL) {
188 printf ("Could not open pipe: %s\n", PS_COMMAND);
189 return STATE_UNKNOWN;
190 }
192 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
193 if (child_stderr == NULL)
194 printf ("Could not open stderr for %s\n", PS_COMMAND);
196 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
198 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
199 strcpy(procprog,"");
200 asprintf(&procargs,"");
202 cols = sscanf (input_buffer, PS_FORMAT, PS_VARLIST);
204 /* Zombie processes do not give a procprog command */
205 if ( cols == 6 && strstr(procstat, zombie) ) {
206 cols = 7;
207 }
208 if ( cols >= 7 ) {
209 resultsum = 0;
210 asprintf (&procargs, "%s", input_buffer + pos);
211 strip (procargs);
213 if ((options & STAT) && (strstr (statopts, procstat)))
214 resultsum |= STAT;
215 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
216 resultsum |= ARGS;
217 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
218 resultsum |= PROG;
219 if ((options & PPID) && (procppid == ppid))
220 resultsum |= PPID;
221 if ((options & USER) && (procuid == uid))
222 resultsum |= USER;
223 if ((options & VSZ) && (procvsz >= vsz))
224 resultsum |= VSZ;
225 if ((options & RSS) && (procrss >= rss))
226 resultsum |= RSS;
227 if ((options & PCPU) && (procpcpu >= pcpu))
228 resultsum |= PCPU;
230 if (verbose >= 3)
231 printf ("%d %d %d %d %d %.2f %s %s %s\n",
232 procs, procuid, procvsz, procrss,
233 procppid, procpcpu, procstat, procprog, procargs);
235 /* Ignore self */
236 if (strcmp (procprog, progname) == 0)
237 continue;
239 found++;
241 /* Next line if filters not matched */
242 if (!(options == resultsum || options == ALL))
243 continue;
245 procs++;
247 if (metric == METRIC_VSZ)
248 i = check_thresholds (procvsz);
249 else if (metric == METRIC_RSS)
250 i = check_thresholds (procrss);
251 /* TODO? float thresholds for --metric=CPU */
252 else if (metric == METRIC_CPU)
253 i = check_thresholds ((int)procpcpu);
255 if (metric != METRIC_PROCS) {
256 if (i == STATE_WARNING) {
257 warn++;
258 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
259 }
260 if (i == STATE_CRITICAL) {
261 crit++;
262 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
263 }
264 result = max_state (result, i);
265 }
266 }
267 /* This should not happen */
268 else if (verbose) {
269 printf("Not parseable: %s", input_buffer);
270 }
271 }
273 /* If we get anything on STDERR, at least set warning */
274 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
275 if (verbose)
276 printf ("STDERR: %s", input_buffer);
277 result = max_state (result, STATE_WARNING);
278 printf ("System call sent warnings to stderr\n");
279 }
281 (void) fclose (child_stderr);
283 /* close the pipe */
284 if (spclose (child_process)) {
285 printf ("System call returned nonzero status\n");
286 result = max_state (result, STATE_WARNING);
287 }
289 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
290 printf ("Unable to read output\n");
291 return result;
292 }
294 if ( result == STATE_UNKNOWN )
295 result = STATE_OK;
297 /* Needed if procs found, but none match filter */
298 if ( metric == METRIC_PROCS ) {
299 result = max_state (result, check_thresholds (procs) );
300 }
302 if ( result == STATE_OK ) {
303 printf ("%s OK: %d process%s",
304 metric_name, procs, ( procs != 1 ? "es" : "") );
305 } else if (result == STATE_WARNING) {
306 if ( metric == METRIC_PROCS ) {
307 printf ("PROCS WARNING: %d process%s", procs,
308 ( procs != 1 ? "es" : ""));
309 } else {
310 printf ("%s WARNING: %d warn out of %d process%s",
311 metric_name, warn, procs,
312 ( procs != 1 ? "es" : ""));
313 }
314 } else if (result == STATE_CRITICAL) {
315 if (metric == METRIC_PROCS) {
316 printf ("PROCS CRITICAL: %d process%s", procs,
317 ( procs != 1 ? "es" : ""));
318 } else {
319 printf ("%s CRITICAL: %d crit, %d warn out of %d process%s",
320 metric_name, crit, warn, procs,
321 ( procs != 1 ? "es" : ""));
322 }
323 }
325 if (fmt != "") {
326 printf (" with %s", fmt);
327 }
329 if ( verbose >= 1 && fails != "" )
330 printf (" [%s]", fails);
332 printf ("\n");
333 return result;
334 }
336 /* Check thresholds against value */
337 int
338 check_thresholds (int value)
339 {
340 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
341 return OK;
342 }
343 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
344 if (value > cmax && value < cmin)
345 return STATE_CRITICAL;
346 }
347 else if (cmax >= 0 && value > cmax) {
348 return STATE_CRITICAL;
349 }
350 else if (cmin >= 0 && value < cmin) {
351 return STATE_CRITICAL;
352 }
354 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
355 if (value > wmax && value < wmin) {
356 return STATE_WARNING;
357 }
358 }
359 else if (wmax >= 0 && value > wmax) {
360 return STATE_WARNING;
361 }
362 else if (wmin >= 0 && value < wmin) {
363 return STATE_WARNING;
364 }
365 return STATE_OK;
366 }
368 /* process command-line arguments */
369 int
370 process_arguments (int argc, char **argv)
371 {
372 int c = 1;
373 char *user;
374 struct passwd *pw;
375 int option_index = 0;
376 static struct option long_options[] = {
377 {"warning", required_argument, 0, 'w'},
378 {"critical", required_argument, 0, 'c'},
379 {"metric", required_argument, 0, 'm'},
380 {"timeout", required_argument, 0, 't'},
381 {"status", required_argument, 0, 's'},
382 {"ppid", required_argument, 0, 'p'},
383 {"command", required_argument, 0, 'C'},
384 {"vsz", required_argument, 0, 'z'},
385 {"rss", required_argument, 0, 'r'},
386 {"pcpu", required_argument, 0, 'P'},
387 {"argument-array", required_argument, 0, 'a'},
388 {"help", no_argument, 0, 'h'},
389 {"version", no_argument, 0, 'V'},
390 {"verbose", no_argument, 0, 'v'},
391 {0, 0, 0, 0}
392 };
394 for (c = 1; c < argc; c++)
395 if (strcmp ("-to", argv[c]) == 0)
396 strcpy (argv[c], "-t");
398 while (1) {
399 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
400 long_options, &option_index);
402 if (c == -1 || c == EOF)
403 break;
405 switch (c) {
406 case '?': /* help */
407 print_usage ();
408 exit (STATE_UNKNOWN);
409 case 'h': /* help */
410 print_help ();
411 exit (STATE_OK);
412 case 'V': /* version */
413 print_revision (progname, REVISION);
414 exit (STATE_OK);
415 case 't': /* timeout period */
416 if (!is_integer (optarg)) {
417 printf ("%s: Timeout Interval must be an integer!\n\n",
418 progname);
419 print_usage ();
420 exit (STATE_UNKNOWN);
421 }
422 timeout_interval = atoi (optarg);
423 break;
424 case 'c': /* critical threshold */
425 if (is_integer (optarg)) {
426 cmax = atoi (optarg);
427 break;
428 }
429 else if (sscanf (optarg, ":%d", &cmax) == 1) {
430 break;
431 }
432 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2) {
433 break;
434 }
435 else if (sscanf (optarg, "%d:", &cmin) == 1) {
436 break;
437 }
438 else {
439 printf ("%s: Critical Process Count must be an integer!\n\n",
440 progname);
441 print_usage ();
442 exit (STATE_UNKNOWN);
443 }
444 case 'w': /* warning time threshold */
445 if (is_integer (optarg)) {
446 wmax = atoi (optarg);
447 break;
448 }
449 else if (sscanf (optarg, ":%d", &wmax) == 1) {
450 break;
451 }
452 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2) {
453 break;
454 }
455 else if (sscanf (optarg, "%d:", &wmin) == 1) {
456 break;
457 }
458 else {
459 printf ("%s: Warning Process Count must be an integer!\n\n",
460 progname);
461 print_usage ();
462 exit (STATE_UNKNOWN);
463 }
464 case 'p': /* process id */
465 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
466 asprintf (&fmt, "%s%sPPID = %d", fmt, (options ? ", " : ""), ppid);
467 options |= PPID;
468 break;
469 }
470 printf ("%s: Parent Process ID must be an integer!\n\n",
471 progname);
472 print_usage ();
473 exit (STATE_UNKNOWN);
474 case 's': /* status */
475 asprintf (&statopts, "%s", optarg);
476 asprintf (&fmt, "%s%sSTATE = %s", fmt, (options ? ", " : ""), statopts);
477 options |= STAT;
478 break;
479 case 'u': /* user or user id */
480 if (is_integer (optarg)) {
481 uid = atoi (optarg);
482 pw = getpwuid ((uid_t) uid);
483 /* check to be sure user exists */
484 if (pw == NULL) {
485 printf ("UID %d was not found\n", uid);
486 print_usage ();
487 exit (STATE_UNKNOWN);
488 }
489 }
490 else {
491 pw = getpwnam (optarg);
492 /* check to be sure user exists */
493 if (pw == NULL) {
494 printf ("User name %s was not found\n", optarg);
495 print_usage ();
496 exit (STATE_UNKNOWN);
497 }
498 /* then get uid */
499 uid = pw->pw_uid;
500 }
501 user = pw->pw_name;
502 asprintf (&fmt, "%s%sUID = %d (%s)", fmt, (options ? ", " : ""),
503 uid, user);
504 options |= USER;
505 break;
506 case 'C': /* command */
507 asprintf (&prog, "%s", optarg);
508 asprintf (&fmt, "%s%scommand name '%s'", fmt, (options ? ", " : ""),
509 prog);
510 options |= PROG;
511 break;
512 case 'a': /* args (full path name with args) */
513 asprintf (&args, "%s", optarg);
514 asprintf (&fmt, "%s%sargs '%s'", fmt, (options ? ", " : ""), args);
515 options |= ARGS;
516 break;
517 case 'r': /* RSS */
518 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
519 asprintf (&fmt, "%s%sRSS >= %d", fmt, (options ? ", " : ""), rss);
520 options |= RSS;
521 break;
522 }
523 printf ("%s: RSS must be an integer!\n\n",
524 progname);
525 print_usage ();
526 exit (STATE_UNKNOWN);
527 case 'z': /* VSZ */
528 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
529 asprintf (&fmt, "%s%sVSZ >= %d", fmt, (options ? ", " : ""), vsz);
530 options |= VSZ;
531 break;
532 }
533 printf ("%s: VSZ must be an integer!\n\n",
534 progname);
535 print_usage ();
536 exit (STATE_UNKNOWN);
537 case 'P': /* PCPU */
538 /* TODO: -P 1.5.5 is accepted */
539 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
540 asprintf (&fmt, "%s%sPCPU >= %.2f", fmt, (options ? ", " : ""), pcpu);
541 options |= PCPU;
542 break;
543 }
544 printf ("%s: PCPU must be a float!\n\n",
545 progname);
546 print_usage ();
547 exit (STATE_UNKNOWN);
548 case 'm':
549 asprintf (&metric_name, "%s", optarg);
550 if ( strcmp(optarg, "PROCS") == 0) {
551 metric = METRIC_PROCS;
552 break;
553 }
554 else if ( strcmp(optarg, "VSZ") == 0) {
555 metric = METRIC_VSZ;
556 break;
557 }
558 else if ( strcmp(optarg, "RSS") == 0 ) {
559 metric = METRIC_RSS;
560 break;
561 }
562 else if ( strcmp(optarg, "CPU") == 0 ) {
563 metric = METRIC_CPU;
564 break;
565 }
566 printf ("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n",
567 progname);
568 print_usage ();
569 exit (STATE_UNKNOWN);
570 case 'v': /* command */
571 verbose++;
572 break;
573 }
574 }
576 c = optind;
577 if (wmax == -1 && argv[c])
578 wmax = atoi (argv[c++]);
579 if (cmax == -1 && argv[c])
580 cmax = atoi (argv[c++]);
581 if (statopts == NULL && argv[c]) {
582 asprintf (&statopts, "%s", argv[c++]);
583 asprintf (&fmt, "%s%sSTATE = %s", fmt, (options ? ", " : ""), statopts);
584 options |= STAT;
585 }
587 return validate_arguments ();
588 }
591 int
592 validate_arguments ()
593 {
595 if (wmax >= 0 && wmin == -1)
596 wmin = 0;
597 if (cmax >= 0 && cmin == -1)
598 cmin = 0;
599 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
600 if (wmax > cmax && cmax != -1) {
601 printf ("wmax (%d) cannot be greater than cmax (%d)\n", wmax, cmax);
602 return ERROR;
603 }
604 if (cmin > wmin && wmin != -1) {
605 printf ("wmin (%d) cannot be less than cmin (%d)\n", wmin, cmin);
606 return ERROR;
607 }
608 }
610 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
611 /* printf ("At least one threshold must be set\n"); */
612 /* return ERROR; */
613 /* } */
615 if (options == 0)
616 options = ALL;
618 return options;
619 }
622 void
623 print_help (void)
624 {
625 print_revision (progname, REVISION);
626 printf
627 ("Copyright (c) %s %s <%s>\n\n%s\n",
628 COPYRIGHT, AUTHOR, EMAIL, SUMMARY);
629 print_usage ();
630 printf ("\nOptions:\n" LONGOPTIONS "\nExamples:\n" EXAMPLES "\n");
631 }
633 void
634 print_usage (void)
635 {
636 printf ("Usage:\n" " %s %s\n"
637 " %s (-h | --help) for detailed help\n"
638 " %s (-V | --version) for version information\n",
639 progname, OPTIONS, progname, progname);
640 }