1 /******************************************************************************
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 ******************************************************************************/
/*
 * Plugin identity strings.
 *   progname  - name shown in usage/version output and used to skip our own
 *               entry in the process list
 *   revision  - revision keyword handed to print_revision()
 *   copyright - copyright years printed by print_help()
 *   email     - contact address printed by print_help()
 */
const char *progname  = "check_procs";
const char *revision  = "$Revision$";
const char *copyright = "2000-2003";
const char *email     = "nagiosplug-devel@lists.sourceforge.net";
24 #include "common.h"
25 #include "popen.h"
26 #include "utils.h"
27 #include <pwd.h>
29 int process_arguments (int, char **);
30 int validate_arguments (void);
31 int check_thresholds (int);
32 void print_help (void);
33 void print_usage (void);
35 int wmax = -1;
36 int cmax = -1;
37 int wmin = -1;
38 int cmin = -1;
40 int options = 0; /* bitmask of filter criteria to test against */
41 #define ALL 1
42 #define STAT 2
43 #define PPID 4
44 #define USER 8
45 #define PROG 16
46 #define ARGS 32
47 #define VSZ 64
48 #define RSS 128
49 #define PCPU 256
51 /* Different metrics */
52 char *metric_name;
53 enum metric {
54 METRIC_PROCS,
55 METRIC_VSZ,
56 METRIC_RSS,
57 METRIC_CPU
58 };
59 enum metric metric = METRIC_PROCS;
61 int verbose = 0;
62 int uid;
63 int ppid;
64 int vsz;
65 int rss;
66 float pcpu;
67 char *statopts;
68 char *prog;
69 char *args;
70 char *fmt;
71 char *fails;
72 char tmp[MAX_INPUT_BUFFER];
78 \f
79 int
80 main (int argc, char **argv)
81 {
82 char *input_buffer;
83 char *input_line;
84 char *procprog;
86 int procuid = 0;
87 int procppid = 0;
88 int procvsz = 0;
89 int procrss = 0;
90 float procpcpu = 0;
91 char procstat[8];
92 char *procargs;
93 char *temp_string;
95 const char *zombie = "Z";
97 int resultsum = 0; /* bitmask of the filter criteria met by a process */
98 int found = 0; /* counter for number of lines returned in `ps` output */
99 int procs = 0; /* counter for number of processes meeting filter criteria */
100 int pos; /* number of spaces before 'args' in `ps` output */
101 int cols; /* number of columns in ps output */
102 int expected_cols = PS_COLS - 1;
103 int warn = 0; /* number of processes in warn state */
104 int crit = 0; /* number of processes in crit state */
105 int i = 0;
107 int result = STATE_UNKNOWN;
109 setlocale (LC_ALL, "");
110 bindtextdomain (PACKAGE, LOCALEDIR);
111 textdomain (PACKAGE);
113 input_buffer = malloc (MAX_INPUT_BUFFER);
114 procprog = malloc (MAX_INPUT_BUFFER);
116 asprintf (&metric_name, "PROCS");
117 metric = METRIC_PROCS;
119 if (process_arguments (argc, argv) == ERROR)
120 usage (_("Unable to parse command line\n"));
123 /* Set signal handling and alarm timeout */
124 if (signal (SIGALRM, popen_timeout_alarm_handler) == SIG_ERR) {
125 printf (_("Cannot catch SIGALRM"));
126 return STATE_UNKNOWN;
127 }
128 alarm (timeout_interval);
131 if (verbose >= 2)
132 printf (_("CMD: %s\n"), PS_COMMAND);
134 child_process = spopen (PS_COMMAND);
135 if (child_process == NULL) {
136 printf (_("Could not open pipe: %s\n"), PS_COMMAND);
137 return STATE_UNKNOWN;
138 }
140 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
141 if (child_stderr == NULL)
142 printf (_("Could not open stderr for %s\n"), PS_COMMAND);
144 /* flush first line */
145 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
146 while ( input_buffer[strlen(input_buffer)-1] != '\n' )
147 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
149 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
150 asprintf (&input_line, "%s", input_buffer);
151 while ( input_buffer[strlen(input_buffer)-1] != '\n' ) {
152 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
153 asprintf (&input_line, "%s%s", input_line, input_buffer);
154 }
156 if (verbose >= 3)
157 printf ("%s", input_line);
159 strcpy (procprog, "");
160 asprintf (&procargs, "%s", "");
162 cols = sscanf (input_line, PS_FORMAT, PS_VARLIST);
164 /* Zombie processes do not give a procprog command */
165 if ( cols == (expected_cols - 1) && strstr(procstat, zombie) ) {
166 cols = expected_cols;
167 }
168 if ( cols >= expected_cols ) {
169 resultsum = 0;
170 asprintf (&procargs, "%s", input_line + pos);
171 strip (procargs);
173 /* Some ps return full pathname for command. This removes path */
174 temp_string = strtok ((char *)procprog, "/");
175 while (temp_string) {
176 strcpy(procprog, temp_string);
177 temp_string = strtok (NULL, "/");
178 }
180 if (verbose >= 3)
181 printf ("%d %d %d %d %d %.2f %s %s %s\n",
182 procs, procuid, procvsz, procrss,
183 procppid, procpcpu, procstat, procprog, procargs);
185 /* Ignore self */
186 if (strcmp (procprog, progname) == 0) {
187 continue;
188 }
190 if ((options & STAT) && (strstr (statopts, procstat)))
191 resultsum |= STAT;
192 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
193 resultsum |= ARGS;
194 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
195 resultsum |= PROG;
196 if ((options & PPID) && (procppid == ppid))
197 resultsum |= PPID;
198 if ((options & USER) && (procuid == uid))
199 resultsum |= USER;
200 if ((options & VSZ) && (procvsz >= vsz))
201 resultsum |= VSZ;
202 if ((options & RSS) && (procrss >= rss))
203 resultsum |= RSS;
204 if ((options & PCPU) && (procpcpu >= pcpu))
205 resultsum |= PCPU;
207 found++;
209 /* Next line if filters not matched */
210 if (!(options == resultsum || options == ALL))
211 continue;
213 procs++;
215 if (metric == METRIC_VSZ)
216 i = check_thresholds (procvsz);
217 else if (metric == METRIC_RSS)
218 i = check_thresholds (procrss);
219 /* TODO? float thresholds for --metric=CPU */
220 else if (metric == METRIC_CPU)
221 i = check_thresholds ((int)procpcpu);
223 if (metric != METRIC_PROCS) {
224 if (i == STATE_WARNING) {
225 warn++;
226 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
227 result = max_state (result, i);
228 }
229 if (i == STATE_CRITICAL) {
230 crit++;
231 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
232 result = max_state (result, i);
233 }
234 }
235 }
236 /* This should not happen */
237 else if (verbose) {
238 printf(_("Not parseable: %s"), input_buffer);
239 }
240 }
242 /* If we get anything on STDERR, at least set warning */
243 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
244 if (verbose)
245 printf (_("STDERR: %s"), input_buffer);
246 result = max_state (result, STATE_WARNING);
247 printf (_("System call sent warnings to stderr\n"));
248 }
250 (void) fclose (child_stderr);
252 /* close the pipe */
253 if (spclose (child_process)) {
254 printf (_("System call returned nonzero status\n"));
255 result = max_state (result, STATE_WARNING);
256 }
258 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
259 printf (_("Unable to read output\n"));
260 return result;
261 }
263 if ( result == STATE_UNKNOWN )
264 result = STATE_OK;
266 /* Needed if procs found, but none match filter */
267 if ( metric == METRIC_PROCS ) {
268 result = max_state (result, check_thresholds (procs) );
269 }
271 if ( result == STATE_OK ) {
272 printf ("%s %s: ", metric_name, _("OK"));
273 } else if (result == STATE_WARNING) {
274 printf ("%s %s: ", metric_name, _("WARNING"));
275 if ( metric != METRIC_PROCS ) {
276 printf (_("%d warn out of "), warn);
277 }
278 } else if (result == STATE_CRITICAL) {
279 printf ("%s %s: ", metric_name, _("CRITICAL"));
280 if (metric != METRIC_PROCS) {
281 printf (_("%d crit, %d warn out of "), crit, warn);
282 }
283 }
284 printf (ngettext ("%d process", "%d processes", (unsigned long) procs), procs);
286 if (strcmp(fmt,"") != 0) {
287 printf (_(" with %s"), fmt);
288 }
290 if ( verbose >= 1 && strcmp(fails,"") )
291 printf (" [%s]", fails);
293 printf ("\n");
294 return result;
295 }
301 \f
302 /* process command-line arguments */
303 int
304 process_arguments (int argc, char **argv)
305 {
306 int c = 1;
307 char *user;
308 struct passwd *pw;
309 int option = 0;
310 static struct option longopts[] = {
311 {"warning", required_argument, 0, 'w'},
312 {"critical", required_argument, 0, 'c'},
313 {"metric", required_argument, 0, 'm'},
314 {"timeout", required_argument, 0, 't'},
315 {"status", required_argument, 0, 's'},
316 {"ppid", required_argument, 0, 'p'},
317 {"command", required_argument, 0, 'C'},
318 {"vsz", required_argument, 0, 'z'},
319 {"rss", required_argument, 0, 'r'},
320 {"pcpu", required_argument, 0, 'P'},
321 {"argument-array", required_argument, 0, 'a'},
322 {"help", no_argument, 0, 'h'},
323 {"version", no_argument, 0, 'V'},
324 {"verbose", no_argument, 0, 'v'},
325 {0, 0, 0, 0}
326 };
328 for (c = 1; c < argc; c++)
329 if (strcmp ("-to", argv[c]) == 0)
330 strcpy (argv[c], "-t");
332 while (1) {
333 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
334 longopts, &option);
336 if (c == -1 || c == EOF)
337 break;
339 switch (c) {
340 case '?': /* help */
341 print_usage ();
342 exit (STATE_UNKNOWN);
343 case 'h': /* help */
344 print_help ();
345 exit (STATE_OK);
346 case 'V': /* version */
347 print_revision (progname, revision);
348 exit (STATE_OK);
349 case 't': /* timeout period */
350 if (!is_integer (optarg))
351 usage2 (_("Timeout interval must be a positive integer"), optarg);
352 else
353 timeout_interval = atoi (optarg);
354 break;
355 case 'c': /* critical threshold */
356 if (is_integer (optarg))
357 cmax = atoi (optarg);
358 else if (sscanf (optarg, ":%d", &cmax) == 1)
359 break;
360 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2)
361 break;
362 else if (sscanf (optarg, "%d:", &cmin) == 1)
363 break;
364 else
365 usage (_("Critical Process Count must be an integer!\n\n"));
366 break;
367 case 'w': /* warning threshold */
368 if (is_integer (optarg))
369 wmax = atoi (optarg);
370 else if (sscanf (optarg, ":%d", &wmax) == 1)
371 break;
372 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2)
373 break;
374 else if (sscanf (optarg, "%d:", &wmin) == 1)
375 break;
376 else
377 usage (_("Warning Process Count must be an integer!\n\n"));
378 break;
379 case 'p': /* process id */
380 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
381 asprintf (&fmt, "%s%sPPID = %d", (fmt ? fmt : "") , (options ? ", " : ""), ppid);
382 options |= PPID;
383 break;
384 }
385 usage2 (_("%s: Parent Process ID must be an integer!\n\n"), progname);
386 case 's': /* status */
387 if (statopts)
388 break;
389 else
390 statopts = optarg;
391 asprintf (&fmt, _("%s%sSTATE = %s"), (fmt ? fmt : ""), (options ? ", " : ""), statopts);
392 options |= STAT;
393 break;
394 case 'u': /* user or user id */
395 if (is_integer (optarg)) {
396 uid = atoi (optarg);
397 pw = getpwuid ((uid_t) uid);
398 /* check to be sure user exists */
399 if (pw == NULL)
400 usage2 (_("UID %s was not found\n"), optarg);
401 }
402 else {
403 pw = getpwnam (optarg);
404 /* check to be sure user exists */
405 if (pw == NULL)
406 usage2 (_("User name %s was not found\n"), optarg);
407 /* then get uid */
408 uid = pw->pw_uid;
409 }
410 user = pw->pw_name;
411 asprintf (&fmt, _("%s%sUID = %d (%s)"), (fmt ? fmt : ""), (options ? ", " : ""),
412 uid, user);
413 options |= USER;
414 break;
415 case 'C': /* command */
416 if (prog)
417 break;
418 else
419 prog = optarg;
420 asprintf (&fmt, _("%s%scommand name '%s'"), (fmt ? fmt : ""), (options ? ", " : ""),
421 prog);
422 options |= PROG;
423 break;
424 case 'a': /* args (full path name with args) */
425 if (args)
426 break;
427 else
428 args = optarg;
429 asprintf (&fmt, _("%s%sargs '%s'"), (fmt ? fmt : ""), (options ? ", " : ""), args);
430 options |= ARGS;
431 break;
432 case 'r': /* RSS */
433 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
434 asprintf (&fmt, _("%s%sRSS >= %d"), (fmt ? fmt : ""), (options ? ", " : ""), rss);
435 options |= RSS;
436 break;
437 }
438 usage2 (_("%s: RSS must be an integer!\n\n"), progname);
439 case 'z': /* VSZ */
440 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
441 asprintf (&fmt, _("%s%sVSZ >= %d"), (fmt ? fmt : ""), (options ? ", " : ""), vsz);
442 options |= VSZ;
443 break;
444 }
445 usage2 (_("%s: VSZ must be an integer!\n\n"), progname);
446 case 'P': /* PCPU */
447 /* TODO: -P 1.5.5 is accepted */
448 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
449 asprintf (&fmt, _("%s%sPCPU >= %.2f"), (fmt ? fmt : ""), (options ? ", " : ""), pcpu);
450 options |= PCPU;
451 break;
452 }
453 usage2 (_("%s: PCPU must be a float!\n\n"), progname);
454 case 'm':
455 asprintf (&metric_name, "%s", optarg);
456 if ( strcmp(optarg, "PROCS") == 0) {
457 metric = METRIC_PROCS;
458 break;
459 }
460 else if ( strcmp(optarg, "VSZ") == 0) {
461 metric = METRIC_VSZ;
462 break;
463 }
464 else if ( strcmp(optarg, "RSS") == 0 ) {
465 metric = METRIC_RSS;
466 break;
467 }
468 else if ( strcmp(optarg, "CPU") == 0 ) {
469 metric = METRIC_CPU;
470 break;
471 }
472 printf (_("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n"),
473 progname);
474 print_usage ();
475 exit (STATE_UNKNOWN);
476 case 'v': /* command */
477 verbose++;
478 break;
479 }
480 }
482 c = optind;
483 if (wmax == -1 && argv[c])
484 wmax = atoi (argv[c++]);
485 if (cmax == -1 && argv[c])
486 cmax = atoi (argv[c++]);
487 if (statopts == NULL && argv[c]) {
488 asprintf (&statopts, "%s", argv[c++]);
489 asprintf (&fmt, _("%s%sSTATE = %s"), (fmt ? fmt : ""), (options ? ", " : ""), statopts);
490 options |= STAT;
491 }
493 return validate_arguments ();
494 }
499 int
500 validate_arguments ()
501 {
503 if (wmax >= 0 && wmin == -1)
504 wmin = 0;
505 if (cmax >= 0 && cmin == -1)
506 cmin = 0;
507 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
508 if (wmax > cmax && cmax != -1) {
509 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax, cmax);
510 return ERROR;
511 }
512 if (cmin > wmin && wmin != -1) {
513 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin, cmin);
514 return ERROR;
515 }
516 }
518 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
519 /* printf ("At least one threshold must be set\n"); */
520 /* return ERROR; */
521 /* } */
523 if (options == 0)
524 options = ALL;
526 if (statopts==NULL)
527 statopts = strdup("");
529 if (prog==NULL)
530 prog = strdup("");
532 if (args==NULL)
533 args = strdup("");
535 if (fmt==NULL)
536 fmt = strdup("");
538 if (fails==NULL)
539 fails = strdup("");
541 return options;
542 }
548 \f
549 /* Check thresholds against value */
550 int
551 check_thresholds (int value)
552 {
553 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
554 return OK;
555 }
556 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
557 if (value > cmax && value < cmin)
558 return STATE_CRITICAL;
559 }
560 else if (cmax >= 0 && value > cmax) {
561 return STATE_CRITICAL;
562 }
563 else if (cmin >= 0 && value < cmin) {
564 return STATE_CRITICAL;
565 }
567 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
568 if (value > wmax && value < wmin) {
569 return STATE_WARNING;
570 }
571 }
572 else if (wmax >= 0 && value > wmax) {
573 return STATE_WARNING;
574 }
575 else if (wmin >= 0 && value < wmin) {
576 return STATE_WARNING;
577 }
578 return STATE_OK;
579 }
585 \f
586 void
587 print_help (void)
588 {
589 print_revision (progname, revision);
591 printf ("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>");
592 printf (COPYRIGHT, copyright, email);
594 printf(_("\
595 Checks all processes and generates WARNING or CRITICAL states if the specified\n\
596 metric is outside the required threshold ranges. The metric defaults to number\n\
597 of processes. Search filters can be applied to limit the processes to check.\n\n"));
599 print_usage ();
601 printf(_("\n\
602 Required Arguments:\n\
603 -w, --warning=RANGE\n\
604 Generate warning state if metric is outside this range\n\
605 -c, --critical=RANGE\n\
606 Generate critical state if metric is outside this range\n"));
608 printf(_("\n\
609 Optional Arguments:\n\
610 -m, --metric=TYPE\n\
611 Check thresholds against metric. Valid types:\n\
612 PROCS - number of processes (default)\n\
613 VSZ - virtual memory size\n\
614 RSS - resident set memory size\n\
615 CPU - percentage cpu\n"));
617 printf (_(UT_TIMEOUT), DEFAULT_SOCKET_TIMEOUT);
619 printf(_("\
620 -v, --verbose\n\
621 Extra information. Up to 3 verbosity levels\n"));
623 printf(_("\n\
624 Optional Filters:\n\
625 -s, --state=STATUSFLAGS\n\
626 Only scan for processes that have, in the output of `ps`, one or\n\
627 more of the status flags you specify (for example R, Z, S, RS,\n\
628 RSZDT, plus others based on the output of your 'ps' command).\n\
629 -p, --ppid=PPID\n\
630 Only scan for children of the parent process ID indicated.\n\
631 -z, --vsz=VSZ\n\
632 Only scan for processes with vsz higher than indicated.\n\
633 -r, --rss=RSS\n\
634 Only scan for processes with rss higher than indicated.\n"));
636 printf(_("\
637 -P, --pcpu=PCPU\n\
638 Only scan for processes with pcpu higher than indicated.\n\
639 -u, --user=USER\n\
640 Only scan for processes with user name or ID indicated.\n\
641 -a, --argument-array=STRING\n\
642 Only scan for processes with args that contain STRING.\n\
643 -C, --command=COMMAND\n\
644 Only scan for exact matches of COMMAND (without path).\n"));
646 printf(_("\n\
647 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
648 specified 'max:min', a warning status will be generated if the\n\
649 count is inside the specified range\n\n"));
651 printf(_("\
652 This plugin checks the number of currently running processes and\n\
653 generates WARNING or CRITICAL states if the process count is outside\n\
654 the specified threshold ranges. The process count can be filtered by\n\
655 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
656 be the total number of running processes\n\n"));
658 printf(_("\
659 Examples:\n\
660 check_procs -w 2:2 -c 2:1024 -C portsentry\n\
661 Warning if not two processes with command name portsentry. Critical\n\
662 if < 2 or > 1024 processes\n\n\
663 check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
664 Warning alert if > 10 processes with command arguments containing \n\
665 '/usr/local/bin/perl' and owned by root\n\n\
666 check_procs -w 50000 -c 100000 --metric=VSZ\n\
667 Alert if vsz of any processes over 50K or 100K\n\
668 check_procs -w 10 -c 20 --metric=CPU\n\
669 Alert if cpu of any processes over 10% or 20%\n\n"));
671 printf (_(UT_SUPPORT));
672 }
674 void
675 print_usage (void)
676 {
677 printf ("\
678 Usage: %s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n\
679 [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n\
680 [-C command] [-t timeout] [-v]\n", progname);
681 printf (_(UT_HLP_VRS), progname, progname);
682 }