1 /******************************************************************************
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 ******************************************************************************/
/* Program identity strings.  progname is also compared against each
   process's command name in main() so the plugin does not count itself;
   revision, copyright and email are passed to the revision/help printers. */
const char *progname = "check_procs";
const char *revision = "$Revision$";
const char *copyright = "2000-2003";
const char *email = "nagiosplug-devel@lists.sourceforge.net";
24 #include "common.h"
25 #include "popen.h"
26 #include "utils.h"
27 #include <pwd.h>
29 int process_arguments (int, char **);
30 int validate_arguments (void);
31 int check_thresholds (int);
32 void print_help (void);
33 void print_usage (void);
/* Warning/critical threshold bounds; -1 means "not set".
   Filled in by process_arguments() and read by check_thresholds(). */
int wmax = -1;
int cmax = -1;
int wmin = -1;
int cmin = -1;
int options = 0; /* bitmask of filter criteria to test against */
/* Bit values for `options` (requested filters) and for the per-process
   `resultsum` computed in main().  ALL is set by validate_arguments()
   when no filter was requested. */
#define ALL 1
#define STAT 2
#define PPID 4
#define USER 8
#define PROG 16
#define ARGS 32
#define VSZ 64
#define RSS 128
#define PCPU 256
/* Different metrics */
char *metric_name; /* label printed in the status line (e.g. "PROCS") */
enum metric {
	METRIC_PROCS,
	METRIC_VSZ,
	METRIC_RSS,
	METRIC_CPU
};
enum metric metric = METRIC_PROCS; /* metric the thresholds are applied to */
int verbose = 0; /* incremented once per -v */
/* Filter values supplied on the command line (-u, -p, -z, -r, -P). */
int uid;
int ppid;
int vsz;
int rss;
float pcpu;
/* Filter strings supplied on the command line (-s, -C, -a). */
char *statopts;
char *prog;
char *args;
char *fmt; /* description of the active filters, printed after " with " */
char *fails; /* comma-separated command names that breached a threshold */
char tmp[MAX_INPUT_BUFFER]; /* scratch target used by sscanf to detect trailing junk */
78 \f
79 int
80 main (int argc, char **argv)
81 {
82 char input_buffer[MAX_INPUT_BUFFER];
84 int procuid = 0;
85 int procppid = 0;
86 int procvsz = 0;
87 int procrss = 0;
88 float procpcpu = 0;
89 char procstat[8];
90 char procprog[MAX_INPUT_BUFFER];
91 char *procargs;
93 const char *zombie = "Z";
95 int resultsum = 0; /* bitmask of the filter criteria met by a process */
96 int found = 0; /* counter for number of lines returned in `ps` output */
97 int procs = 0; /* counter for number of processes meeting filter criteria */
98 int pos; /* number of spaces before 'args' in `ps` output */
99 int cols; /* number of columns in ps output */
100 int warn = 0; /* number of processes in warn state */
101 int crit = 0; /* number of processes in crit state */
102 int i = 0;
104 int result = STATE_UNKNOWN;
106 asprintf (&metric_name, "PROCS");
107 metric = METRIC_PROCS;
109 if (process_arguments (argc, argv) == ERROR)
110 usage (_("Unable to parse command line\n"));
112 if (verbose >= 2)
113 printf (_("CMD: %s\n"), PS_COMMAND);
115 child_process = spopen (PS_COMMAND);
116 if (child_process == NULL) {
117 printf (_("Could not open pipe: %s\n"), PS_COMMAND);
118 return STATE_UNKNOWN;
119 }
121 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
122 if (child_stderr == NULL)
123 printf (_("Could not open stderr for %s\n"), PS_COMMAND);
125 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
127 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
128 strcpy (procprog, "");
129 asprintf (&procargs, "%s", "");
131 cols = sscanf (input_buffer, PS_FORMAT, PS_VARLIST);
133 /* Zombie processes do not give a procprog command */
134 if ( cols == 6 && strstr(procstat, zombie) ) {
135 cols = 7;
136 /* Set some value for procargs for the strip command further below
137 Seen to be a problem on some Solaris 7 and 8 systems */
138 input_buffer[pos] = '\n';
139 input_buffer[pos+1] = 0x0;
140 }
141 if ( cols >= 7 ) {
142 resultsum = 0;
143 asprintf (&procargs, "%s", input_buffer + pos);
144 strip (procargs);
146 if ((options & STAT) && (strstr (statopts, procstat)))
147 resultsum |= STAT;
148 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
149 resultsum |= ARGS;
150 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
151 resultsum |= PROG;
152 if ((options & PPID) && (procppid == ppid))
153 resultsum |= PPID;
154 if ((options & USER) && (procuid == uid))
155 resultsum |= USER;
156 if ((options & VSZ) && (procvsz >= vsz))
157 resultsum |= VSZ;
158 if ((options & RSS) && (procrss >= rss))
159 resultsum |= RSS;
160 if ((options & PCPU) && (procpcpu >= pcpu))
161 resultsum |= PCPU;
163 if (verbose >= 3)
164 printf ("%d %d %d %d %d %.2f %s %s %s\n",
165 procs, procuid, procvsz, procrss,
166 procppid, procpcpu, procstat, procprog, procargs);
168 /* Ignore self */
169 if (strcmp (procprog, progname) == 0)
170 continue;
172 found++;
174 /* Next line if filters not matched */
175 if (!(options == resultsum || options == ALL))
176 continue;
178 procs++;
180 if (metric == METRIC_VSZ)
181 i = check_thresholds (procvsz);
182 else if (metric == METRIC_RSS)
183 i = check_thresholds (procrss);
184 /* TODO? float thresholds for --metric=CPU */
185 else if (metric == METRIC_CPU)
186 i = check_thresholds ((int)procpcpu);
188 if (metric != METRIC_PROCS) {
189 if (i == STATE_WARNING) {
190 warn++;
191 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
192 }
193 if (i == STATE_CRITICAL) {
194 crit++;
195 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
196 }
197 result = max_state (result, i);
198 }
199 }
200 /* This should not happen */
201 else if (verbose) {
202 printf(_("Not parseable: %s"), input_buffer);
203 }
204 }
206 /* If we get anything on STDERR, at least set warning */
207 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
208 if (verbose)
209 printf (_("STDERR: %s"), input_buffer);
210 result = max_state (result, STATE_WARNING);
211 printf (_("System call sent warnings to stderr\n"));
212 }
214 (void) fclose (child_stderr);
216 /* close the pipe */
217 if (spclose (child_process)) {
218 printf (_("System call returned nonzero status\n"));
219 result = max_state (result, STATE_WARNING);
220 }
222 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
223 printf (_("Unable to read output\n"));
224 return result;
225 }
227 if ( result == STATE_UNKNOWN )
228 result = STATE_OK;
230 /* Needed if procs found, but none match filter */
231 if ( metric == METRIC_PROCS ) {
232 result = max_state (result, check_thresholds (procs) );
233 }
235 if ( result == STATE_OK ) {
236 printf (_("%s OK: %d process%s"),
237 metric_name, procs, ( procs != 1 ? "es" : "") );
238 } else if (result == STATE_WARNING) {
239 if ( metric == METRIC_PROCS ) {
240 printf (_("PROCS WARNING: %d process%s"), procs,
241 ( procs != 1 ? "es" : ""));
242 } else {
243 printf (_("%s WARNING: %d warn out of %d process%s"),
244 metric_name, warn, procs,
245 ( procs != 1 ? "es" : ""));
246 }
247 } else if (result == STATE_CRITICAL) {
248 if (metric == METRIC_PROCS) {
249 printf (_("PROCS CRITICAL: %d process%s"), procs,
250 ( procs != 1 ? "es" : ""));
251 } else {
252 printf (_("%s CRITICAL: %d crit, %d warn out of %d process%s"),
253 metric_name, crit, warn, procs,
254 ( procs != 1 ? "es" : ""));
255 }
256 }
258 if (fmt != "") {
259 printf (" with %s", fmt);
260 }
262 if ( verbose >= 1 && fails != "" )
263 printf (" [%s]", fails);
265 printf ("\n");
266 return result;
267 }
273 \f
274 /* process command-line arguments */
275 int
276 process_arguments (int argc, char **argv)
277 {
278 int c = 1;
279 char *user;
280 struct passwd *pw;
281 int option_index = 0;
282 static struct option long_options[] = {
283 {"warning", required_argument, 0, 'w'},
284 {"critical", required_argument, 0, 'c'},
285 {"metric", required_argument, 0, 'm'},
286 {"timeout", required_argument, 0, 't'},
287 {"status", required_argument, 0, 's'},
288 {"ppid", required_argument, 0, 'p'},
289 {"command", required_argument, 0, 'C'},
290 {"vsz", required_argument, 0, 'z'},
291 {"rss", required_argument, 0, 'r'},
292 {"pcpu", required_argument, 0, 'P'},
293 {"argument-array", required_argument, 0, 'a'},
294 {"help", no_argument, 0, 'h'},
295 {"version", no_argument, 0, 'V'},
296 {"verbose", no_argument, 0, 'v'},
297 {0, 0, 0, 0}
298 };
300 for (c = 1; c < argc; c++)
301 if (strcmp ("-to", argv[c]) == 0)
302 strcpy (argv[c], "-t");
304 while (1) {
305 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
306 long_options, &option_index);
308 if (c == -1 || c == EOF)
309 break;
311 switch (c) {
312 case '?': /* help */
313 print_usage ();
314 exit (STATE_UNKNOWN);
315 case 'h': /* help */
316 print_help ();
317 exit (STATE_OK);
318 case 'V': /* version */
319 print_revision (progname, revision);
320 exit (STATE_OK);
321 case 't': /* timeout period */
322 if (!is_integer (optarg))
323 usage (_("Timeout Interval must be an integer!\n\n"));
324 else
325 timeout_interval = atoi (optarg);
326 break;
327 case 'c': /* critical threshold */
328 if (is_integer (optarg))
329 cmax = atoi (optarg);
330 else if (sscanf (optarg, ":%d", &cmax) == 1)
331 break;
332 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2)
333 break;
334 else if (sscanf (optarg, "%d:", &cmin) == 1)
335 break;
336 else
337 usage (_("Critical Process Count must be an integer!\n\n"));
338 break;
339 case 'w': /* warning time threshold */
340 if (is_integer (optarg))
341 wmax = atoi (optarg);
342 else if (sscanf (optarg, ":%d", &wmax) == 1)
343 break;
344 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2)
345 break;
346 else if (sscanf (optarg, "%d:", &wmin) == 1)
347 break;
348 else
349 usage (_("%s: Warning Process Count must be an integer!\n\n"));
350 break;
351 case 'p': /* process id */
352 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
353 asprintf (&fmt, "%s%sPPID = %d", fmt, (options ? ", " : ""), ppid);
354 options |= PPID;
355 break;
356 }
357 printf (_("%s: Parent Process ID must be an integer!\n\n"),
358 progname);
359 print_usage ();
360 exit (STATE_UNKNOWN);
361 case 's': /* status */
362 if (statopts)
363 break;
364 else
365 statopts = strdup(optarg);
366 asprintf (&fmt, _("%s%sSTATE = %s"), fmt, (options ? ", " : ""), statopts);
367 options |= STAT;
368 break;
369 case 'u': /* user or user id */
370 if (is_integer (optarg)) {
371 uid = atoi (optarg);
372 pw = getpwuid ((uid_t) uid);
373 /* check to be sure user exists */
374 if (pw == NULL) {
375 printf (_("UID %d was not found\n"), uid);
376 print_usage ();
377 exit (STATE_UNKNOWN);
378 }
379 }
380 else {
381 pw = getpwnam (optarg);
382 /* check to be sure user exists */
383 if (pw == NULL) {
384 printf (_("User name %s was not found\n"), optarg);
385 print_usage ();
386 exit (STATE_UNKNOWN);
387 }
388 /* then get uid */
389 uid = pw->pw_uid;
390 }
391 user = pw->pw_name;
392 asprintf (&fmt, _("%s%sUID = %d (%s)"), fmt, (options ? ", " : ""),
393 uid, user);
394 options |= USER;
395 break;
396 case 'C': /* command */
397 if (prog)
398 break;
399 else
400 prog = strdup(optarg);
401 asprintf (&fmt, _("%s%scommand name '%s'"), fmt, (options ? ", " : ""),
402 prog);
403 options |= PROG;
404 break;
405 case 'a': /* args (full path name with args) */
406 if (args)
407 break;
408 else
409 args = strdup(optarg);
410 asprintf (&fmt, _("%s%sargs '%s'"), fmt, (options ? ", " : ""), args);
411 options |= ARGS;
412 break;
413 case 'r': /* RSS */
414 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
415 asprintf (&fmt, _("%s%sRSS >= %d"), fmt, (options ? ", " : ""), rss);
416 options |= RSS;
417 break;
418 }
419 printf (_("%s: RSS must be an integer!\n\n"),
420 progname);
421 print_usage ();
422 exit (STATE_UNKNOWN);
423 case 'z': /* VSZ */
424 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
425 asprintf (&fmt, _("%s%sVSZ >= %d"), fmt, (options ? ", " : ""), vsz);
426 options |= VSZ;
427 break;
428 }
429 printf (_("%s: VSZ must be an integer!\n\n"),
430 progname);
431 print_usage ();
432 exit (STATE_UNKNOWN);
433 case 'P': /* PCPU */
434 /* TODO: -P 1.5.5 is accepted */
435 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
436 asprintf (&fmt, _("%s%sPCPU >= %.2f"), fmt, (options ? ", " : ""), pcpu);
437 options |= PCPU;
438 break;
439 }
440 printf (_("%s: PCPU must be a float!\n\n"),
441 progname);
442 print_usage ();
443 exit (STATE_UNKNOWN);
444 case 'm':
445 asprintf (&metric_name, "%s", optarg);
446 if ( strcmp(optarg, "PROCS") == 0) {
447 metric = METRIC_PROCS;
448 break;
449 }
450 else if ( strcmp(optarg, "VSZ") == 0) {
451 metric = METRIC_VSZ;
452 break;
453 }
454 else if ( strcmp(optarg, "RSS") == 0 ) {
455 metric = METRIC_RSS;
456 break;
457 }
458 else if ( strcmp(optarg, "CPU") == 0 ) {
459 metric = METRIC_CPU;
460 break;
461 }
462 printf (_("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n"),
463 progname);
464 print_usage ();
465 exit (STATE_UNKNOWN);
466 case 'v': /* command */
467 verbose++;
468 break;
469 }
470 }
472 c = optind;
473 if (wmax == -1 && argv[c])
474 wmax = atoi (argv[c++]);
475 if (cmax == -1 && argv[c])
476 cmax = atoi (argv[c++]);
477 if (statopts == NULL && argv[c]) {
478 asprintf (&statopts, "%s", argv[c++]);
479 asprintf (&fmt, _("%s%sSTATE = %s"), fmt, (options ? ", " : ""), statopts);
480 options |= STAT;
481 }
483 return validate_arguments ();
484 }
489 int
490 validate_arguments ()
491 {
493 if (wmax >= 0 && wmin == -1)
494 wmin = 0;
495 if (cmax >= 0 && cmin == -1)
496 cmin = 0;
497 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
498 if (wmax > cmax && cmax != -1) {
499 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax, cmax);
500 return ERROR;
501 }
502 if (cmin > wmin && wmin != -1) {
503 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin, cmin);
504 return ERROR;
505 }
506 }
508 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
509 /* printf ("At least one threshold must be set\n"); */
510 /* return ERROR; */
511 /* } */
513 if (options == 0)
514 options = ALL;
516 if (statopts==NULL)
517 statopts = strdup("");
519 if (prog==NULL)
520 prog = strdup("");
522 if (args==NULL)
523 args = strdup("");
525 if (fmt==NULL)
526 fmt = strdup("");
528 if (fails==NULL)
529 fails = strdup("");
531 return options;
532 }
538 \f
539 /* Check thresholds against value */
540 int
541 check_thresholds (int value)
542 {
543 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
544 return OK;
545 }
546 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
547 if (value > cmax && value < cmin)
548 return STATE_CRITICAL;
549 }
550 else if (cmax >= 0 && value > cmax) {
551 return STATE_CRITICAL;
552 }
553 else if (cmin >= 0 && value < cmin) {
554 return STATE_CRITICAL;
555 }
557 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
558 if (value > wmax && value < wmin) {
559 return STATE_WARNING;
560 }
561 }
562 else if (wmax >= 0 && value > wmax) {
563 return STATE_WARNING;
564 }
565 else if (wmin >= 0 && value < wmin) {
566 return STATE_WARNING;
567 }
568 return STATE_OK;
569 }
575 \f
576 void
577 print_help (void)
578 {
579 print_revision (progname, revision);
581 printf (_("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>"));
582 printf (_(COPYRIGHT), copyright, email);
584 printf(_("\
585 Checks all processes and generates WARNING or CRITICAL states if the specified\n\
586 metric is outside the required threshold ranges. The metric defaults to number\n\
587 of processes. Search filters can be applied to limit the processes to check.\n\n"));
589 print_usage ();
591 printf(_("\n\
592 Required Arguments:\n\
593 -w, --warning=RANGE\n\
594 Generate warning state if metric is outside this range\n\
595 -c, --critical=RANGE\n\
596 Generate critical state if metric is outside this range\n"));
598 printf(_("\n\
599 Optional Arguments:\n\
600 -m, --metric=TYPE\n\
601 Check thresholds against metric. Valid types:\n\
602 PROCS - number of processes (default)\n\
603 VSZ - virtual memory size\n\
604 RSS - resident set memory size\n\
605 CPU - percentage cpu\n\
606 -v, --verbose\n\
607 Extra information. Up to 3 verbosity levels\n"));
609 printf(_("\n\
610 Optional Filters:\n\
611 -s, --state=STATUSFLAGS\n\
612 Only scan for processes that have, in the output of `ps`, one or\n\
613 more of the status flags you specify (for example R, Z, S, RS,\n\
614 RSZDT, plus others based on the output of your 'ps' command).\n\
615 -p, --ppid=PPID\n\
616 Only scan for children of the parent process ID indicated.\n\
617 -z, --vsz=VSZ\n\
618 Only scan for processes with vsz higher than indicated.\n\
619 -r, --rss=RSS\n\
620 Only scan for processes with rss higher than indicated.\n"));
622 printf(_("\
623 -P, --pcpu=PCPU\n\
624 Only scan for processes with pcpu higher than indicated.\n\
625 -u, --user=USER\n\
626 Only scan for processes with user name or ID indicated.\n\
627 -a, --argument-array=STRING\n\
628 Only scan for processes with args that contain STRING.\n\
629 -C, --command=COMMAND\n\
630 Only scan for exact matches to the named COMMAND.\n"));
632 printf(_("\n\
633 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
634 specified 'max:min', a warning status will be generated if the\n\
635 count is inside the specified range\n\n"));
637 printf(_("\
638 This plugin checks the number of currently running processes and\n\
639 generates WARNING or CRITICAL states if the process count is outside\n\
640 the specified threshold ranges. The process count can be filtered by\n\
641 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
642 be the total number of running processes\n\n"));
644 printf(_("\
645 Examples:\n\
646 check_procs -w 2:2 -c 2:1024 -C portsentry\n\
647 Warning if not two processes with command name portsentry. Critical\n\
648 if < 2 or > 1024 processes\n\n\
649 check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
650 Warning alert if > 10 processes with command arguments containing \n\
651 '/usr/local/bin/perl' and owned by root\n\n\
652 check_procs -w 50000 -c 100000 --metric=VSZ\n\
653 Alert if vsz of any processes over 50K or 100K\n\n"));
655 printf (_(UT_SUPPORT));
656 }
/*
 * Print the short usage summary: the option synopsis for this plugin
 * followed by the UT_HLP_VRS text formatted with the program name.
 * Called by print_help() and by the argument parser on bad input.
 */
void
print_usage (void)
{
	printf ("\
Usage: %s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n\
[-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n\
[-C command] [-v]\n", progname);
	printf (_(UT_HLP_VRS), progname, progname);
}