1 /******************************************************************************
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 ******************************************************************************/
/* Plugin identity strings.  progname and revision are handed to
   print_revision(); progname is also compared against each process name in
   main() so the plugin does not count itself.  copyright and email are
   substituted into the COPYRIGHT text printed by print_help(). */
19 const char *progname = "check_procs";
20 const char *revision = "$Revision$";
21 const char *copyright = "2000-2003";
22 const char *email = "nagiosplug-devel@lists.sourceforge.net";
24 #include "common.h"
25 #include "popen.h"
26 #include "utils.h"
27 #include <pwd.h>
/* Forward declarations for the functions defined further down in this file. */
/* Parses argv into the globals below; main() treats a return of ERROR as fatal. */
29 int process_arguments (int, char **);
/* Checks threshold consistency and fills in defaults for unset filters. */
30 int validate_arguments (void);
/* Maps a metric value to STATE_OK, STATE_WARNING or STATE_CRITICAL. */
31 int check_thresholds (int);
32 void print_help (void);
33 void print_usage (void);
/* Warning/critical threshold bounds.  -1 means "not set": check_thresholds()
   only honours a bound when it is >= 0. */
35 int wmax = -1;
36 int cmax = -1;
37 int wmin = -1;
38 int cmin = -1;
/* Each filter option ORs its bit into `options`; main() builds the same
   bitmask per process and counts the process only when both masks are equal.
   ALL is what validate_arguments() stores when no filter was requested, and
   makes main() count every process. */
40 int options = 0; /* bitmask of filter criteria to test against */
41 #define ALL 1
42 #define STAT 2
43 #define PPID 4
44 #define USER 8
45 #define PROG 16
46 #define ARGS 32
47 #define VSZ 64
48 #define RSS 128
49 #define PCPU 256
51 /* Different metrics */
/* metric_name is the label printed at the start of the result line; it is
   "PROCS" by default and replaced by the -m argument. */
52 char *metric_name;
53 enum metric {
54 METRIC_PROCS,
55 METRIC_VSZ,
56 METRIC_RSS,
57 METRIC_CPU
58 };
/* Which per-process value (or the process count) is checked against the
   thresholds. */
59 enum metric metric = METRIC_PROCS;
/* Incremented once per -v; main() prints more detail at levels 1, 2 and 3. */
61 int verbose = 0;
/* Filter values set by process_arguments().  uid and ppid must match the
   process exactly; vsz, rss and pcpu are minimums (the process value must be
   greater than or equal to them). */
62 int uid;
63 int ppid;
64 int vsz;
65 int rss;
66 float pcpu;
/* statopts: state flags to accept (a process matches when its state string
   occurs inside statopts).  prog: command name that must match exactly.
   args: substring that must occur in the process arguments. */
67 char *statopts;
68 char *prog;
69 char *args;
/* fmt: human-readable description of the active filters, printed after
   " with " in the result line.  fails: comma-separated names of processes
   that hit a warning/critical threshold, printed when verbose. */
70 char *fmt;
71 char *fails;
/* Scratch buffer that receives trailing characters in the sscanf() checks of
   process_arguments(). */
72 char tmp[MAX_INPUT_BUFFER];
78 \f
79 int
80 main (int argc, char **argv)
81 {
82 char *input_buffer;
83 char *input_line;
84 char *procprog;
86 int procuid = 0;
87 int procppid = 0;
88 int procvsz = 0;
89 int procrss = 0;
90 float procpcpu = 0;
91 char procstat[8];
92 char *procargs;
93 char *temp_string;
95 const char *zombie = "Z";
97 int resultsum = 0; /* bitmask of the filter criteria met by a process */
98 int found = 0; /* counter for number of lines returned in `ps` output */
99 int procs = 0; /* counter for number of processes meeting filter criteria */
100 int pos; /* number of spaces before 'args' in `ps` output */
101 int cols; /* number of columns in ps output */
102 int expected_cols = PS_COLS - 1;
103 int warn = 0; /* number of processes in warn state */
104 int crit = 0; /* number of processes in crit state */
105 int i = 0;
107 int result = STATE_UNKNOWN;
109 setlocale (LC_ALL, "");
110 bindtextdomain (PACKAGE, LOCALEDIR);
111 textdomain (PACKAGE);
113 input_buffer = malloc (MAX_INPUT_BUFFER);
114 procprog = malloc (MAX_INPUT_BUFFER);
116 asprintf (&metric_name, "PROCS");
117 metric = METRIC_PROCS;
119 if (process_arguments (argc, argv) == ERROR)
120 usage (_("Unable to parse command line\n"));
122 if (verbose >= 2)
123 printf (_("CMD: %s\n"), PS_COMMAND);
125 child_process = spopen (PS_COMMAND);
126 if (child_process == NULL) {
127 printf (_("Could not open pipe: %s\n"), PS_COMMAND);
128 return STATE_UNKNOWN;
129 }
131 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
132 if (child_stderr == NULL)
133 printf (_("Could not open stderr for %s\n"), PS_COMMAND);
135 /* flush first line */
136 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
137 while ( input_buffer[strlen(input_buffer)-1] != '\n' )
138 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
140 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
141 asprintf (&input_line, "%s", input_buffer);
142 while ( input_buffer[strlen(input_buffer)-1] != '\n' ) {
143 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
144 asprintf (&input_line, "%s%s", input_line, input_buffer);
145 }
147 if (verbose >= 3)
148 printf ("%s", input_line);
150 strcpy (procprog, "");
151 asprintf (&procargs, "%s", "");
153 cols = sscanf (input_line, PS_FORMAT, PS_VARLIST);
155 /* Zombie processes do not give a procprog command */
156 if ( cols == (expected_cols - 1) && strstr(procstat, zombie) ) {
157 cols = expected_cols;
158 /* Set some value for procargs for the strip command further below
159 Seen to be a problem on some Solaris 7 and 8 systems */
160 input_buffer[pos] = '\n';
161 input_buffer[pos+1] = 0x0;
162 }
163 if ( cols >= expected_cols ) {
164 resultsum = 0;
165 asprintf (&procargs, "%s", input_line + pos);
166 strip (procargs);
168 /* Some ps return full pathname for command. This removes path */
169 temp_string = strtok ((char *)procprog, "/");
170 while (temp_string) {
171 strcpy(procprog, temp_string);
172 temp_string = strtok (NULL, "/");
173 }
175 if (verbose >= 3)
176 printf ("%d %d %d %d %d %.2f %s %s %s\n",
177 procs, procuid, procvsz, procrss,
178 procppid, procpcpu, procstat, procprog, procargs);
180 /* Ignore self */
181 if (strcmp (procprog, progname) == 0) {
182 continue;
183 }
185 if ((options & STAT) && (strstr (statopts, procstat)))
186 resultsum |= STAT;
187 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
188 resultsum |= ARGS;
189 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
190 resultsum |= PROG;
191 if ((options & PPID) && (procppid == ppid))
192 resultsum |= PPID;
193 if ((options & USER) && (procuid == uid))
194 resultsum |= USER;
195 if ((options & VSZ) && (procvsz >= vsz))
196 resultsum |= VSZ;
197 if ((options & RSS) && (procrss >= rss))
198 resultsum |= RSS;
199 if ((options & PCPU) && (procpcpu >= pcpu))
200 resultsum |= PCPU;
202 found++;
204 /* Next line if filters not matched */
205 if (!(options == resultsum || options == ALL))
206 continue;
208 procs++;
210 if (metric == METRIC_VSZ)
211 i = check_thresholds (procvsz);
212 else if (metric == METRIC_RSS)
213 i = check_thresholds (procrss);
214 /* TODO? float thresholds for --metric=CPU */
215 else if (metric == METRIC_CPU)
216 i = check_thresholds ((int)procpcpu);
218 if (metric != METRIC_PROCS) {
219 if (i == STATE_WARNING) {
220 warn++;
221 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
222 result = max_state (result, i);
223 }
224 if (i == STATE_CRITICAL) {
225 crit++;
226 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
227 result = max_state (result, i);
228 }
229 }
230 }
231 /* This should not happen */
232 else if (verbose) {
233 printf(_("Not parseable: %s"), input_buffer);
234 }
235 }
237 /* If we get anything on STDERR, at least set warning */
238 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
239 if (verbose)
240 printf (_("STDERR: %s"), input_buffer);
241 result = max_state (result, STATE_WARNING);
242 printf (_("System call sent warnings to stderr\n"));
243 }
245 (void) fclose (child_stderr);
247 /* close the pipe */
248 if (spclose (child_process)) {
249 printf (_("System call returned nonzero status\n"));
250 result = max_state (result, STATE_WARNING);
251 }
253 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
254 printf (_("Unable to read output\n"));
255 return result;
256 }
258 if ( result == STATE_UNKNOWN )
259 result = STATE_OK;
261 /* Needed if procs found, but none match filter */
262 if ( metric == METRIC_PROCS ) {
263 result = max_state (result, check_thresholds (procs) );
264 }
266 if ( result == STATE_OK ) {
267 printf ("%s %s: ", metric_name, _("OK"));
268 } else if (result == STATE_WARNING) {
269 printf ("%s %s: ", metric_name, _("WARNING"));
270 if ( metric != METRIC_PROCS ) {
271 printf (_("%d warn out of "), warn);
272 }
273 } else if (result == STATE_CRITICAL) {
274 printf ("%s %s: ", metric_name, _("CRITICAL"));
275 if (metric != METRIC_PROCS) {
276 printf (_("%d crit, %d warn out of "), crit, warn);
277 }
278 }
279 printf (ngettext ("%d process", "%d processes", (unsigned long) procs), procs);
281 if (strcmp(fmt,"") != 0) {
282 printf (_(" with %s"), fmt);
283 }
285 if ( verbose >= 1 && strcmp(fails,"") )
286 printf (" [%s]", fails);
288 printf ("\n");
289 return result;
290 }
296 \f
297 /* process command-line arguments */
298 int
299 process_arguments (int argc, char **argv)
300 {
301 int c = 1;
302 char *user;
303 struct passwd *pw;
304 int option = 0;
305 static struct option longopts[] = {
306 {"warning", required_argument, 0, 'w'},
307 {"critical", required_argument, 0, 'c'},
308 {"metric", required_argument, 0, 'm'},
309 {"timeout", required_argument, 0, 't'},
310 {"status", required_argument, 0, 's'},
311 {"ppid", required_argument, 0, 'p'},
312 {"command", required_argument, 0, 'C'},
313 {"vsz", required_argument, 0, 'z'},
314 {"rss", required_argument, 0, 'r'},
315 {"pcpu", required_argument, 0, 'P'},
316 {"argument-array", required_argument, 0, 'a'},
317 {"help", no_argument, 0, 'h'},
318 {"version", no_argument, 0, 'V'},
319 {"verbose", no_argument, 0, 'v'},
320 {0, 0, 0, 0}
321 };
323 for (c = 1; c < argc; c++)
324 if (strcmp ("-to", argv[c]) == 0)
325 strcpy (argv[c], "-t");
327 while (1) {
328 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
329 longopts, &option);
331 if (c == -1 || c == EOF)
332 break;
334 switch (c) {
335 case '?': /* help */
336 print_usage ();
337 exit (STATE_UNKNOWN);
338 case 'h': /* help */
339 print_help ();
340 exit (STATE_OK);
341 case 'V': /* version */
342 print_revision (progname, revision);
343 exit (STATE_OK);
344 case 't': /* timeout period */
345 if (!is_integer (optarg))
346 usage (_("Timeout Interval must be an integer!\n\n"));
347 else
348 timeout_interval = atoi (optarg);
349 break;
350 case 'c': /* critical threshold */
351 if (is_integer (optarg))
352 cmax = atoi (optarg);
353 else if (sscanf (optarg, ":%d", &cmax) == 1)
354 break;
355 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2)
356 break;
357 else if (sscanf (optarg, "%d:", &cmin) == 1)
358 break;
359 else
360 usage (_("Critical Process Count must be an integer!\n\n"));
361 break;
362 case 'w': /* warning threshold */
363 if (is_integer (optarg))
364 wmax = atoi (optarg);
365 else if (sscanf (optarg, ":%d", &wmax) == 1)
366 break;
367 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2)
368 break;
369 else if (sscanf (optarg, "%d:", &wmin) == 1)
370 break;
371 else
372 usage (_("Warning Process Count must be an integer!\n\n"));
373 break;
374 case 'p': /* process id */
375 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
376 asprintf (&fmt, "%s%sPPID = %d", (fmt ? fmt : "") , (options ? ", " : ""), ppid);
377 options |= PPID;
378 break;
379 }
380 usage2 (_("%s: Parent Process ID must be an integer!\n\n"), progname);
381 case 's': /* status */
382 if (statopts)
383 break;
384 else
385 statopts = optarg;
386 asprintf (&fmt, _("%s%sSTATE = %s"), (fmt ? fmt : ""), (options ? ", " : ""), statopts);
387 options |= STAT;
388 break;
389 case 'u': /* user or user id */
390 if (is_integer (optarg)) {
391 uid = atoi (optarg);
392 pw = getpwuid ((uid_t) uid);
393 /* check to be sure user exists */
394 if (pw == NULL)
395 usage2 (_("UID %s was not found\n"), optarg);
396 }
397 else {
398 pw = getpwnam (optarg);
399 /* check to be sure user exists */
400 if (pw == NULL)
401 usage2 (_("User name %s was not found\n"), optarg);
402 /* then get uid */
403 uid = pw->pw_uid;
404 }
405 user = pw->pw_name;
406 asprintf (&fmt, _("%s%sUID = %d (%s)"), (fmt ? fmt : ""), (options ? ", " : ""),
407 uid, user);
408 options |= USER;
409 break;
410 case 'C': /* command */
411 if (prog)
412 break;
413 else
414 prog = optarg;
415 asprintf (&fmt, _("%s%scommand name '%s'"), (fmt ? fmt : ""), (options ? ", " : ""),
416 prog);
417 options |= PROG;
418 break;
419 case 'a': /* args (full path name with args) */
420 if (args)
421 break;
422 else
423 args = optarg;
424 asprintf (&fmt, _("%s%sargs '%s'"), (fmt ? fmt : ""), (options ? ", " : ""), args);
425 options |= ARGS;
426 break;
427 case 'r': /* RSS */
428 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
429 asprintf (&fmt, _("%s%sRSS >= %d"), (fmt ? fmt : ""), (options ? ", " : ""), rss);
430 options |= RSS;
431 break;
432 }
433 usage2 (_("%s: RSS must be an integer!\n\n"), progname);
434 case 'z': /* VSZ */
435 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
436 asprintf (&fmt, _("%s%sVSZ >= %d"), (fmt ? fmt : ""), (options ? ", " : ""), vsz);
437 options |= VSZ;
438 break;
439 }
440 usage2 (_("%s: VSZ must be an integer!\n\n"), progname);
441 case 'P': /* PCPU */
442 /* TODO: -P 1.5.5 is accepted */
443 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
444 asprintf (&fmt, _("%s%sPCPU >= %.2f"), (fmt ? fmt : ""), (options ? ", " : ""), pcpu);
445 options |= PCPU;
446 break;
447 }
448 usage2 (_("%s: PCPU must be a float!\n\n"), progname);
449 case 'm':
450 asprintf (&metric_name, "%s", optarg);
451 if ( strcmp(optarg, "PROCS") == 0) {
452 metric = METRIC_PROCS;
453 break;
454 }
455 else if ( strcmp(optarg, "VSZ") == 0) {
456 metric = METRIC_VSZ;
457 break;
458 }
459 else if ( strcmp(optarg, "RSS") == 0 ) {
460 metric = METRIC_RSS;
461 break;
462 }
463 else if ( strcmp(optarg, "CPU") == 0 ) {
464 metric = METRIC_CPU;
465 break;
466 }
467 printf (_("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n"),
468 progname);
469 print_usage ();
470 exit (STATE_UNKNOWN);
471 case 'v': /* command */
472 verbose++;
473 break;
474 }
475 }
477 c = optind;
478 if (wmax == -1 && argv[c])
479 wmax = atoi (argv[c++]);
480 if (cmax == -1 && argv[c])
481 cmax = atoi (argv[c++]);
482 if (statopts == NULL && argv[c]) {
483 asprintf (&statopts, "%s", argv[c++]);
484 asprintf (&fmt, _("%s%sSTATE = %s"), (fmt ? fmt : ""), (options ? ", " : ""), statopts);
485 options |= STAT;
486 }
488 return validate_arguments ();
489 }
494 int
495 validate_arguments ()
496 {
498 if (wmax >= 0 && wmin == -1)
499 wmin = 0;
500 if (cmax >= 0 && cmin == -1)
501 cmin = 0;
502 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
503 if (wmax > cmax && cmax != -1) {
504 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax, cmax);
505 return ERROR;
506 }
507 if (cmin > wmin && wmin != -1) {
508 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin, cmin);
509 return ERROR;
510 }
511 }
513 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
514 /* printf ("At least one threshold must be set\n"); */
515 /* return ERROR; */
516 /* } */
518 if (options == 0)
519 options = ALL;
521 if (statopts==NULL)
522 statopts = strdup("");
524 if (prog==NULL)
525 prog = strdup("");
527 if (args==NULL)
528 args = strdup("");
530 if (fmt==NULL)
531 fmt = strdup("");
533 if (fails==NULL)
534 fails = strdup("");
536 return options;
537 }
543 \f
544 /* Check thresholds against value */
545 int
546 check_thresholds (int value)
547 {
548 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
549 return OK;
550 }
551 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
552 if (value > cmax && value < cmin)
553 return STATE_CRITICAL;
554 }
555 else if (cmax >= 0 && value > cmax) {
556 return STATE_CRITICAL;
557 }
558 else if (cmin >= 0 && value < cmin) {
559 return STATE_CRITICAL;
560 }
562 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
563 if (value > wmax && value < wmin) {
564 return STATE_WARNING;
565 }
566 }
567 else if (wmax >= 0 && value > wmax) {
568 return STATE_WARNING;
569 }
570 else if (wmin >= 0 && value < wmin) {
571 return STATE_WARNING;
572 }
573 return STATE_OK;
574 }
580 \f
/* Print the full help text: revision line, copyright notices, a short
   description, the usage synopsis from print_usage(), the option reference,
   range syntax, examples and the UT_SUPPORT text.  Everything goes to stdout
   through gettext.

   NOTE(review): the first copyright printf has no trailing newline; unless
   the COPYRIGHT text begins with one, the two notices run together on one
   line -- confirm against the COPYRIGHT definition. */
581 void
582 print_help (void)
583 {
584 print_revision (progname, revision);
586 printf (_("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>"));
587 printf (_(COPYRIGHT), copyright, email);
589 printf(_("\
590 Checks all processes and generates WARNING or CRITICAL states if the specified\n\
591 metric is outside the required threshold ranges. The metric defaults to number\n\
592 of processes. Search filters can be applied to limit the processes to check.\n\n"));
594 print_usage ();
/* Threshold options. */
596 printf(_("\n\
597 Required Arguments:\n\
598 -w, --warning=RANGE\n\
599 Generate warning state if metric is outside this range\n\
600 -c, --critical=RANGE\n\
601 Generate critical state if metric is outside this range\n"));
/* Metric selection and verbosity. */
603 printf(_("\n\
604 Optional Arguments:\n\
605 -m, --metric=TYPE\n\
606 Check thresholds against metric. Valid types:\n\
607 PROCS - number of processes (default)\n\
608 VSZ - virtual memory size\n\
609 RSS - resident set memory size\n\
610 CPU - percentage cpu\n\
611 -v, --verbose\n\
612 Extra information. Up to 3 verbosity levels\n"));
/* Filters; the list is split over two printf calls. */
614 printf(_("\n\
615 Optional Filters:\n\
616 -s, --state=STATUSFLAGS\n\
617 Only scan for processes that have, in the output of `ps`, one or\n\
618 more of the status flags you specify (for example R, Z, S, RS,\n\
619 RSZDT, plus others based on the output of your 'ps' command).\n\
620 -p, --ppid=PPID\n\
621 Only scan for children of the parent process ID indicated.\n\
622 -z, --vsz=VSZ\n\
623 Only scan for processes with vsz higher than indicated.\n\
624 -r, --rss=RSS\n\
625 Only scan for processes with rss higher than indicated.\n"));
627 printf(_("\
628 -P, --pcpu=PCPU\n\
629 Only scan for processes with pcpu higher than indicated.\n\
630 -u, --user=USER\n\
631 Only scan for processes with user name or ID indicated.\n\
632 -a, --argument-array=STRING\n\
633 Only scan for processes with args that contain STRING.\n\
634 -C, --command=COMMAND\n\
635 Only scan for exact matches of COMMAND (without path).\n"));
/* RANGE syntax. */
637 printf(_("\n\
638 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
639 specified 'max:min', a warning status will be generated if the\n\
640 count is inside the specified range\n\n"));
642 printf(_("\
643 This plugin checks the number of currently running processes and\n\
644 generates WARNING or CRITICAL states if the process count is outside\n\
645 the specified threshold ranges. The process count can be filtered by\n\
646 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
647 be the total number of running processes\n\n"));
649 printf(_("\
650 Examples:\n\
651 check_procs -w 2:2 -c 2:1024 -C portsentry\n\
652 Warning if not two processes with command name portsentry. Critical\n\
653 if < 2 or > 1024 processes\n\n\
654 check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
655 Warning alert if > 10 processes with command arguments containing \n\
656 '/usr/local/bin/perl' and owned by root\n\n\
657 check_procs -w 50000 -c 100000 --metric=VSZ\n\
658 Alert if vsz of any processes over 50K or 100K\n\
659 check_procs -w 10 -c 20 --metric=CPU\n\
660 Alert if cpu of any processes over 10% or 20%\n\n"));
662 printf (_(UT_SUPPORT));
663 }
/* Print the short usage synopsis to stdout: the option summary with
   progname substituted, followed by the UT_HLP_VRS text (which receives
   progname twice).  Called by print_help() and on invalid options. */
665 void
666 print_usage (void)
667 {
/* "%%cpu" is an escaped percent sign, printed as "%cpu". */
668 printf ("\
669 Usage: %s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n\
670 [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n\
671 [-C command] [-v]\n", progname);
672 printf (_(UT_HLP_VRS), progname, progname);
673 }