1 /******************************************************************************
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 ******************************************************************************/
/* Name of this plugin.  It is passed to print_revision() and the usage
   messages, and main() compares it with each process name so that the
   plugin does not count itself. */
const char *progname = "check_procs";
/* Revision string handed to print_revision(); presumably a keyword that the
   version control system expands -- confirm. */
const char *revision = "$Revision$";
/* Copyright years and contact address passed as arguments to the COPYRIGHT
   format in print_help(). */
const char *copyright = "2000-2003";
const char *email = "nagiosplug-devel@lists.sourceforge.net";
24 #include "common.h"
25 #include "popen.h"
26 #include "utils.h"
27 #include <pwd.h>
/* Forward declarations of the functions defined after main(). */
int process_arguments (int, char **);	/* parse argv into the globals below */
int validate_arguments (void);	/* check threshold consistency, fill in defaults */
int check_thresholds (int);	/* compare a value with the warning/critical ranges */
void print_help (void);	/* full help text */
void print_usage (void);	/* short usage synopsis */
/* Warning and critical range bounds; -1 means "not set". */
int wmin = -1, wmax = -1;
int cmin = -1, cmax = -1;

/* Filter criteria, one bit per criterion.  `options` is the set of criteria
   requested on the command line that a process has to match. */
#define ALL  (1 << 0)
#define STAT (1 << 1)
#define PPID (1 << 2)
#define USER (1 << 3)
#define PROG (1 << 4)
#define ARGS (1 << 5)
#define VSZ  (1 << 6)
#define RSS  (1 << 7)
#define PCPU (1 << 8)
int options = 0;

/* The metric the thresholds are checked against, and its printable name. */
enum metric {
	METRIC_PROCS = 0,
	METRIC_VSZ = 1,
	METRIC_RSS = 2,
	METRIC_CPU = 3
};
enum metric metric = METRIC_PROCS;
char *metric_name;
int verbose = 0;	/* verbosity level, incremented once per -v */
int uid;	/* -u: user id a process must be owned by */
int ppid;	/* -p: parent process id a process must have */
int vsz;	/* -z: lower bound for a process's vsz (matched with >=) */
int rss;	/* -r: lower bound for a process's rss (matched with >=) */
float pcpu;	/* -P: lower bound for a process's pcpu (matched with >=) */
char *statopts;	/* -s: status flags; a process matches if its state occurs in this string */
char *prog;	/* -C: command name, compared for equality after the path is removed */
char *args;	/* -a: substring searched for in the process arguments */
char *fmt;	/* description of the active filters, printed after the process count */
char *fails;	/* comma separated names of the processes that hit a threshold */
char tmp[MAX_INPUT_BUFFER];	/* scratch target for the trailing-text scanset in the option parser */
78 \f
79 int
80 main (int argc, char **argv)
81 {
82 char input_buffer[MAX_INPUT_BUFFER];
84 int procuid = 0;
85 int procppid = 0;
86 int procvsz = 0;
87 int procrss = 0;
88 float procpcpu = 0;
89 char procstat[8];
90 char procprog[MAX_INPUT_BUFFER];
91 char *procargs;
92 char *temp_string;
94 const char *zombie = "Z";
96 int resultsum = 0; /* bitmask of the filter criteria met by a process */
97 int found = 0; /* counter for number of lines returned in `ps` output */
98 int procs = 0; /* counter for number of processes meeting filter criteria */
99 int pos; /* number of spaces before 'args' in `ps` output */
100 int cols; /* number of columns in ps output */
101 int expected_cols = PS_COLS - 1;
102 int warn = 0; /* number of processes in warn state */
103 int crit = 0; /* number of processes in crit state */
104 int i = 0;
106 int result = STATE_UNKNOWN;
108 setlocale (LC_ALL, "");
109 bindtextdomain (PACKAGE, LOCALEDIR);
110 textdomain (PACKAGE);
112 asprintf (&metric_name, "PROCS");
113 metric = METRIC_PROCS;
115 if (process_arguments (argc, argv) == ERROR)
116 usage (_("Unable to parse command line\n"));
118 if (verbose >= 2)
119 printf (_("CMD: %s\n"), PS_COMMAND);
121 child_process = spopen (PS_COMMAND);
122 if (child_process == NULL) {
123 printf (_("Could not open pipe: %s\n"), PS_COMMAND);
124 return STATE_UNKNOWN;
125 }
127 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
128 if (child_stderr == NULL)
129 printf (_("Could not open stderr for %s\n"), PS_COMMAND);
131 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
133 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
134 strcpy (procprog, "");
135 asprintf (&procargs, "%s", "");
137 cols = sscanf (input_buffer, PS_FORMAT, PS_VARLIST);
139 /* Zombie processes do not give a procprog command */
140 if ( cols == (expected_cols - 1) && strstr(procstat, zombie) ) {
141 cols = expected_cols;
142 /* Set some value for procargs for the strip command further below
143 Seen to be a problem on some Solaris 7 and 8 systems */
144 input_buffer[pos] = '\n';
145 input_buffer[pos+1] = 0x0;
146 }
147 if ( cols >= expected_cols ) {
148 resultsum = 0;
149 asprintf (&procargs, "%s", input_buffer + pos);
150 strip (procargs);
152 /* Some ps return full pathname for command. This removes path */
153 temp_string = strtok ((char *)procprog, "/");
154 while (temp_string) {
155 strcpy(procprog, temp_string);
156 temp_string = strtok (NULL, "/");
157 }
159 if (verbose >= 3)
160 printf ("%d %d %d %d %d %.2f %s %s %s\n",
161 procs, procuid, procvsz, procrss,
162 procppid, procpcpu, procstat, procprog, procargs);
164 /* Ignore self */
165 if (strcmp (procprog, progname) == 0) {
166 continue;
167 }
169 if ((options & STAT) && (strstr (statopts, procstat)))
170 resultsum |= STAT;
171 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
172 resultsum |= ARGS;
173 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
174 resultsum |= PROG;
175 if ((options & PPID) && (procppid == ppid))
176 resultsum |= PPID;
177 if ((options & USER) && (procuid == uid))
178 resultsum |= USER;
179 if ((options & VSZ) && (procvsz >= vsz))
180 resultsum |= VSZ;
181 if ((options & RSS) && (procrss >= rss))
182 resultsum |= RSS;
183 if ((options & PCPU) && (procpcpu >= pcpu))
184 resultsum |= PCPU;
186 found++;
188 /* Next line if filters not matched */
189 if (!(options == resultsum || options == ALL))
190 continue;
192 procs++;
194 if (metric == METRIC_VSZ)
195 i = check_thresholds (procvsz);
196 else if (metric == METRIC_RSS)
197 i = check_thresholds (procrss);
198 /* TODO? float thresholds for --metric=CPU */
199 else if (metric == METRIC_CPU)
200 i = check_thresholds ((int)procpcpu);
202 if (metric != METRIC_PROCS) {
203 if (i == STATE_WARNING) {
204 warn++;
205 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
206 result = max_state (result, i);
207 }
208 if (i == STATE_CRITICAL) {
209 crit++;
210 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
211 result = max_state (result, i);
212 }
213 }
214 }
215 /* This should not happen */
216 else if (verbose) {
217 printf(_("Not parseable: %s"), input_buffer);
218 }
219 }
221 /* If we get anything on STDERR, at least set warning */
222 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
223 if (verbose)
224 printf (_("STDERR: %s"), input_buffer);
225 result = max_state (result, STATE_WARNING);
226 printf (_("System call sent warnings to stderr\n"));
227 }
229 (void) fclose (child_stderr);
231 /* close the pipe */
232 if (spclose (child_process)) {
233 printf (_("System call returned nonzero status\n"));
234 result = max_state (result, STATE_WARNING);
235 }
237 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
238 printf (_("Unable to read output\n"));
239 return result;
240 }
242 if ( result == STATE_UNKNOWN )
243 result = STATE_OK;
245 /* Needed if procs found, but none match filter */
246 if ( metric == METRIC_PROCS ) {
247 result = max_state (result, check_thresholds (procs) );
248 }
250 if ( result == STATE_OK ) {
251 printf ("%s %s: ", metric_name, _("OK"));
252 } else if (result == STATE_WARNING) {
253 printf ("%s %s: ", metric_name, _("WARNING"));
254 if ( metric != METRIC_PROCS ) {
255 printf (_("%d warn out of "), warn);
256 }
257 } else if (result == STATE_CRITICAL) {
258 printf ("%s %s: ", metric_name, _("CRITICAL"));
259 if (metric != METRIC_PROCS) {
260 printf (_("%d crit, %d warn out of "), crit, warn);
261 }
262 }
263 printf (ngettext ("%d process", "%d processes", (unsigned long) procs), procs);
265 if (strcmp(fmt,"") != 0) {
266 printf (_(" with %s"), fmt);
267 }
269 if ( verbose >= 1 && strcmp(fails,"") )
270 printf (" [%s]", fails);
272 printf ("\n");
273 return result;
274 }
280 \f
281 /* process command-line arguments */
282 int
283 process_arguments (int argc, char **argv)
284 {
285 int c = 1;
286 char *user;
287 struct passwd *pw;
288 int option = 0;
289 static struct option longopts[] = {
290 {"warning", required_argument, 0, 'w'},
291 {"critical", required_argument, 0, 'c'},
292 {"metric", required_argument, 0, 'm'},
293 {"timeout", required_argument, 0, 't'},
294 {"status", required_argument, 0, 's'},
295 {"ppid", required_argument, 0, 'p'},
296 {"command", required_argument, 0, 'C'},
297 {"vsz", required_argument, 0, 'z'},
298 {"rss", required_argument, 0, 'r'},
299 {"pcpu", required_argument, 0, 'P'},
300 {"argument-array", required_argument, 0, 'a'},
301 {"help", no_argument, 0, 'h'},
302 {"version", no_argument, 0, 'V'},
303 {"verbose", no_argument, 0, 'v'},
304 {0, 0, 0, 0}
305 };
307 for (c = 1; c < argc; c++)
308 if (strcmp ("-to", argv[c]) == 0)
309 strcpy (argv[c], "-t");
311 while (1) {
312 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
313 longopts, &option);
315 if (c == -1 || c == EOF)
316 break;
318 switch (c) {
319 case '?': /* help */
320 print_usage ();
321 exit (STATE_UNKNOWN);
322 case 'h': /* help */
323 print_help ();
324 exit (STATE_OK);
325 case 'V': /* version */
326 print_revision (progname, revision);
327 exit (STATE_OK);
328 case 't': /* timeout period */
329 if (!is_integer (optarg))
330 usage (_("Timeout Interval must be an integer!\n\n"));
331 else
332 timeout_interval = atoi (optarg);
333 break;
334 case 'c': /* critical threshold */
335 if (is_integer (optarg))
336 cmax = atoi (optarg);
337 else if (sscanf (optarg, ":%d", &cmax) == 1)
338 break;
339 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2)
340 break;
341 else if (sscanf (optarg, "%d:", &cmin) == 1)
342 break;
343 else
344 usage (_("Critical Process Count must be an integer!\n\n"));
345 break;
346 case 'w': /* warning threshold */
347 if (is_integer (optarg))
348 wmax = atoi (optarg);
349 else if (sscanf (optarg, ":%d", &wmax) == 1)
350 break;
351 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2)
352 break;
353 else if (sscanf (optarg, "%d:", &wmin) == 1)
354 break;
355 else
356 usage (_("Warning Process Count must be an integer!\n\n"));
357 break;
358 case 'p': /* process id */
359 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
360 asprintf (&fmt, "%s%sPPID = %d", (fmt ? fmt : "") , (options ? ", " : ""), ppid);
361 options |= PPID;
362 break;
363 }
364 usage2 (_("%s: Parent Process ID must be an integer!\n\n"), progname);
365 case 's': /* status */
366 if (statopts)
367 break;
368 else
369 statopts = optarg;
370 asprintf (&fmt, _("%s%sSTATE = %s"), (fmt ? fmt : ""), (options ? ", " : ""), statopts);
371 options |= STAT;
372 break;
373 case 'u': /* user or user id */
374 if (is_integer (optarg)) {
375 uid = atoi (optarg);
376 pw = getpwuid ((uid_t) uid);
377 /* check to be sure user exists */
378 if (pw == NULL)
379 usage2 (_("UID %d was not found\n"), uid);
380 }
381 else {
382 pw = getpwnam (optarg);
383 /* check to be sure user exists */
384 if (pw == NULL)
385 usage2 (_("User name %s was not found\n"), optarg);
386 /* then get uid */
387 uid = pw->pw_uid;
388 }
389 user = pw->pw_name;
390 asprintf (&fmt, _("%s%sUID = %d (%s)"), (fmt ? fmt : ""), (options ? ", " : ""),
391 uid, user);
392 options |= USER;
393 break;
394 case 'C': /* command */
395 if (prog)
396 break;
397 else
398 prog = optarg;
399 asprintf (&fmt, _("%s%scommand name '%s'"), (fmt ? fmt : ""), (options ? ", " : ""),
400 prog);
401 options |= PROG;
402 break;
403 case 'a': /* args (full path name with args) */
404 if (args)
405 break;
406 else
407 args = optarg;
408 asprintf (&fmt, _("%s%sargs '%s'"), (fmt ? fmt : ""), (options ? ", " : ""), args);
409 options |= ARGS;
410 break;
411 case 'r': /* RSS */
412 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
413 asprintf (&fmt, _("%s%sRSS >= %d"), (fmt ? fmt : ""), (options ? ", " : ""), rss);
414 options |= RSS;
415 break;
416 }
417 usage2 (_("%s: RSS must be an integer!\n\n"), progname);
418 case 'z': /* VSZ */
419 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
420 asprintf (&fmt, _("%s%sVSZ >= %d"), (fmt ? fmt : ""), (options ? ", " : ""), vsz);
421 options |= VSZ;
422 break;
423 }
424 usage2 (_("%s: VSZ must be an integer!\n\n"), progname);
425 case 'P': /* PCPU */
426 /* TODO: -P 1.5.5 is accepted */
427 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
428 asprintf (&fmt, _("%s%sPCPU >= %.2f"), (fmt ? fmt : ""), (options ? ", " : ""), pcpu);
429 options |= PCPU;
430 break;
431 }
432 usage2 (_("%s: PCPU must be a float!\n\n"), progname);
433 case 'm':
434 asprintf (&metric_name, "%s", optarg);
435 if ( strcmp(optarg, "PROCS") == 0) {
436 metric = METRIC_PROCS;
437 break;
438 }
439 else if ( strcmp(optarg, "VSZ") == 0) {
440 metric = METRIC_VSZ;
441 break;
442 }
443 else if ( strcmp(optarg, "RSS") == 0 ) {
444 metric = METRIC_RSS;
445 break;
446 }
447 else if ( strcmp(optarg, "CPU") == 0 ) {
448 metric = METRIC_CPU;
449 break;
450 }
451 printf (_("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n"),
452 progname);
453 print_usage ();
454 exit (STATE_UNKNOWN);
455 case 'v': /* command */
456 verbose++;
457 break;
458 }
459 }
461 c = optind;
462 if (wmax == -1 && argv[c])
463 wmax = atoi (argv[c++]);
464 if (cmax == -1 && argv[c])
465 cmax = atoi (argv[c++]);
466 if (statopts == NULL && argv[c]) {
467 asprintf (&statopts, "%s", argv[c++]);
468 asprintf (&fmt, _("%s%sSTATE = %s"), (fmt ? fmt : ""), (options ? ", " : ""), statopts);
469 options |= STAT;
470 }
472 return validate_arguments ();
473 }
478 int
479 validate_arguments ()
480 {
482 if (wmax >= 0 && wmin == -1)
483 wmin = 0;
484 if (cmax >= 0 && cmin == -1)
485 cmin = 0;
486 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
487 if (wmax > cmax && cmax != -1) {
488 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax, cmax);
489 return ERROR;
490 }
491 if (cmin > wmin && wmin != -1) {
492 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin, cmin);
493 return ERROR;
494 }
495 }
497 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
498 /* printf ("At least one threshold must be set\n"); */
499 /* return ERROR; */
500 /* } */
502 if (options == 0)
503 options = ALL;
505 if (statopts==NULL)
506 statopts = strdup("");
508 if (prog==NULL)
509 prog = strdup("");
511 if (args==NULL)
512 args = strdup("");
514 if (fmt==NULL)
515 fmt = strdup("");
517 if (fails==NULL)
518 fails = strdup("");
520 return options;
521 }
527 \f
528 /* Check thresholds against value */
529 int
530 check_thresholds (int value)
531 {
532 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
533 return OK;
534 }
535 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
536 if (value > cmax && value < cmin)
537 return STATE_CRITICAL;
538 }
539 else if (cmax >= 0 && value > cmax) {
540 return STATE_CRITICAL;
541 }
542 else if (cmin >= 0 && value < cmin) {
543 return STATE_CRITICAL;
544 }
546 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
547 if (value > wmax && value < wmin) {
548 return STATE_WARNING;
549 }
550 }
551 else if (wmax >= 0 && value > wmax) {
552 return STATE_WARNING;
553 }
554 else if (wmin >= 0 && value < wmin) {
555 return STATE_WARNING;
556 }
557 return STATE_OK;
558 }
564 \f
565 void
566 print_help (void)
567 {
568 print_revision (progname, revision);
570 printf (_("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>"));
571 printf (_(COPYRIGHT), copyright, email);
573 printf(_("\
574 Checks all processes and generates WARNING or CRITICAL states if the specified\n\
575 metric is outside the required threshold ranges. The metric defaults to number\n\
576 of processes. Search filters can be applied to limit the processes to check.\n\n"));
578 print_usage ();
580 printf(_("\n\
581 Required Arguments:\n\
582 -w, --warning=RANGE\n\
583 Generate warning state if metric is outside this range\n\
584 -c, --critical=RANGE\n\
585 Generate critical state if metric is outside this range\n"));
587 printf(_("\n\
588 Optional Arguments:\n\
589 -m, --metric=TYPE\n\
590 Check thresholds against metric. Valid types:\n\
591 PROCS - number of processes (default)\n\
592 VSZ - virtual memory size\n\
593 RSS - resident set memory size\n\
594 CPU - percentage cpu\n\
595 -v, --verbose\n\
596 Extra information. Up to 3 verbosity levels\n"));
598 printf(_("\n\
599 Optional Filters:\n\
600 -s, --state=STATUSFLAGS\n\
601 Only scan for processes that have, in the output of `ps`, one or\n\
602 more of the status flags you specify (for example R, Z, S, RS,\n\
603 RSZDT, plus others based on the output of your 'ps' command).\n\
604 -p, --ppid=PPID\n\
605 Only scan for children of the parent process ID indicated.\n\
606 -z, --vsz=VSZ\n\
607 Only scan for processes with vsz higher than indicated.\n\
608 -r, --rss=RSS\n\
609 Only scan for processes with rss higher than indicated.\n"));
611 printf(_("\
612 -P, --pcpu=PCPU\n\
613 Only scan for processes with pcpu higher than indicated.\n\
614 -u, --user=USER\n\
615 Only scan for processes with user name or ID indicated.\n\
616 -a, --argument-array=STRING\n\
617 Only scan for processes with args that contain STRING.\n\
618 -C, --command=COMMAND\n\
619 Only scan for exact matches of COMMAND (without path).\n"));
621 printf(_("\n\
622 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
623 specified 'max:min', a warning status will be generated if the\n\
624 count is inside the specified range\n\n"));
626 printf(_("\
627 This plugin checks the number of currently running processes and\n\
628 generates WARNING or CRITICAL states if the process count is outside\n\
629 the specified threshold ranges. The process count can be filtered by\n\
630 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
631 be the total number of running processes\n\n"));
633 printf(_("\
634 Examples:\n\
635 check_procs -w 2:2 -c 2:1024 -C portsentry\n\
636 Warning if not two processes with command name portsentry. Critical\n\
637 if < 2 or > 1024 processes\n\n\
638 check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
639 Warning alert if > 10 processes with command arguments containing \n\
640 '/usr/local/bin/perl' and owned by root\n\n\
641 check_procs -w 50000 -c 100000 --metric=VSZ\n\
642 Alert if vsz of any processes over 50K or 100K\n\
643 check_procs -w 10 -c 20 --metric=CPU\n\
644 Alert if cpu of any processes over 10% or 20%\n\n"));
646 printf (_(UT_SUPPORT));
647 }
649 void
650 print_usage (void)
651 {
652 printf ("\
653 Usage: %s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n\
654 [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n\
655 [-C command] [-v]\n", progname);
656 printf (_(UT_HLP_VRS), progname, progname);
657 }