1 /******************************************************************************
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 ******************************************************************************/
/* Program identity strings.  progname is printed in usage/error messages and
   is also compared against each process name in main() so the plugin can
   skip its own entry; revision is passed to print_revision(); copyright and
   email are passed to the COPYRIGHT format in print_help(). */
const char *progname = "check_procs";
const char *revision = "$Revision$";
const char *copyright = "2000-2003";
const char *email = "nagiosplug-devel@lists.sourceforge.net";
24 #include "common.h"
25 #include "popen.h"
26 #include "utils.h"
27 #include <pwd.h>
/* Forward declarations for the functions defined later in this file. */
int process_arguments (int, char **);	/* parse argv into the globals below */
int validate_arguments (void);		/* sanity-check thresholds, default the filter mask */
int check_thresholds (int);		/* map a metric value to a STATE_* code */
void print_help (void);
void print_usage (void);
/* Warning / critical threshold bounds.  -1 means "not set"; see
   validate_arguments() and check_thresholds() for how they are combined. */
int wmax = -1;
int cmax = -1;
int wmin = -1;
int cmin = -1;
int options = 0; /* bitmask of filter criteria to test against */
/* Bits for `options` (requested filters) and for the per-process
   `resultsum` mask built in main().  ALL is what validate_arguments()
   stores when no filter was requested. */
#define ALL 1
#define STAT 2
#define PPID 4
#define USER 8
#define PROG 16
#define ARGS 32
#define VSZ 64
#define RSS 128
#define PCPU 256
/* Different metrics */
int metric = 0;
#define METRIC_PROCS 0
#define METRIC_VSZ 1
#define METRIC_RSS 2
#define METRIC_CPU 3
char *metric_name = "";	/* label printed in the status line; set by main() and -m */
int verbose = 0;	/* incremented once per -v */
/* Filter values filled in by process_arguments() */
int uid;
int ppid;
int vsz;
int rss;
float pcpu;
char *statopts = "";
char *prog = "";
char *args = "";
char *fmt = "";		/* description of the active filters, printed after " with " */
char *fails = "";	/* comma-separated program names that breached a threshold */
char tmp[MAX_INPUT_BUFFER];	/* scratch buffer for sscanf trailing-text detection */
76 \f
77 int
78 main (int argc, char **argv)
79 {
80 char input_buffer[MAX_INPUT_BUFFER];
82 int procuid = 0;
83 int procppid = 0;
84 int procvsz = 0;
85 int procrss = 0;
86 float procpcpu = 0;
87 char procstat[8];
88 char procprog[MAX_INPUT_BUFFER];
89 char *procargs;
91 const char *zombie = "Z";
93 int resultsum = 0; /* bitmask of the filter criteria met by a process */
94 int found = 0; /* counter for number of lines returned in `ps` output */
95 int procs = 0; /* counter for number of processes meeting filter criteria */
96 int pos; /* number of spaces before 'args' in `ps` output */
97 int cols; /* number of columns in ps output */
98 int warn = 0; /* number of processes in warn state */
99 int crit = 0; /* number of processes in crit state */
100 int i = 0;
102 int result = STATE_UNKNOWN;
104 asprintf (&metric_name, "PROCS");
105 metric = METRIC_PROCS;
107 if (process_arguments (argc, argv) == ERROR)
108 usage (_("Unable to parse command line\n"));
110 if (verbose >= 2)
111 printf (_("CMD: %s\n"), PS_COMMAND);
113 child_process = spopen (PS_COMMAND);
114 if (child_process == NULL) {
115 printf (_("Could not open pipe: %s\n"), PS_COMMAND);
116 return STATE_UNKNOWN;
117 }
119 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
120 if (child_stderr == NULL)
121 printf (_("Could not open stderr for %s\n"), PS_COMMAND);
123 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
125 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
126 strcpy (procprog, "");
127 asprintf (&procargs, "%s", "");
129 cols = sscanf (input_buffer, PS_FORMAT, PS_VARLIST);
131 /* Zombie processes do not give a procprog command */
132 if ( cols == 6 && strstr(procstat, zombie) ) {
133 cols = 7;
134 /* Set some value for procargs for the strip command further below
135 Seen to be a problem on some Solaris 7 and 8 systems */
136 input_buffer[pos] = '\n';
137 input_buffer[pos+1] = 0x0;
138 }
139 if ( cols >= 7 ) {
140 resultsum = 0;
141 asprintf (&procargs, "%s", input_buffer + pos);
142 strip (procargs);
144 if ((options & STAT) && (strstr (statopts, procstat)))
145 resultsum |= STAT;
146 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
147 resultsum |= ARGS;
148 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
149 resultsum |= PROG;
150 if ((options & PPID) && (procppid == ppid))
151 resultsum |= PPID;
152 if ((options & USER) && (procuid == uid))
153 resultsum |= USER;
154 if ((options & VSZ) && (procvsz >= vsz))
155 resultsum |= VSZ;
156 if ((options & RSS) && (procrss >= rss))
157 resultsum |= RSS;
158 if ((options & PCPU) && (procpcpu >= pcpu))
159 resultsum |= PCPU;
161 if (verbose >= 3)
162 printf ("%d %d %d %d %d %.2f %s %s %s\n",
163 procs, procuid, procvsz, procrss,
164 procppid, procpcpu, procstat, procprog, procargs);
166 /* Ignore self */
167 if (strcmp (procprog, progname) == 0)
168 continue;
170 found++;
172 /* Next line if filters not matched */
173 if (!(options == resultsum || options == ALL))
174 continue;
176 procs++;
178 if (metric == METRIC_VSZ)
179 i = check_thresholds (procvsz);
180 else if (metric == METRIC_RSS)
181 i = check_thresholds (procrss);
182 /* TODO? float thresholds for --metric=CPU */
183 else if (metric == METRIC_CPU)
184 i = check_thresholds ((int)procpcpu);
186 if (metric != METRIC_PROCS) {
187 if (i == STATE_WARNING) {
188 warn++;
189 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
190 }
191 if (i == STATE_CRITICAL) {
192 crit++;
193 asprintf (&fails, "%s%s%s", fails, (fails == "" ? "" : ", "), procprog);
194 }
195 result = max_state (result, i);
196 }
197 }
198 /* This should not happen */
199 else if (verbose) {
200 printf(_("Not parseable: %s"), input_buffer);
201 }
202 }
204 /* If we get anything on STDERR, at least set warning */
205 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
206 if (verbose)
207 printf (_("STDERR: %s"), input_buffer);
208 result = max_state (result, STATE_WARNING);
209 printf (_("System call sent warnings to stderr\n"));
210 }
212 (void) fclose (child_stderr);
214 /* close the pipe */
215 if (spclose (child_process)) {
216 printf (_("System call returned nonzero status\n"));
217 result = max_state (result, STATE_WARNING);
218 }
220 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
221 printf (_("Unable to read output\n"));
222 return result;
223 }
225 if ( result == STATE_UNKNOWN )
226 result = STATE_OK;
228 /* Needed if procs found, but none match filter */
229 if ( metric == METRIC_PROCS ) {
230 result = max_state (result, check_thresholds (procs) );
231 }
233 if ( result == STATE_OK ) {
234 printf (_("%s OK: %d process%s"),
235 metric_name, procs, ( procs != 1 ? "es" : "") );
236 } else if (result == STATE_WARNING) {
237 if ( metric == METRIC_PROCS ) {
238 printf (_("PROCS WARNING: %d process%s"), procs,
239 ( procs != 1 ? "es" : ""));
240 } else {
241 printf (_("%s WARNING: %d warn out of %d process%s"),
242 metric_name, warn, procs,
243 ( procs != 1 ? "es" : ""));
244 }
245 } else if (result == STATE_CRITICAL) {
246 if (metric == METRIC_PROCS) {
247 printf (_("PROCS CRITICAL: %d process%s"), procs,
248 ( procs != 1 ? "es" : ""));
249 } else {
250 printf (_("%s CRITICAL: %d crit, %d warn out of %d process%s"),
251 metric_name, crit, warn, procs,
252 ( procs != 1 ? "es" : ""));
253 }
254 }
256 if (fmt != "") {
257 printf (" with %s", fmt);
258 }
260 if ( verbose >= 1 && fails != "" )
261 printf (" [%s]", fails);
263 printf ("\n");
264 return result;
265 }
271 \f
272 /* process command-line arguments */
273 int
274 process_arguments (int argc, char **argv)
275 {
276 int c = 1;
277 char *user;
278 struct passwd *pw;
279 int option_index = 0;
280 static struct option long_options[] = {
281 {"warning", required_argument, 0, 'w'},
282 {"critical", required_argument, 0, 'c'},
283 {"metric", required_argument, 0, 'm'},
284 {"timeout", required_argument, 0, 't'},
285 {"status", required_argument, 0, 's'},
286 {"ppid", required_argument, 0, 'p'},
287 {"command", required_argument, 0, 'C'},
288 {"vsz", required_argument, 0, 'z'},
289 {"rss", required_argument, 0, 'r'},
290 {"pcpu", required_argument, 0, 'P'},
291 {"argument-array", required_argument, 0, 'a'},
292 {"help", no_argument, 0, 'h'},
293 {"version", no_argument, 0, 'V'},
294 {"verbose", no_argument, 0, 'v'},
295 {0, 0, 0, 0}
296 };
298 for (c = 1; c < argc; c++)
299 if (strcmp ("-to", argv[c]) == 0)
300 strcpy (argv[c], "-t");
302 while (1) {
303 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
304 long_options, &option_index);
306 if (c == -1 || c == EOF)
307 break;
309 switch (c) {
310 case '?': /* help */
311 print_usage ();
312 exit (STATE_UNKNOWN);
313 case 'h': /* help */
314 print_help ();
315 exit (STATE_OK);
316 case 'V': /* version */
317 print_revision (progname, revision);
318 exit (STATE_OK);
319 case 't': /* timeout period */
320 if (!is_integer (optarg)) {
321 printf (_("%s: Timeout Interval must be an integer!\n\n"),
322 progname);
323 print_usage ();
324 exit (STATE_UNKNOWN);
325 }
326 timeout_interval = atoi (optarg);
327 break;
328 case 'c': /* critical threshold */
329 if (is_integer (optarg)) {
330 cmax = atoi (optarg);
331 break;
332 }
333 else if (sscanf (optarg, ":%d", &cmax) == 1) {
334 break;
335 }
336 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2) {
337 break;
338 }
339 else if (sscanf (optarg, "%d:", &cmin) == 1) {
340 break;
341 }
342 else {
343 printf (_("%s: Critical Process Count must be an integer!\n\n"),
344 progname);
345 print_usage ();
346 exit (STATE_UNKNOWN);
347 }
348 case 'w': /* warning time threshold */
349 if (is_integer (optarg)) {
350 wmax = atoi (optarg);
351 break;
352 }
353 else if (sscanf (optarg, ":%d", &wmax) == 1) {
354 break;
355 }
356 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2) {
357 break;
358 }
359 else if (sscanf (optarg, "%d:", &wmin) == 1) {
360 break;
361 }
362 else {
363 printf (_("%s: Warning Process Count must be an integer!\n\n"),
364 progname);
365 print_usage ();
366 exit (STATE_UNKNOWN);
367 }
368 case 'p': /* process id */
369 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
370 asprintf (&fmt, "%s%sPPID = %d", fmt, (options ? ", " : ""), ppid);
371 options |= PPID;
372 break;
373 }
374 printf (_("%s: Parent Process ID must be an integer!\n\n"),
375 progname);
376 print_usage ();
377 exit (STATE_UNKNOWN);
378 case 's': /* status */
379 asprintf (&statopts, "%s", optarg);
380 asprintf (&fmt, _("%s%sSTATE = %s"), fmt, (options ? ", " : ""), statopts);
381 options |= STAT;
382 break;
383 case 'u': /* user or user id */
384 if (is_integer (optarg)) {
385 uid = atoi (optarg);
386 pw = getpwuid ((uid_t) uid);
387 /* check to be sure user exists */
388 if (pw == NULL) {
389 printf (_("UID %d was not found\n"), uid);
390 print_usage ();
391 exit (STATE_UNKNOWN);
392 }
393 }
394 else {
395 pw = getpwnam (optarg);
396 /* check to be sure user exists */
397 if (pw == NULL) {
398 printf (_("User name %s was not found\n"), optarg);
399 print_usage ();
400 exit (STATE_UNKNOWN);
401 }
402 /* then get uid */
403 uid = pw->pw_uid;
404 }
405 user = pw->pw_name;
406 asprintf (&fmt, _("%s%sUID = %d (%s)"), fmt, (options ? ", " : ""),
407 uid, user);
408 options |= USER;
409 break;
410 case 'C': /* command */
411 asprintf (&prog, "%s", optarg);
412 asprintf (&fmt, _("%s%scommand name '%s'"), fmt, (options ? ", " : ""),
413 prog);
414 options |= PROG;
415 break;
416 case 'a': /* args (full path name with args) */
417 asprintf (&args, "%s", optarg);
418 asprintf (&fmt, _("%s%sargs '%s'"), fmt, (options ? ", " : ""), args);
419 options |= ARGS;
420 break;
421 case 'r': /* RSS */
422 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
423 asprintf (&fmt, _("%s%sRSS >= %d"), fmt, (options ? ", " : ""), rss);
424 options |= RSS;
425 break;
426 }
427 printf (_("%s: RSS must be an integer!\n\n"),
428 progname);
429 print_usage ();
430 exit (STATE_UNKNOWN);
431 case 'z': /* VSZ */
432 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
433 asprintf (&fmt, _("%s%sVSZ >= %d"), fmt, (options ? ", " : ""), vsz);
434 options |= VSZ;
435 break;
436 }
437 printf (_("%s: VSZ must be an integer!\n\n"),
438 progname);
439 print_usage ();
440 exit (STATE_UNKNOWN);
441 case 'P': /* PCPU */
442 /* TODO: -P 1.5.5 is accepted */
443 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
444 asprintf (&fmt, _("%s%sPCPU >= %.2f"), fmt, (options ? ", " : ""), pcpu);
445 options |= PCPU;
446 break;
447 }
448 printf (_("%s: PCPU must be a float!\n\n"),
449 progname);
450 print_usage ();
451 exit (STATE_UNKNOWN);
452 case 'm':
453 asprintf (&metric_name, "%s", optarg);
454 if ( strcmp(optarg, "PROCS") == 0) {
455 metric = METRIC_PROCS;
456 break;
457 }
458 else if ( strcmp(optarg, "VSZ") == 0) {
459 metric = METRIC_VSZ;
460 break;
461 }
462 else if ( strcmp(optarg, "RSS") == 0 ) {
463 metric = METRIC_RSS;
464 break;
465 }
466 else if ( strcmp(optarg, "CPU") == 0 ) {
467 metric = METRIC_CPU;
468 break;
469 }
470 printf (_("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n"),
471 progname);
472 print_usage ();
473 exit (STATE_UNKNOWN);
474 case 'v': /* command */
475 verbose++;
476 break;
477 }
478 }
480 c = optind;
481 if (wmax == -1 && argv[c])
482 wmax = atoi (argv[c++]);
483 if (cmax == -1 && argv[c])
484 cmax = atoi (argv[c++]);
485 if (statopts == NULL && argv[c]) {
486 asprintf (&statopts, "%s", argv[c++]);
487 asprintf (&fmt, _("%s%sSTATE = %s"), fmt, (options ? ", " : ""), statopts);
488 options |= STAT;
489 }
491 return validate_arguments ();
492 }
497 int
498 validate_arguments ()
499 {
501 if (wmax >= 0 && wmin == -1)
502 wmin = 0;
503 if (cmax >= 0 && cmin == -1)
504 cmin = 0;
505 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
506 if (wmax > cmax && cmax != -1) {
507 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax, cmax);
508 return ERROR;
509 }
510 if (cmin > wmin && wmin != -1) {
511 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin, cmin);
512 return ERROR;
513 }
514 }
516 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
517 /* printf ("At least one threshold must be set\n"); */
518 /* return ERROR; */
519 /* } */
521 if (options == 0)
522 options = ALL;
524 return options;
525 }
531 \f
532 /* Check thresholds against value */
533 int
534 check_thresholds (int value)
535 {
536 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
537 return OK;
538 }
539 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
540 if (value > cmax && value < cmin)
541 return STATE_CRITICAL;
542 }
543 else if (cmax >= 0 && value > cmax) {
544 return STATE_CRITICAL;
545 }
546 else if (cmin >= 0 && value < cmin) {
547 return STATE_CRITICAL;
548 }
550 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
551 if (value > wmax && value < wmin) {
552 return STATE_WARNING;
553 }
554 }
555 else if (wmax >= 0 && value > wmax) {
556 return STATE_WARNING;
557 }
558 else if (wmin >= 0 && value < wmin) {
559 return STATE_WARNING;
560 }
561 return STATE_OK;
562 }
568 \f
569 void
570 print_help (void)
571 {
572 print_revision (progname, revision);
574 printf (_("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>"));
575 printf (_(COPYRIGHT), copyright, email);
577 printf(_("\
578 Checks all processes and generates WARNING or CRITICAL states if the specified\n\
579 metric is outside the required threshold ranges. The metric defaults to number\n\
580 of processes. Search filters can be applied to limit the processes to check.\n\n"));
582 print_usage ();
584 printf(_("\n\
585 Required Arguments:\n\
586 -w, --warning=RANGE\n\
587 Generate warning state if metric is outside this range\n\
588 -c, --critical=RANGE\n\
589 Generate critical state if metric is outside this range\n"));
591 printf(_("\n\
592 Optional Arguments:\n\
593 -m, --metric=TYPE\n\
594 Check thresholds against metric. Valid types:\n\
595 PROCS - number of processes (default)\n\
596 VSZ - virtual memory size\n\
597 RSS - resident set memory size\n\
598 CPU - percentage cpu\n\
599 -v, --verbose\n\
600 Extra information. Up to 3 verbosity levels\n"));
602 printf(_("\n\
603 Optional Filters:\n\
604 -s, --state=STATUSFLAGS\n\
605 Only scan for processes that have, in the output of `ps`, one or\n\
606 more of the status flags you specify (for example R, Z, S, RS,\n\
607 RSZDT, plus others based on the output of your 'ps' command).\n\
608 -p, --ppid=PPID\n\
609 Only scan for children of the parent process ID indicated.\n\
610 -z, --vsz=VSZ\n\
611 Only scan for processes with vsz higher than indicated.\n\
612 -r, --rss=RSS\n\
613 Only scan for processes with rss higher than indicated.\n"));
615 printf(_("\
616 -P, --pcpu=PCPU\n\
617 Only scan for processes with pcpu higher than indicated.\n\
618 -u, --user=USER\n\
619 Only scan for processes with user name or ID indicated.\n\
620 -a, --argument-array=STRING\n\
621 Only scan for processes with args that contain STRING.\n\
622 -C, --command=COMMAND\n\
623 Only scan for exact matches to the named COMMAND.\n"));
625 printf(_("\n\
626 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
627 specified 'max:min', a warning status will be generated if the\n\
628 count is inside the specified range\n\n"));
630 printf(_("\
631 This plugin checks the number of currently running processes and\n\
632 generates WARNING or CRITICAL states if the process count is outside\n\
633 the specified threshold ranges. The process count can be filtered by\n\
634 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
635 be the total number of running processes\n\n"));
637 printf(_("\
638 Examples:\n\
639 check_procs -w 2:2 -c 2:1024 -C portsentry\n\
640 Warning if not two processes with command name portsentry. Critical\n\
641 if < 2 or > 1024 processes\n\n\
642 check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
643 Warning alert if > 10 processes with command arguments containing \n\
644 '/usr/local/bin/perl' and owned by root\n\n\
645 check_procs -w 50000 -c 100000 --metric=VSZ\n\
646 Alert if vsz of any processes over 50K or 100K\n\n"));
648 printf (_(UT_SUPPORT));
649 }
651 void
652 print_usage (void)
653 {
654 printf ("\
655 Usage: %s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n\
656 [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n\
657 [-C command] [-v]\n", progname);
658 printf (_(UT_HLP_VRS), progname, progname);
659 }