1 /******************************************************************************
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 ******************************************************************************/
/* Program identity strings. */
/* Plugin name: shown in usage/version output and compared against the
   command name of every `ps` row so that the plugin does not count itself. */
const char *progname = "check_procs";
/* Revision keyword, passed to print_revision(). */
const char *revision = "$Revision$";
/* Copyright years, passed as an argument to the COPYRIGHT format in print_help(). */
const char *copyright = "2000-2003";
/* Contact address, passed as an argument to the COPYRIGHT format in print_help(). */
const char *email = "nagiosplug-devel@lists.sourceforge.net";
24 #include "common.h"
25 #include "popen.h"
26 #include "utils.h"
27 #include <pwd.h>
/* Parse argc/argv into the file-scope settings below; returns the result of
   validate_arguments(). */
int process_arguments (int, char **);
/* Check the threshold ranges for consistency and fill in defaults; returns
   ERROR on inconsistent ranges, otherwise the filter bitmask. */
int validate_arguments (void);
/* Compare a metric value against the warning/critical ranges and return the
   matching state (OK, warning or critical). */
int check_thresholds (int);
/* Print the full help text. */
void print_help (void);
/* Print the short usage synopsis. */
void print_usage (void);
/* Warning/critical threshold bounds set by -w / -c; -1 means "not set". */
int wmax = -1;
int cmax = -1;
int wmin = -1;
int cmin = -1;

int options = 0; /* bitmask of filter criteria to test against */
/* Bits for `options` and for the per-process match mask built in main().
   ALL is substituted by validate_arguments() when no filter was requested. */
#define ALL 1
#define STAT 2
#define PPID 4
#define USER 8
#define PROG 16
#define ARGS 32
#define VSZ 64
#define RSS 128
#define PCPU 256

/* Metric the thresholds are applied to (selected with -m). */
char *metric_name; /* label printed at the start of the result line */
enum metric {
METRIC_PROCS,
METRIC_VSZ,
METRIC_RSS,
METRIC_CPU
};
enum metric metric = METRIC_PROCS;

int verbose = 0; /* incremented once per -v */
/* Filter values supplied on the command line. */
int uid; /* -u: owner's numeric user id */
int ppid; /* -p: parent process id */
int vsz; /* -z: lower bound for a process's vsz */
int rss; /* -r: lower bound for a process's rss */
float pcpu; /* -P: lower bound for a process's pcpu */
char *statopts; /* -s: status flags to match */
char *prog; /* -C: exact command name to match */
char *args; /* -a: substring to look for in the argument string */
char *fmt; /* description of the active filters, printed after " with " */
char *fails; /* comma-separated names of processes that breached a threshold */
char tmp[MAX_INPUT_BUFFER]; /* general-purpose scratch buffer */
78 \f
79 int
80 main (int argc, char **argv)
81 {
82 char *input_buffer;
83 char *input_line;
84 char *procprog;
86 int procuid = 0;
87 int procppid = 0;
88 int procvsz = 0;
89 int procrss = 0;
90 float procpcpu = 0;
91 char procstat[8];
92 char *procargs;
93 char *temp_string;
95 const char *zombie = "Z";
97 int resultsum = 0; /* bitmask of the filter criteria met by a process */
98 int found = 0; /* counter for number of lines returned in `ps` output */
99 int procs = 0; /* counter for number of processes meeting filter criteria */
100 int pos; /* number of spaces before 'args' in `ps` output */
101 int cols; /* number of columns in ps output */
102 int expected_cols = PS_COLS - 1;
103 int warn = 0; /* number of processes in warn state */
104 int crit = 0; /* number of processes in crit state */
105 int i = 0;
107 int result = STATE_UNKNOWN;
109 setlocale (LC_ALL, "");
110 bindtextdomain (PACKAGE, LOCALEDIR);
111 textdomain (PACKAGE);
113 input_buffer = malloc (MAX_INPUT_BUFFER);
114 procprog = malloc (MAX_INPUT_BUFFER);
116 asprintf (&metric_name, "PROCS");
117 metric = METRIC_PROCS;
119 if (process_arguments (argc, argv) == ERROR)
120 usage (_("Unable to parse command line\n"));
122 if (verbose >= 2)
123 printf (_("CMD: %s\n"), PS_COMMAND);
125 child_process = spopen (PS_COMMAND);
126 if (child_process == NULL) {
127 printf (_("Could not open pipe: %s\n"), PS_COMMAND);
128 return STATE_UNKNOWN;
129 }
131 child_stderr = fdopen (child_stderr_array[fileno (child_process)], "r");
132 if (child_stderr == NULL)
133 printf (_("Could not open stderr for %s\n"), PS_COMMAND);
135 /* flush first line */
136 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
137 while ( input_buffer[strlen(input_buffer)-1] != '\n' )
138 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
140 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process)) {
141 asprintf (&input_line, "%s", input_buffer);
142 while ( input_buffer[strlen(input_buffer)-1] != '\n' ) {
143 fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_process);
144 asprintf (&input_line, "%s%s", input_line, input_buffer);
145 }
147 if (verbose >= 3)
148 printf ("%s", input_line);
150 strcpy (procprog, "");
151 asprintf (&procargs, "%s", "");
153 cols = sscanf (input_line, PS_FORMAT, PS_VARLIST);
155 /* Zombie processes do not give a procprog command */
156 if ( cols == (expected_cols - 1) && strstr(procstat, zombie) ) {
157 cols = expected_cols;
158 }
159 if ( cols >= expected_cols ) {
160 resultsum = 0;
161 asprintf (&procargs, "%s", input_line + pos);
162 strip (procargs);
164 /* Some ps return full pathname for command. This removes path */
165 temp_string = strtok ((char *)procprog, "/");
166 while (temp_string) {
167 strcpy(procprog, temp_string);
168 temp_string = strtok (NULL, "/");
169 }
171 if (verbose >= 3)
172 printf ("%d %d %d %d %d %.2f %s %s %s\n",
173 procs, procuid, procvsz, procrss,
174 procppid, procpcpu, procstat, procprog, procargs);
176 /* Ignore self */
177 if (strcmp (procprog, progname) == 0) {
178 continue;
179 }
181 if ((options & STAT) && (strstr (statopts, procstat)))
182 resultsum |= STAT;
183 if ((options & ARGS) && procargs && (strstr (procargs, args) != NULL))
184 resultsum |= ARGS;
185 if ((options & PROG) && procprog && (strcmp (prog, procprog) == 0))
186 resultsum |= PROG;
187 if ((options & PPID) && (procppid == ppid))
188 resultsum |= PPID;
189 if ((options & USER) && (procuid == uid))
190 resultsum |= USER;
191 if ((options & VSZ) && (procvsz >= vsz))
192 resultsum |= VSZ;
193 if ((options & RSS) && (procrss >= rss))
194 resultsum |= RSS;
195 if ((options & PCPU) && (procpcpu >= pcpu))
196 resultsum |= PCPU;
198 found++;
200 /* Next line if filters not matched */
201 if (!(options == resultsum || options == ALL))
202 continue;
204 procs++;
206 if (metric == METRIC_VSZ)
207 i = check_thresholds (procvsz);
208 else if (metric == METRIC_RSS)
209 i = check_thresholds (procrss);
210 /* TODO? float thresholds for --metric=CPU */
211 else if (metric == METRIC_CPU)
212 i = check_thresholds ((int)procpcpu);
214 if (metric != METRIC_PROCS) {
215 if (i == STATE_WARNING) {
216 warn++;
217 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
218 result = max_state (result, i);
219 }
220 if (i == STATE_CRITICAL) {
221 crit++;
222 asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog);
223 result = max_state (result, i);
224 }
225 }
226 }
227 /* This should not happen */
228 else if (verbose) {
229 printf(_("Not parseable: %s"), input_buffer);
230 }
231 }
233 /* If we get anything on STDERR, at least set warning */
234 while (fgets (input_buffer, MAX_INPUT_BUFFER - 1, child_stderr)) {
235 if (verbose)
236 printf (_("STDERR: %s"), input_buffer);
237 result = max_state (result, STATE_WARNING);
238 printf (_("System call sent warnings to stderr\n"));
239 }
241 (void) fclose (child_stderr);
243 /* close the pipe */
244 if (spclose (child_process)) {
245 printf (_("System call returned nonzero status\n"));
246 result = max_state (result, STATE_WARNING);
247 }
249 if (found == 0) { /* no process lines parsed so return STATE_UNKNOWN */
250 printf (_("Unable to read output\n"));
251 return result;
252 }
254 if ( result == STATE_UNKNOWN )
255 result = STATE_OK;
257 /* Needed if procs found, but none match filter */
258 if ( metric == METRIC_PROCS ) {
259 result = max_state (result, check_thresholds (procs) );
260 }
262 if ( result == STATE_OK ) {
263 printf ("%s %s: ", metric_name, _("OK"));
264 } else if (result == STATE_WARNING) {
265 printf ("%s %s: ", metric_name, _("WARNING"));
266 if ( metric != METRIC_PROCS ) {
267 printf (_("%d warn out of "), warn);
268 }
269 } else if (result == STATE_CRITICAL) {
270 printf ("%s %s: ", metric_name, _("CRITICAL"));
271 if (metric != METRIC_PROCS) {
272 printf (_("%d crit, %d warn out of "), crit, warn);
273 }
274 }
275 printf (ngettext ("%d process", "%d processes", (unsigned long) procs), procs);
277 if (strcmp(fmt,"") != 0) {
278 printf (_(" with %s"), fmt);
279 }
281 if ( verbose >= 1 && strcmp(fails,"") )
282 printf (" [%s]", fails);
284 printf ("\n");
285 return result;
286 }
292 \f
293 /* process command-line arguments */
294 int
295 process_arguments (int argc, char **argv)
296 {
297 int c = 1;
298 char *user;
299 struct passwd *pw;
300 int option = 0;
301 static struct option longopts[] = {
302 {"warning", required_argument, 0, 'w'},
303 {"critical", required_argument, 0, 'c'},
304 {"metric", required_argument, 0, 'm'},
305 {"timeout", required_argument, 0, 't'},
306 {"status", required_argument, 0, 's'},
307 {"ppid", required_argument, 0, 'p'},
308 {"command", required_argument, 0, 'C'},
309 {"vsz", required_argument, 0, 'z'},
310 {"rss", required_argument, 0, 'r'},
311 {"pcpu", required_argument, 0, 'P'},
312 {"argument-array", required_argument, 0, 'a'},
313 {"help", no_argument, 0, 'h'},
314 {"version", no_argument, 0, 'V'},
315 {"verbose", no_argument, 0, 'v'},
316 {0, 0, 0, 0}
317 };
319 for (c = 1; c < argc; c++)
320 if (strcmp ("-to", argv[c]) == 0)
321 strcpy (argv[c], "-t");
323 while (1) {
324 c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:",
325 longopts, &option);
327 if (c == -1 || c == EOF)
328 break;
330 switch (c) {
331 case '?': /* help */
332 print_usage ();
333 exit (STATE_UNKNOWN);
334 case 'h': /* help */
335 print_help ();
336 exit (STATE_OK);
337 case 'V': /* version */
338 print_revision (progname, revision);
339 exit (STATE_OK);
340 case 't': /* timeout period */
341 if (!is_integer (optarg))
342 usage (_("Timeout Interval must be an integer!\n\n"));
343 else
344 timeout_interval = atoi (optarg);
345 break;
346 case 'c': /* critical threshold */
347 if (is_integer (optarg))
348 cmax = atoi (optarg);
349 else if (sscanf (optarg, ":%d", &cmax) == 1)
350 break;
351 else if (sscanf (optarg, "%d:%d", &cmin, &cmax) == 2)
352 break;
353 else if (sscanf (optarg, "%d:", &cmin) == 1)
354 break;
355 else
356 usage (_("Critical Process Count must be an integer!\n\n"));
357 break;
358 case 'w': /* warning threshold */
359 if (is_integer (optarg))
360 wmax = atoi (optarg);
361 else if (sscanf (optarg, ":%d", &wmax) == 1)
362 break;
363 else if (sscanf (optarg, "%d:%d", &wmin, &wmax) == 2)
364 break;
365 else if (sscanf (optarg, "%d:", &wmin) == 1)
366 break;
367 else
368 usage (_("Warning Process Count must be an integer!\n\n"));
369 break;
370 case 'p': /* process id */
371 if (sscanf (optarg, "%d%[^0-9]", &ppid, tmp) == 1) {
372 asprintf (&fmt, "%s%sPPID = %d", (fmt ? fmt : "") , (options ? ", " : ""), ppid);
373 options |= PPID;
374 break;
375 }
376 usage2 (_("%s: Parent Process ID must be an integer!\n\n"), progname);
377 case 's': /* status */
378 if (statopts)
379 break;
380 else
381 statopts = optarg;
382 asprintf (&fmt, _("%s%sSTATE = %s"), (fmt ? fmt : ""), (options ? ", " : ""), statopts);
383 options |= STAT;
384 break;
385 case 'u': /* user or user id */
386 if (is_integer (optarg)) {
387 uid = atoi (optarg);
388 pw = getpwuid ((uid_t) uid);
389 /* check to be sure user exists */
390 if (pw == NULL)
391 usage2 (_("UID %s was not found\n"), optarg);
392 }
393 else {
394 pw = getpwnam (optarg);
395 /* check to be sure user exists */
396 if (pw == NULL)
397 usage2 (_("User name %s was not found\n"), optarg);
398 /* then get uid */
399 uid = pw->pw_uid;
400 }
401 user = pw->pw_name;
402 asprintf (&fmt, _("%s%sUID = %d (%s)"), (fmt ? fmt : ""), (options ? ", " : ""),
403 uid, user);
404 options |= USER;
405 break;
406 case 'C': /* command */
407 if (prog)
408 break;
409 else
410 prog = optarg;
411 asprintf (&fmt, _("%s%scommand name '%s'"), (fmt ? fmt : ""), (options ? ", " : ""),
412 prog);
413 options |= PROG;
414 break;
415 case 'a': /* args (full path name with args) */
416 if (args)
417 break;
418 else
419 args = optarg;
420 asprintf (&fmt, _("%s%sargs '%s'"), (fmt ? fmt : ""), (options ? ", " : ""), args);
421 options |= ARGS;
422 break;
423 case 'r': /* RSS */
424 if (sscanf (optarg, "%d%[^0-9]", &rss, tmp) == 1) {
425 asprintf (&fmt, _("%s%sRSS >= %d"), (fmt ? fmt : ""), (options ? ", " : ""), rss);
426 options |= RSS;
427 break;
428 }
429 usage2 (_("%s: RSS must be an integer!\n\n"), progname);
430 case 'z': /* VSZ */
431 if (sscanf (optarg, "%d%[^0-9]", &vsz, tmp) == 1) {
432 asprintf (&fmt, _("%s%sVSZ >= %d"), (fmt ? fmt : ""), (options ? ", " : ""), vsz);
433 options |= VSZ;
434 break;
435 }
436 usage2 (_("%s: VSZ must be an integer!\n\n"), progname);
437 case 'P': /* PCPU */
438 /* TODO: -P 1.5.5 is accepted */
439 if (sscanf (optarg, "%f%[^0-9.]", &pcpu, tmp) == 1) {
440 asprintf (&fmt, _("%s%sPCPU >= %.2f"), (fmt ? fmt : ""), (options ? ", " : ""), pcpu);
441 options |= PCPU;
442 break;
443 }
444 usage2 (_("%s: PCPU must be a float!\n\n"), progname);
445 case 'm':
446 asprintf (&metric_name, "%s", optarg);
447 if ( strcmp(optarg, "PROCS") == 0) {
448 metric = METRIC_PROCS;
449 break;
450 }
451 else if ( strcmp(optarg, "VSZ") == 0) {
452 metric = METRIC_VSZ;
453 break;
454 }
455 else if ( strcmp(optarg, "RSS") == 0 ) {
456 metric = METRIC_RSS;
457 break;
458 }
459 else if ( strcmp(optarg, "CPU") == 0 ) {
460 metric = METRIC_CPU;
461 break;
462 }
463 printf (_("%s: metric must be one of PROCS, VSZ, RSS, CPU!\n\n"),
464 progname);
465 print_usage ();
466 exit (STATE_UNKNOWN);
467 case 'v': /* command */
468 verbose++;
469 break;
470 }
471 }
473 c = optind;
474 if (wmax == -1 && argv[c])
475 wmax = atoi (argv[c++]);
476 if (cmax == -1 && argv[c])
477 cmax = atoi (argv[c++]);
478 if (statopts == NULL && argv[c]) {
479 asprintf (&statopts, "%s", argv[c++]);
480 asprintf (&fmt, _("%s%sSTATE = %s"), (fmt ? fmt : ""), (options ? ", " : ""), statopts);
481 options |= STAT;
482 }
484 return validate_arguments ();
485 }
490 int
491 validate_arguments ()
492 {
494 if (wmax >= 0 && wmin == -1)
495 wmin = 0;
496 if (cmax >= 0 && cmin == -1)
497 cmin = 0;
498 if (wmax >= wmin && cmax >= cmin) { /* standard ranges */
499 if (wmax > cmax && cmax != -1) {
500 printf (_("wmax (%d) cannot be greater than cmax (%d)\n"), wmax, cmax);
501 return ERROR;
502 }
503 if (cmin > wmin && wmin != -1) {
504 printf (_("wmin (%d) cannot be less than cmin (%d)\n"), wmin, cmin);
505 return ERROR;
506 }
507 }
509 /* if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) { */
510 /* printf ("At least one threshold must be set\n"); */
511 /* return ERROR; */
512 /* } */
514 if (options == 0)
515 options = ALL;
517 if (statopts==NULL)
518 statopts = strdup("");
520 if (prog==NULL)
521 prog = strdup("");
523 if (args==NULL)
524 args = strdup("");
526 if (fmt==NULL)
527 fmt = strdup("");
529 if (fails==NULL)
530 fails = strdup("");
532 return options;
533 }
539 \f
540 /* Check thresholds against value */
541 int
542 check_thresholds (int value)
543 {
544 if (wmax == -1 && cmax == -1 && wmin == -1 && cmin == -1) {
545 return OK;
546 }
547 else if (cmax >= 0 && cmin >= 0 && cmax < cmin) {
548 if (value > cmax && value < cmin)
549 return STATE_CRITICAL;
550 }
551 else if (cmax >= 0 && value > cmax) {
552 return STATE_CRITICAL;
553 }
554 else if (cmin >= 0 && value < cmin) {
555 return STATE_CRITICAL;
556 }
558 if (wmax >= 0 && wmin >= 0 && wmax < wmin) {
559 if (value > wmax && value < wmin) {
560 return STATE_WARNING;
561 }
562 }
563 else if (wmax >= 0 && value > wmax) {
564 return STATE_WARNING;
565 }
566 else if (wmin >= 0 && value < wmin) {
567 return STATE_WARNING;
568 }
569 return STATE_OK;
570 }
576 \f
577 void
578 print_help (void)
579 {
580 print_revision (progname, revision);
582 printf (_("Copyright (c) 1999 Ethan Galstad <nagios@nagios.org>"));
583 printf (_(COPYRIGHT), copyright, email);
585 printf(_("\
586 Checks all processes and generates WARNING or CRITICAL states if the specified\n\
587 metric is outside the required threshold ranges. The metric defaults to number\n\
588 of processes. Search filters can be applied to limit the processes to check.\n\n"));
590 print_usage ();
592 printf(_("\n\
593 Required Arguments:\n\
594 -w, --warning=RANGE\n\
595 Generate warning state if metric is outside this range\n\
596 -c, --critical=RANGE\n\
597 Generate critical state if metric is outside this range\n"));
599 printf(_("\n\
600 Optional Arguments:\n\
601 -m, --metric=TYPE\n\
602 Check thresholds against metric. Valid types:\n\
603 PROCS - number of processes (default)\n\
604 VSZ - virtual memory size\n\
605 RSS - resident set memory size\n\
606 CPU - percentage cpu\n\
607 -v, --verbose\n\
608 Extra information. Up to 3 verbosity levels\n"));
610 printf(_("\n\
611 Optional Filters:\n\
612 -s, --state=STATUSFLAGS\n\
613 Only scan for processes that have, in the output of `ps`, one or\n\
614 more of the status flags you specify (for example R, Z, S, RS,\n\
615 RSZDT, plus others based on the output of your 'ps' command).\n\
616 -p, --ppid=PPID\n\
617 Only scan for children of the parent process ID indicated.\n\
618 -z, --vsz=VSZ\n\
619 Only scan for processes with vsz higher than indicated.\n\
620 -r, --rss=RSS\n\
621 Only scan for processes with rss higher than indicated.\n"));
623 printf(_("\
624 -P, --pcpu=PCPU\n\
625 Only scan for processes with pcpu higher than indicated.\n\
626 -u, --user=USER\n\
627 Only scan for processes with user name or ID indicated.\n\
628 -a, --argument-array=STRING\n\
629 Only scan for processes with args that contain STRING.\n\
630 -C, --command=COMMAND\n\
631 Only scan for exact matches of COMMAND (without path).\n"));
633 printf(_("\n\
634 RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\
635 specified 'max:min', a warning status will be generated if the\n\
636 count is inside the specified range\n\n"));
638 printf(_("\
639 This plugin checks the number of currently running processes and\n\
640 generates WARNING or CRITICAL states if the process count is outside\n\
641 the specified threshold ranges. The process count can be filtered by\n\
642 process owner, parent process PID, current state (e.g., 'Z'), or may\n\
643 be the total number of running processes\n\n"));
645 printf(_("\
646 Examples:\n\
647 check_procs -w 2:2 -c 2:1024 -C portsentry\n\
648 Warning if not two processes with command name portsentry. Critical\n\
649 if < 2 or > 1024 processes\n\n\
650 check_procs -w 10 -a '/usr/local/bin/perl' -u root\n\
651 Warning alert if > 10 processes with command arguments containing \n\
652 '/usr/local/bin/perl' and owned by root\n\n\
653 check_procs -w 50000 -c 100000 --metric=VSZ\n\
654 Alert if vsz of any processes over 50K or 100K\n\
655 check_procs -w 10 -c 20 --metric=CPU\n\
656 Alert if cpu of any processes over 10% or 20%\n\n"));
658 printf (_(UT_SUPPORT));
659 }
661 void
662 print_usage (void)
663 {
664 printf ("\
665 Usage: %s -w <range> -c <range> [-m metric] [-s state] [-p ppid]\n\
666 [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n\
667 [-C command] [-v]\n", progname);
668 printf (_(UT_HLP_VRS), progname, progname);
669 }