Code

Initial revision
[nagiosplug.git] / plugins / check_overcr.c
1 /******************************************************************************
2 *
3 * CHECK_OVERCR.C
4 *
5 * Program: Over-CR collector plugin for Nagios
6 * License: GPL
7 * Copyright (c) 1999 Ethan Galstad (nagios@nagios.org)
8 *
9 * $Id$
10 *
11 * Description:
12 *
13 * Notes:
14 * - This plugin requires that Eric Molitor's Over-CR collector daemon
15 *        be running on any UNIX boxes you want to monitor.  Over-CR
16 *        is available from http://www.molitor.org/overcr/
17 *
18 * Modifications:
19 *
20 * 08-11-999 Jacob Lundqvist <jaclu@grm.se>
21 * Load was presented as a one-digit percentage - changed to a two-digit
22 *       value; previously a load of 11.2 was presented as "1.2%" (not
23 *       very high). The warning and critical params were ints, which is
24 *       not very good for load; changed to doubles so we can trap load
25 *       limits like 1.5.  Also added more informative LOAD error messages.
26
27 * License Information:
28 *
29 * This program is free software; you can redistribute it and/or modify
30 * it under the terms of the GNU General Public License as published by
31 * the Free Software Foundation; either version 2 of the License, or
32 * (at your option) any later version.
33 *
34 * This program is distributed in the hope that it will be useful,
35 * but WITHOUT ANY WARRANTY; without even the implied warranty of
36 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
37 * GNU General Public License for more details.
38 *
39 * You should have received a copy of the GNU General Public License
40 * along with this program; if not, write to the Free Software
41 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
42 *
43 *****************************************************************************/
45 #include "config.h"
46 #include "common.h"
47 #include "netutils.h"
48 #include "utils.h"
50 #define CHECK_NONE      0
51 #define CHECK_LOAD1     1
52 #define CHECK_LOAD5     2
53 #define CHECK_LOAD15    4
54 #define CHECK_DPU       8
55 #define CHECK_PROCS     16
56 #define CHECK_NETSTAT   32
57 #define CHECK_UPTIME    64
59 #define PORT    2000
61 #define PROGNAME "check_overcr"
63 char *server_address = NULL;
64 int server_port = PORT;
65 double warning_value = 0L;
66 double critical_value = 0L;
67 int check_warning_value = FALSE;
68 int check_critical_value = FALSE;
69 int vars_to_check = CHECK_NONE;
70 int cmd_timeout = 1;
72 int netstat_port = 0;
73 char *disk_name = NULL;
74 char *process_name = NULL;
76 int process_arguments (int, char **);
77 void print_usage (void);
78 void print_help (void);
80 int
81 main (int argc, char **argv)
82 {
83         int result;
84         char send_buffer[MAX_INPUT_BUFFER];
85         char recv_buffer[MAX_INPUT_BUFFER];
86         char output_message[MAX_INPUT_BUFFER];
87         char temp_buffer[MAX_INPUT_BUFFER];
88         char *temp_ptr = NULL;
89         int found_disk = FALSE;
90         unsigned long percent_used_disk_space = 100;
91         double load;
92         double load_1min;
93         double load_5min;
94         double load_15min;
95         int port_connections = 0;
96         int processes = 0;
97         double uptime_raw_hours;
98         int uptime_raw_minutes = 0;
99         int uptime_days = 0;
100         int uptime_hours = 0;
101         int uptime_minutes = 0;
103         if (process_arguments (argc, argv) == ERROR)
104                 usage ("Could not parse arguments\n");
106         /* initialize alarm signal handling */
107         signal (SIGALRM, socket_timeout_alarm_handler);
109         /* set socket timeout */
110         alarm (socket_timeout);
112         result = STATE_OK;
114         if (vars_to_check == CHECK_LOAD1 || vars_to_check == CHECK_LOAD5
115                         || vars_to_check == CHECK_LOAD15) {
117                 strcpy (send_buffer, "LOAD\r\nQUIT\r\n");
118                 result =
119                         process_tcp_request2 (server_address, server_port, send_buffer,
120                                                                                                                 recv_buffer, sizeof (recv_buffer));
121                 if (result != STATE_OK)
122                         return result;
124                 temp_ptr = (char *) strtok (recv_buffer, "\r\n");
125                 if (temp_ptr == NULL) {
126                         printf ("Invalid response from server - no load information\n");
127                         return STATE_CRITICAL;
128                 }
129                 load_1min = strtod (temp_ptr, NULL);
130                 temp_ptr = (char *) strtok (NULL, "\r\n");
131                 if (temp_ptr == NULL) {
132                         printf ("Invalid response from server after load 1\n");
133                         return STATE_CRITICAL;
134                 }
135                 load_5min = strtod (temp_ptr, NULL);
136                 temp_ptr = (char *) strtok (NULL, "\r\n");
137                 if (temp_ptr == NULL) {
138                         printf ("Invalid response from server after load 5\n");
139                         return STATE_CRITICAL;
140                 }
141                 load_15min = strtod (temp_ptr, NULL);
144                 switch (vars_to_check) {
145                 case CHECK_LOAD1:
146                         strcpy (temp_buffer, "1");
147                         load = load_1min;
148                         break;
149                 case CHECK_LOAD5:
150                         strcpy (temp_buffer, "5");
151                         load = load_5min;
152                         break;
153                 default:
154                         strcpy (temp_buffer, "15");
155                         load = load_15min;
156                         break;
157                 }
159                 if (check_critical_value == TRUE && (load >= critical_value))
160                         result = STATE_CRITICAL;
161                 else if (check_warning_value == TRUE && (load >= warning_value))
162                         result = STATE_WARNING;
163                 sprintf (output_message, "Load %s - %s-min load average = %0.2f",
164                                                  (result == STATE_OK) ? "ok" : "problem", temp_buffer, load);
165         }
168         else if (vars_to_check == CHECK_DPU) {
170                 sprintf (send_buffer, "DISKSPACE\r\n");
171                 result =
172                         process_tcp_request2 (server_address, server_port, send_buffer,
173                                                                                                                 recv_buffer, sizeof (recv_buffer));
174                 if (result != STATE_OK)
175                         return result;
177                 for (temp_ptr = (char *) strtok (recv_buffer, " "); temp_ptr != NULL;
178                                  temp_ptr = (char *) strtok (NULL, " ")) {
180                         if (!strcmp (temp_ptr, disk_name)) {
181                                 found_disk = TRUE;
182                                 temp_ptr = (char *) strtok (NULL, "%");
183                                 if (temp_ptr == NULL) {
184                                         printf ("Invalid response from server\n");
185                                         return STATE_CRITICAL;
186                                 }
187                                 percent_used_disk_space = strtoul (temp_ptr, NULL, 10);
188                                 break;
189                         }
191                         temp_ptr = (char *) strtok (NULL, "\r\n");
192                 }
194                 /* error if we couldn't find the info for the disk */
195                 if (found_disk == FALSE) {
196                         sprintf (output_message, "Error: Disk '%s' non-existent or not mounted",
197                                                          disk_name);
198                         result = STATE_CRITICAL;
199                 }
201                 /* else check the disk space used */
202                 else {
204                         if (check_critical_value == TRUE
205                                         && (percent_used_disk_space >= critical_value)) result =
206                                         STATE_CRITICAL;
207                         else if (check_warning_value == TRUE
208                                                          && (percent_used_disk_space >= warning_value)) result =
209                                         STATE_WARNING;
211                         sprintf (output_message, "Disk %s - %lu%% used on %s",
212                                                          (result == STATE_OK) ? "ok" : "problem",
213                                                          percent_used_disk_space, disk_name);
214                 }
215         }
217         else if (vars_to_check == CHECK_NETSTAT) {
219                 sprintf (send_buffer, "NETSTAT %d\r\n", netstat_port);
220                 result =
221                         process_tcp_request2 (server_address, server_port, send_buffer,
222                                                                                                                 recv_buffer, sizeof (recv_buffer));
223                 if (result != STATE_OK)
224                         return result;
226                 port_connections = strtod (recv_buffer, NULL);
228                 if (check_critical_value == TRUE && (port_connections >= critical_value))
229                         result = STATE_CRITICAL;
230                 else if (check_warning_value == TRUE
231                                                  && (port_connections >= warning_value)) result = STATE_WARNING;
233                 sprintf (output_message, "Net %s - %d connection%s on port %d",
234                                                  (result == STATE_OK) ? "ok" : "problem", port_connections,
235                                                  (port_connections == 1) ? "" : "s", netstat_port);
236         }
238         else if (vars_to_check == CHECK_PROCS) {
240                 sprintf (send_buffer, "PROCESS %s\r\n", process_name);
241                 result =
242                         process_tcp_request2 (server_address, server_port, send_buffer,
243                                                                                                                 recv_buffer, sizeof (recv_buffer));
244                 if (result != STATE_OK)
245                         return result;
247                 temp_ptr = (char *) strtok (recv_buffer, "(");
248                 if (temp_ptr == NULL) {
249                         printf ("Invalid response from server\n");
250                         return STATE_CRITICAL;
251                 }
252                 temp_ptr = (char *) strtok (NULL, ")");
253                 if (temp_ptr == NULL) {
254                         printf ("Invalid response from server\n");
255                         return STATE_CRITICAL;
256                 }
257                 processes = strtod (temp_ptr, NULL);
259                 if (check_critical_value == TRUE && (processes >= critical_value))
260                         result = STATE_CRITICAL;
261                 else if (check_warning_value == TRUE && (processes >= warning_value))
262                         result = STATE_WARNING;
264                 sprintf (output_message, "Process %s - %d instance%s of %s running",
265                                                  (result == STATE_OK) ? "ok" : "problem", processes,
266                                                  (processes == 1) ? "" : "s", process_name);
267         }
269         else if (vars_to_check == CHECK_UPTIME) {
271                 sprintf (send_buffer, "UPTIME\r\n");
272                 result =
273                         process_tcp_request2 (server_address, server_port, send_buffer,
274                                                                                                                 recv_buffer, sizeof (recv_buffer));
275                 if (result != STATE_OK)
276                         return result;
278                 uptime_raw_hours = strtod (recv_buffer, NULL);
279                 uptime_raw_minutes = (unsigned long) (uptime_raw_hours * 60.0);
281                 if (check_critical_value == TRUE
282                                 && (uptime_raw_minutes <= critical_value)) result = STATE_CRITICAL;
283                 else if (check_warning_value == TRUE
284                                                  && (uptime_raw_minutes <= warning_value)) result = STATE_WARNING;
286                 uptime_days = uptime_raw_minutes / 1440;
287                 uptime_raw_minutes %= 1440;
288                 uptime_hours = uptime_raw_minutes / 60;
289                 uptime_raw_minutes %= 60;
290                 uptime_minutes = uptime_raw_minutes;
292                 sprintf (output_message, "Uptime %s - Up %d days %d hours %d minutes",
293                                                  (result == STATE_OK) ? "ok" : "problem", uptime_days,
294                                                  uptime_hours, uptime_minutes);
295         }
297         else {
298                 strcpy (output_message, "Nothing to check!\n");
299                 result = STATE_UNKNOWN;
300         }
302         /* reset timeout */
303         alarm (0);
305         printf ("%s\n", output_message);
307         return result;
314 /* process command-line arguments */
315 int
316 process_arguments (int argc, char **argv)
318         int c;
320 #ifdef HAVE_GETOPT_H
321         int option_index = 0;
322         static struct option long_options[] = {
323                 {"port", required_argument, 0, 'p'},
324                 {"timeout", required_argument, 0, 't'},
325                 {"critical", required_argument, 0, 'c'},
326                 {"warning", required_argument, 0, 'w'},
327                 {"variable", required_argument, 0, 'v'},
328                 {"hostname", required_argument, 0, 'H'},
329                 {"version", no_argument, 0, 'V'},
330                 {"help", no_argument, 0, 'h'},
331                 {0, 0, 0, 0}
332         };
333 #endif
335         /* no options were supplied */
336         if (argc < 2)
337                 return ERROR;
339         /* backwards compatibility */
340         if (!is_option (argv[1])) {
341                 server_address = argv[1];
342                 argv[1] = argv[0];
343                 argv = &argv[1];
344                 argc--;
345         }
347         for (c = 1; c < argc; c++) {
348                 if (strcmp ("-to", argv[c]) == 0)
349                         strcpy (argv[c], "-t");
350                 else if (strcmp ("-wv", argv[c]) == 0)
351                         strcpy (argv[c], "-w");
352                 else if (strcmp ("-cv", argv[c]) == 0)
353                         strcpy (argv[c], "-c");
354         }
356         while (1) {
357 #ifdef HAVE_GETOPT_H
358                 c =
359                         getopt_long (argc, argv, "+hVH:t:c:w:p:v:", long_options,
360                                                                          &option_index);
361 #else
362                 c = getopt (argc, argv, "+hVH:t:c:w:p:v:");
363 #endif
365                 if (c == -1 || c == EOF || c == 1)
366                         break;
368                 switch (c) {
369                 case '?':                                                                       /* print short usage statement if args not parsable */
370                         printf ("%s: Unknown argument: %s\n\n", my_basename (argv[0]), optarg);
371                         print_usage ();
372                         exit (STATE_UNKNOWN);
373                 case 'h':                                                                       /* help */
374                         print_help ();
375                         exit (STATE_OK);
376                 case 'V':                                                                       /* version */
377                         print_revision (my_basename (argv[0]), "$Revision$");
378                         exit (STATE_OK);
379                 case 'H':                                                                       /* hostname */
380                         server_address = optarg;
381                         break;
382                 case 'p':                                                                       /* port */
383                         if (is_intnonneg (optarg))
384                                 server_port = atoi (optarg);
385                         else
386                                 terminate (STATE_UNKNOWN,
387                                                                          "Server port an integer (seconds)\nType '%s -h' for additional help\n",
388                                                                          PROGNAME);
389                         break;
390                 case 'v':                                                                       /* variable */
391                         if (strcmp (optarg, "LOAD1") == 0)
392                                 vars_to_check = CHECK_LOAD1;
393                         else if (strcmp (optarg, "LOAD5") == 0)
394                                 vars_to_check = CHECK_LOAD5;
395                         else if (strcmp (optarg, "LOAD15") == 0)
396                                 vars_to_check = CHECK_LOAD15;
397                         else if (strcmp (optarg, "UPTIME") == 0)
398                                 vars_to_check = CHECK_UPTIME;
399                         else if (strstr (optarg, "PROC") == optarg) {
400                                 vars_to_check = CHECK_PROCS;
401                                 process_name = strscpy (process_name, optarg + 4);
402                         }
403                         else if (strstr (optarg, "NET") == optarg) {
404                                 vars_to_check = CHECK_NETSTAT;
405                                 netstat_port = atoi (optarg + 3);
406                         }
407                         else if (strstr (optarg, "DPU") == optarg) {
408                                 vars_to_check = CHECK_DPU;
409                                 disk_name = strscpy (disk_name, optarg + 3);
410                         }
411                         else
412                                 return ERROR;
413                         break;
414                 case 'w':                                                                       /* warning threshold */
415                         warning_value = strtoul (optarg, NULL, 10);
416                         check_warning_value = TRUE;
417                         break;
418                 case 'c':                                                                       /* critical threshold */
419                         critical_value = strtoul (optarg, NULL, 10);
420                         check_critical_value = TRUE;
421                         break;
422                 case 't':                                                                       /* timeout */
423                         socket_timeout = atoi (optarg);
424                         if (socket_timeout <= 0)
425                                 return ERROR;
426                 }
428         }
429         return OK;
436 void
437 print_usage (void)
439         printf
440                 ("Usage: %s -H host [-p port] [-v variable] [-w warning] [-c critical] [-t timeout]\n",
441                  PROGNAME);
448 void
449 print_help (void)
451         print_revision (PROGNAME, "$Revision$");
452         printf
453                 ("Copyright (c) 2000 Ethan Galstad/Karl DeBisschop\n\n"
454                  "This plugin attempts to contact the Over-CR collector daemon running on the\n"
455                  "remote UNIX server in order to gather the requested system information. This\n"
456                  "plugin requres that Eric Molitors' Over-CR collector daemon be running on the\n"
457                  "remote server. Over-CR can be downloaded from http://www.molitor.org/overcr\n"
458                  "(This plugin was tested with version 0.99.53 of the Over-CR collector)\n\n");
459         print_usage ();
460         printf
461                 ("\nOptions:\n"
462                  "-H, --hostname=HOST\n"
463                  "   Name of the host to check\n"
464                  "-p, --port=INTEGER\n"
465                  "   Optional port number (default: %d)\n"
466                  "-v, --variable=STRING\n"
467                  "   Variable to check.  Valid variables include:\n"
468                  "     LOAD1         = 1 minute average CPU load\n"
469                  "     LOAD5         = 5 minute average CPU load\n"
470                  "     LOAD15        = 15 minute average CPU load\n"
471                  "     DPU<filesys>  = percent used disk space on filesystem <filesys>\n"
472                  "     PROC<process> = number of running processes with name <process>\n"
473                  "     NET<port>     = number of active connections on TCP port <port>\n"
474                  "     UPTIME        = system uptime in seconds\n"
475                  " -w, --warning=INTEGER\n"
476                  "   Threshold which will result in a warning status\n"
477                  " -c, --critical=INTEGER\n"
478                  "   Threshold which will result in a critical status\n"
479                  " -t, --timeout=INTEGER\n"
480                  "   Seconds before connection attempt times out (default: %d)\n"
481                  "-h, --help\n"
482                  "   Print this help screen\n"
483                  "-V, --version\n"
484                  "   Print version information\n\n"
485                  "Notes:\n"
486                  " - For the available options, the critical threshold value should always be\n"
487                  "   higher than the warning threshold value, EXCEPT with the uptime variable\n"
488                  "   (i.e. lower uptimes are worse).\n", PORT, DEFAULT_SOCKET_TIMEOUT);