1 /******************************************************************************
2 *
3 * Nagios check_icmp plugin
4 *
5 * License: GPL
6 * Copyright (c) 2005-2006 nagios-plugins team
7 *
8 * Original Author : Andreas Ericsson <ae@op5.se>
9 *
10 * Last Modified: $Date$
11 *
12 * Description:
13 *
14 * This file contains the check_icmp plugin
15 *
16 * Relevant RFC's: 792 (ICMP), 791 (IP)
17 *
18 * This program was modeled somewhat after the check_icmp program,
19 * which was in turn a hack of fping (www.fping.org) but has been
20 * completely rewritten since to generate higher precision rta values,
* and support several different modes as well as setting ttl to control
* redundant routes. The only remainders of fping are currently a few
23 * function names.
24 *
25 * License Information:
26 *
27 * This program is free software; you can redistribute it and/or modify
28 * it under the terms of the GNU General Public License as published by
29 * the Free Software Foundation; either version 2 of the License, or
30 * (at your option) any later version.
31 *
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, write to the Free Software
39 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
40 *
41 * $Id$
42 *
43 *****************************************************************************/
48 #include <sys/time.h>
49 #include <sys/types.h>
50 #include <stdio.h>
51 #include <stdlib.h>
52 #include <stdarg.h>
53 #include <unistd.h>
54 #include <stddef.h>
55 #include <errno.h>
56 #include <string.h>
57 #include <ctype.h>
58 #include <netdb.h>
59 #include <sys/socket.h>
60 #include <netinet/in_systm.h>
61 #include <netinet/in.h>
62 #include <netinet/ip.h>
63 #include <netinet/ip_icmp.h>
64 #include <arpa/inet.h>
65 #include <signal.h>
/** fallbacks for system macros that some platforms do not define **/
#ifndef MAXTTL
# define MAXTTL	255
#endif
/* inet_addr() reports failure with all 32 bits set; add_target() and
 * add_target_ip() compare addresses against this value */
#ifndef INADDR_NONE
# define INADDR_NONE 0xffffffffU
#endif

#ifndef SOL_IP
# define SOL_IP 0
#endif

/* we bundle these in one #ifndef, since they're all from BSD
 * Put individual #ifndef's around those that bother you */
#ifndef ICMP_UNREACH_NET_UNKNOWN
# define ICMP_UNREACH_NET_UNKNOWN 6
# define ICMP_UNREACH_HOST_UNKNOWN 7
# define ICMP_UNREACH_ISOLATED 8
# define ICMP_UNREACH_NET_PROHIB 9
# define ICMP_UNREACH_HOST_PROHIB 10
# define ICMP_UNREACH_TOSNET 11
# define ICMP_UNREACH_TOSHOST 12
#endif
/* tru64 has the ones above, but not these */
#ifndef ICMP_UNREACH_FILTER_PROHIB
# define ICMP_UNREACH_FILTER_PROHIB 13
# define ICMP_UNREACH_HOST_PRECEDENCE 14
# define ICMP_UNREACH_PRECEDENCE_CUTOFF 15
#endif
/** typedefs and such **/
/* values kept in `status`; finish() hands the final one to exit() and
 * uses the first five as indices into its table of state names */
enum states {
	STATE_OK        = 0,
	STATE_WARNING   = 1,
	STATE_CRITICAL  = 2,
	STATE_UNKNOWN   = 3,
	STATE_DEPENDENT = 4,
	STATE_OOB       = 5
};
typedef unsigned short range_t; /* type for get_range() -- unimplemented */

/* everything we track about one target address */
struct rta_host {
	unsigned short id;              /* id in **table, and icmp pkts */
	char *name;                     /* arg used for adding this host */
	char *msg;                      /* icmp error message, if any */
	struct sockaddr_in saddr_in;    /* the address of this host */
	struct in_addr error_addr;      /* stores address of error replies */
	unsigned long long time_waited; /* total time waited, in usecs */
	unsigned int icmp_sent;         /* packets sent to this host */
	unsigned int icmp_recv;         /* echo replies received from it */
	unsigned int icmp_lost;         /* packets written off as lost */
	unsigned char icmp_type;        /* type from the last icmp error */
	unsigned char icmp_code;        /* code from the last icmp error */
	unsigned short flags;           /* control/status flags */
	double rta;                     /* measured RTA */
	unsigned char pl;               /* measured packet loss */
	struct rta_host *next;          /* linked list */
};
typedef struct rta_host rta_host;

/* bit for rta_host.flags: decidedly dead target. */
#define FLAG_LOST_CAUSE 0x01
/* threshold structure. all values are maximum allowed, exclusive
 * (finish() raises the state once a measurement is >= the limit) */
struct threshold {
	unsigned char pl;  /* max allowed packet loss in percent */
	unsigned int rta;  /* roundtrip time average, microseconds */
};
typedef struct threshold threshold;
/* payload carried in every echo request: send_icmp_ping() fills it in
 * and wait_for_reply() reads the timestamp back from the reply */
struct icmp_ping_data {
	struct timeval stime;   /* timestamp (saved in protocol struct as well) */
	unsigned short ping_id;
};
typedef struct icmp_ping_data icmp_ping_data;
/* operating modes. main() starts out in MODE_RTA and switches mode
 * according to the name the program was invoked as:
 *   MODE_RTA:       send all packets no matter what (mimic check_icmp and
 *                   check_ping).
 *   MODE_HOSTCHECK: selected for "check_host". wait_for_reply() exits with
 *                   OK on the first echo reply, even one outside the
 *                   crit.rta limit, and add_target() adds every address a
 *                   name resolves to. Expects one host only to be checked
 *                   at a time. Do not misspell any additional IP's.
 *   MODE_ALL:       selected for "check_rta_multi". add_target() adds every
 *                   resolved address; requires packets from ALL requested
 *                   IP to return OK.
 *   MODE_ICMP:      selected for "check_icmp" / "check_ping"; restricts
 *                   `protocols` to HAVE_ICMP.
 */
#define MODE_RTA       0
#define MODE_HOSTCHECK 1
#define MODE_ALL       2
#define MODE_ICMP      3
/* the different ping types we can do, one bit each so they can be
 * or'ed together in `protocols` and `sockets`
 * TODO: investigate ARP ping as well */
#define HAVE_ICMP (1 << 0)
#define HAVE_UDP  (1 << 1)
#define HAVE_TCP  (1 << 2)
#define HAVE_ARP  (1 << 3)
/* packet size limits, in bytes */
#define MIN_PING_DATA_SIZE     (sizeof(struct icmp_ping_data))
#define MAX_IP_PKT_SIZE        65536 /* (theoretical) max IP packet size */
#define IP_HDR_SIZE            20
#define MAX_PING_DATA          (MAX_IP_PKT_SIZE - IP_HDR_SIZE - ICMP_MINLEN)
/* initial value of icmp_data_size */
#define DEFAULT_PING_DATA_SIZE (MIN_PING_DATA_SIZE + 44)
/* various target states (bit flags) */
#define TSTATE_INACTIVE (1 << 0) /* don't ping this host anymore */
#define TSTATE_WAITING  (1 << 1) /* unanswered packets on the wire */
#define TSTATE_ALIVE    (1 << 2) /* target is alive (has answered something) */
#define TSTATE_UNREACH  (1 << 3)
176 /** prototypes **/
177 static void usage(unsigned char, char *);
178 static u_int get_timevar(const char *);
179 static u_int get_timevaldiff(struct timeval *, struct timeval *);
180 static int wait_for_reply(int, u_int);
181 static int recvfrom_wto(int, char *, unsigned int, struct sockaddr *, u_int *);
182 static int send_icmp_ping(int, struct rta_host *);
183 static int get_threshold(char *str, threshold *th);
184 static void run_checks(void);
185 static int add_target(char *);
186 static int add_target_ip(char *, struct in_addr *);
187 static int handle_random_icmp(struct icmp *, struct sockaddr_in *);
188 static unsigned short icmp_checksum(unsigned short *, int);
189 static void finish(int);
190 static void crash(const char *, ...);
/** external **/
/* getopt() state and the process environment, provided by the C library */
extern int optind;
extern int opterr;
extern int optopt;
extern char *optarg;
extern char **environ;
197 /** global variables **/
198 static char *progname;
199 static struct rta_host **table, *cursor, *list;
200 static threshold crit = {80, 500000}, warn = {40, 200000};
201 static int mode, protocols, sockets, debug = 0, timeout = 10;
202 static unsigned short icmp_pkt_size, icmp_data_size = DEFAULT_PING_DATA_SIZE;
203 static unsigned int icmp_sent = 0, icmp_recv = 0, icmp_lost = 0;
204 #define icmp_pkts_en_route (icmp_sent - (icmp_recv + icmp_lost))
205 static unsigned short targets_down = 0, targets = 0, packets = 0;
206 #define targets_alive (targets - targets_down)
207 static unsigned int retry_interval, pkt_interval, target_interval;
208 static int icmp_sock, tcp_sock, udp_sock, status = STATE_OK;
209 static pid_t pid;
210 static struct timezone tz;
211 static struct timeval prog_start;
212 static unsigned long long max_completion_time = 0;
213 static unsigned char ttl = 0; /* outgoing ttl */
214 static unsigned int warn_down = 1, crit_down = 1; /* host down threshold values */
215 float pkt_backoff_factor = 1.5;
216 float target_backoff_factor = 1.5;
218 /** code start **/
219 static void
220 crash(const char *fmt, ...)
221 {
222 va_list ap;
224 printf("%s: ", progname);
226 va_start(ap, fmt);
227 vprintf(fmt, ap);
228 va_end(ap);
230 if(errno) printf(": %s", strerror(errno));
231 puts("");
233 exit(3);
234 }
237 static char *
238 get_icmp_error_msg(unsigned char icmp_type, unsigned char icmp_code)
239 {
240 char *msg = "unreachable";
242 if(debug > 1) printf("get_icmp_error_msg(%u, %u)\n", icmp_type, icmp_code);
243 switch(icmp_type) {
244 case ICMP_UNREACH:
245 switch(icmp_code) {
246 case ICMP_UNREACH_NET: msg = "Net unreachable"; break;
247 case ICMP_UNREACH_HOST: msg = "Host unreachable"; break;
248 case ICMP_UNREACH_PROTOCOL: msg = "Protocol unreachable (firewall?)"; break;
249 case ICMP_UNREACH_PORT: msg = "Port unreachable (firewall?)"; break;
250 case ICMP_UNREACH_NEEDFRAG: msg = "Fragmentation needed"; break;
251 case ICMP_UNREACH_SRCFAIL: msg = "Source route failed"; break;
252 case ICMP_UNREACH_ISOLATED: msg = "Source host isolated"; break;
253 case ICMP_UNREACH_NET_UNKNOWN: msg = "Unknown network"; break;
254 case ICMP_UNREACH_HOST_UNKNOWN: msg = "Unknown host"; break;
255 case ICMP_UNREACH_NET_PROHIB: msg = "Network denied (firewall?)"; break;
256 case ICMP_UNREACH_HOST_PROHIB: msg = "Host denied (firewall?)"; break;
257 case ICMP_UNREACH_TOSNET: msg = "Bad TOS for network (firewall?)"; break;
258 case ICMP_UNREACH_TOSHOST: msg = "Bad TOS for host (firewall?)"; break;
259 case ICMP_UNREACH_FILTER_PROHIB: msg = "Prohibited by filter (firewall)"; break;
260 case ICMP_UNREACH_HOST_PRECEDENCE: msg = "Host precedence violation"; break;
261 case ICMP_UNREACH_PRECEDENCE_CUTOFF: msg = "Precedence cutoff"; break;
262 default: msg = "Invalid code"; break;
263 }
264 break;
266 case ICMP_TIMXCEED:
267 /* really 'out of reach', or non-existant host behind a router serving
268 * two different subnets */
269 switch(icmp_code) {
270 case ICMP_TIMXCEED_INTRANS: msg = "Time to live exceeded in transit"; break;
271 case ICMP_TIMXCEED_REASS: msg = "Fragment reassembly time exceeded"; break;
272 default: msg = "Invalid code"; break;
273 }
274 break;
276 case ICMP_SOURCEQUENCH: msg = "Transmitting too fast"; break;
277 case ICMP_REDIRECT: msg = "Redirect (change route)"; break;
278 case ICMP_PARAMPROB: msg = "Bad IP header (required option absent)"; break;
280 /* the following aren't error messages, so ignore */
281 case ICMP_TSTAMP:
282 case ICMP_TSTAMPREPLY:
283 case ICMP_IREQ:
284 case ICMP_IREQREPLY:
285 case ICMP_MASKREQ:
286 case ICMP_MASKREPLY:
287 default: msg = ""; break;
288 }
290 return msg;
291 }
293 static int
294 handle_random_icmp(struct icmp *p, struct sockaddr_in *addr)
295 {
296 struct icmp *sent_icmp = NULL;
297 struct rta_host *host = NULL;
298 unsigned char *ptr;
300 if(p->icmp_type == ICMP_ECHO && p->icmp_id == pid) {
301 /* echo request from us to us (pinging localhost) */
302 return 0;
303 }
305 ptr = (unsigned char *)p;
306 if(debug) printf("handle_random_icmp(%p, %p)\n", (void *)p, (void *)addr);
308 /* only handle a few types, since others can't possibly be replies to
309 * us in a sane network (if it is anyway, it will be counted as lost
310 * at summary time, but not as quickly as a proper response */
311 /* TIMXCEED can be an unreach from a router with multiple IP's which
312 * serves two different subnets on the same interface and a dead host
313 * on one net is pinged from the other. The router will respond to
314 * itself and thus set TTL=0 so as to not loop forever. Even when
315 * TIMXCEED actually sends a proper icmp response we will have passed
316 * too many hops to have a hope of reaching it later, in which case it
317 * indicates overconfidence in the network, poor routing or both. */
318 if(p->icmp_type != ICMP_UNREACH && p->icmp_type != ICMP_TIMXCEED &&
319 p->icmp_type != ICMP_SOURCEQUENCH && p->icmp_type != ICMP_PARAMPROB)
320 {
321 return 0;
322 }
324 /* might be for us. At least it holds the original package (according
325 * to RFC 792). If it isn't, just ignore it */
326 sent_icmp = (struct icmp *)(ptr + 28);
327 if(sent_icmp->icmp_type != ICMP_ECHO || sent_icmp->icmp_id != pid ||
328 sent_icmp->icmp_seq >= targets)
329 {
330 if(debug) printf("Packet is no response to a packet we sent\n");
331 return 0;
332 }
334 /* it is indeed a response for us */
335 host = table[sent_icmp->icmp_seq];
336 if(debug) {
337 printf("Received \"%s\" from %s for ICMP ECHO sent to %s.\n",
338 get_icmp_error_msg(p->icmp_type, p->icmp_code),
339 inet_ntoa(addr->sin_addr), host->name);
340 }
342 icmp_lost++;
343 host->icmp_lost++;
344 /* don't spend time on lost hosts any more */
345 if(host->flags & FLAG_LOST_CAUSE) return 0;
347 /* source quench means we're sending too fast, so increase the
348 * interval and mark this packet lost */
349 if(p->icmp_type == ICMP_SOURCEQUENCH) {
350 pkt_interval *= pkt_backoff_factor;
351 target_interval *= target_backoff_factor;
352 }
353 else {
354 targets_down++;
355 host->flags |= FLAG_LOST_CAUSE;
356 }
357 host->icmp_type = p->icmp_type;
358 host->icmp_code = p->icmp_code;
359 host->error_addr.s_addr = addr->sin_addr.s_addr;
361 return 0;
362 }
364 int
365 main(int argc, char **argv)
366 {
367 int i;
368 char *ptr;
369 long int arg;
370 int icmp_sockerrno, udp_sockerrno, tcp_sockerrno;
371 int result;
372 struct rta_host *host;
374 /* we only need to be setsuid when we get the sockets, so do
375 * that before pointer magic (esp. on network data) */
376 icmp_sockerrno = udp_sockerrno = tcp_sockerrno = sockets = 0;
378 if((icmp_sock = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) != -1)
379 sockets |= HAVE_ICMP;
380 else icmp_sockerrno = errno;
382 /* if((udp_sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) != -1) */
383 /* sockets |= HAVE_UDP; */
384 /* else udp_sockerrno = errno; */
386 /* if((tcp_sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) */
387 /* sockets |= HAVE_TCP; */
388 /* else tcp_sockerrno = errno; */
390 /* now drop privileges (no effect if not setsuid or geteuid() == 0) */
391 setuid(getuid());
393 /* POSIXLY_CORRECT might break things, so unset it (the portable way) */
394 environ = NULL;
396 /* use the pid to mark packets as ours */
397 pid = getpid();
398 /* printf("pid = %u\n", pid); */
400 /* get calling name the old-fashioned way for portability instead
401 * of relying on the glibc-ism __progname */
402 ptr = strrchr(argv[0], '/');
403 if(ptr) progname = &ptr[1];
404 else progname = argv[0];
406 /* now set defaults. Use progname to set them initially (allows for
407 * superfast check_host program when target host is up */
408 cursor = list = NULL;
409 table = NULL;
411 mode = MODE_RTA;
412 crit.rta = 500000;
413 crit.pl = 80;
414 warn.rta = 200000;
415 warn.pl = 40;
416 protocols = HAVE_ICMP | HAVE_UDP | HAVE_TCP;
417 pkt_interval = 80000; /* 80 msec packet interval by default */
418 packets = 5;
420 if(!strcmp(progname, "check_icmp") || !strcmp(progname, "check_ping")) {
421 mode = MODE_ICMP;
422 protocols = HAVE_ICMP;
423 }
424 else if(!strcmp(progname, "check_host")) {
425 mode = MODE_HOSTCHECK;
426 pkt_interval = 1000000;
427 packets = 5;
428 crit.rta = warn.rta = 1000000;
429 crit.pl = warn.pl = 100;
430 }
431 else if(!strcmp(progname, "check_rta_multi")) {
432 mode = MODE_ALL;
433 target_interval = 0;
434 pkt_interval = 50000;
435 packets = 5;
436 }
438 /* parse the arguments */
439 for(i = 1; i < argc; i++) {
440 while((arg = getopt(argc, argv, "vhVw:c:n:p:t:H:i:b:I:l:")) != EOF) {
441 switch(arg) {
442 case 'v':
443 debug++;
444 break;
445 case 'b':
446 /* silently ignored for now */
447 break;
448 case 'i':
449 pkt_interval = get_timevar(optarg);
450 break;
451 case 'I':
452 target_interval = get_timevar(optarg);
453 break;
454 case 'w':
455 get_threshold(optarg, &warn);
456 break;
457 case 'c':
458 get_threshold(optarg, &crit);
459 break;
460 case 'n':
461 case 'p':
462 packets = strtoul(optarg, NULL, 0);
463 break;
464 case 't':
465 timeout = strtoul(optarg, NULL, 0);
466 if(!timeout) timeout = 10;
467 break;
468 case 'H':
469 add_target(optarg);
470 break;
471 case 'l':
472 ttl = (unsigned char)strtoul(optarg, NULL, 0);
473 break;
474 case 'd': /* implement later, for cluster checks */
475 warn_down = (unsigned char)strtoul(optarg, &ptr, 0);
476 if(ptr) {
477 crit_down = (unsigned char)strtoul(ptr + 1, NULL, 0);
478 }
479 break;
480 case 'h': case 'V': default:
481 usage(arg, NULL);
482 break;
483 }
484 }
485 }
487 argv = &argv[optind];
488 while(*argv) {
489 add_target(*argv);
490 argv++;
491 }
492 if(!targets) {
493 errno = 0;
494 crash("No hosts to check");
495 exit(3);
496 }
498 if(!sockets) {
499 if(icmp_sock == -1) {
500 errno = icmp_sockerrno;
501 crash("Failed to obtain ICMP socket");
502 return -1;
503 }
504 /* if(udp_sock == -1) { */
505 /* errno = icmp_sockerrno; */
506 /* crash("Failed to obtain UDP socket"); */
507 /* return -1; */
508 /* } */
509 /* if(tcp_sock == -1) { */
510 /* errno = icmp_sockerrno; */
511 /* crash("Failed to obtain TCP socker"); */
512 /* return -1; */
513 /* } */
514 }
515 if(!ttl) ttl = 64;
517 if(icmp_sock) {
518 result = setsockopt(icmp_sock, SOL_IP, IP_TTL, &ttl, sizeof(ttl));
519 if(debug) {
520 if(result == -1) printf("setsockopt failed\n");
521 else printf("ttl set to %u\n", ttl);
522 }
523 }
525 /* stupid users should be able to give whatever thresholds they want
526 * (nothing will break if they do), but some anal plugin maintainer
527 * will probably add some printf() thing here later, so it might be
528 * best to at least show them where to do it. ;) */
529 if(warn.pl > crit.pl) warn.pl = crit.pl;
530 if(warn.rta > crit.rta) warn.rta = crit.rta;
531 if(warn_down > crit_down) crit_down = warn_down;
533 signal(SIGINT, finish);
534 signal(SIGHUP, finish);
535 signal(SIGTERM, finish);
536 signal(SIGALRM, finish);
537 if(debug) printf("Setting alarm timeout to %u seconds\n", timeout);
538 alarm(timeout);
540 /* make sure we don't wait any longer than necessary */
541 gettimeofday(&prog_start, &tz);
542 max_completion_time =
543 ((targets * packets * pkt_interval) + (targets * target_interval)) +
544 (targets * packets * crit.rta) + crit.rta;
546 if(debug) {
547 printf("packets: %u, targets: %u\n"
548 "target_interval: %0.3f, pkt_interval %0.3f\n"
549 "crit.rta: %0.3f\n"
550 "max_completion_time: %0.3f\n",
551 packets, targets,
552 (float)target_interval / 1000, (float)pkt_interval / 1000,
553 (float)crit.rta / 1000,
554 (float)max_completion_time / 1000);
555 }
557 if(debug) {
558 if(max_completion_time > (u_int)timeout * 1000000) {
559 printf("max_completion_time: %llu timeout: %u\n",
560 max_completion_time, timeout);
561 printf("Timout must be at lest %llu\n",
562 max_completion_time / 1000000 + 1);
563 }
564 }
566 icmp_pkt_size = icmp_data_size + ICMP_MINLEN;
567 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
568 if(icmp_pkt_size < sizeof(struct icmp) + sizeof(struct icmp_ping_data)) {
569 icmp_pkt_size = sizeof(struct icmp) + sizeof(struct icmp_ping_data);
570 }
571 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
573 if(debug) {
574 printf("crit = {%u, %u%%}, warn = {%u, %u%%}\n",
575 crit.rta, crit.pl, warn.rta, warn.pl);
576 printf("pkt_interval: %u target_interval: %u retry_interval: %u\n",
577 pkt_interval, target_interval, retry_interval);
578 printf("icmp_pkt_size: %u timeout: %u\n",
579 icmp_pkt_size, timeout);
580 }
582 if(packets > 20) {
583 errno = 0;
584 crash("packets is > 20 (%d)", packets);
585 }
587 host = list;
588 table = malloc(sizeof(struct rta_host **) * (argc - 1));
589 i = 0;
590 while(host) {
591 host->id = i;
592 table[i] = host;
593 host = host->next;
594 i++;
595 }
597 run_checks();
599 errno = 0;
600 finish(0);
602 return(0);
603 }
605 static void
606 run_checks()
607 {
608 u_int i, t, result;
609 u_int final_wait, time_passed;
611 /* this loop might actually violate the pkt_interval or target_interval
612 * settings, but only if there aren't any packets on the wire which
613 * indicates that the target can handle an increased packet rate */
614 for(i = 0; i < packets; i++) {
615 for(t = 0; t < targets; t++) {
616 /* don't send useless packets */
617 if(!targets_alive) finish(0);
618 if(table[t]->flags & FLAG_LOST_CAUSE) {
619 if(debug) printf("%s is a lost cause. not sending any more\n",
620 table[t]->name);
621 continue;
622 }
624 /* we're still in the game, so send next packet */
625 (void)send_icmp_ping(icmp_sock, table[t]);
626 result = wait_for_reply(icmp_sock, target_interval);
627 }
628 result = wait_for_reply(icmp_sock, pkt_interval * targets);
629 }
631 if(icmp_pkts_en_route && targets_alive) {
632 time_passed = get_timevaldiff(NULL, NULL);
633 final_wait = max_completion_time - time_passed;
635 if(debug) {
636 printf("time_passed: %u final_wait: %u max_completion_time: %llu\n",
637 time_passed, final_wait, max_completion_time);
638 }
639 if(time_passed > max_completion_time) {
640 if(debug) printf("Time passed. Finishing up\n");
641 finish(0);
642 }
644 /* catch the packets that might come in within the timeframe, but
645 * haven't yet */
646 if(debug) printf("Waiting for %u micro-seconds (%0.3f msecs)\n",
647 final_wait, (float)final_wait / 1000);
648 result = wait_for_reply(icmp_sock, final_wait);
649 }
650 }
652 /* response structure:
653 * ip header : 20 bytes
654 * icmp header : 28 bytes
655 * icmp echo reply : the rest
656 */
657 static int
658 wait_for_reply(int sock, u_int t)
659 {
660 int n, hlen;
661 static char buf[4096];
662 struct sockaddr_in resp_addr;
663 struct ip *ip;
664 struct icmp *icp, *sent_icmp;
665 struct rta_host *host;
666 struct icmp_ping_data *data;
667 struct timeval wait_start, now;
668 u_int tdiff, i, per_pkt_wait;
670 /* if we can't listen or don't have anything to listen to, just return */
671 if(!t || !icmp_pkts_en_route) return 0;
673 gettimeofday(&wait_start, &tz);
675 i = t;
676 per_pkt_wait = t / icmp_pkts_en_route;
677 while(icmp_pkts_en_route && get_timevaldiff(&wait_start, NULL) < i) {
678 t = per_pkt_wait;
680 /* wrap up if all targets are declared dead */
681 if(!targets_alive ||
682 get_timevaldiff(&prog_start, NULL) >= max_completion_time ||
683 (mode == MODE_HOSTCHECK && targets_down))
684 {
685 finish(0);
686 }
688 /* reap responses until we hit a timeout */
689 n = recvfrom_wto(sock, buf, sizeof(buf),
690 (struct sockaddr *)&resp_addr, &t);
691 if(!n) {
692 if(debug > 1) {
693 printf("recvfrom_wto() timed out during a %u usecs wait\n",
694 per_pkt_wait);
695 }
696 continue; /* timeout for this one, so keep trying */
697 }
698 if(n < 0) {
699 if(debug) printf("recvfrom_wto() returned errors\n");
700 return n;
701 }
703 ip = (struct ip *)buf;
704 if(debug > 1) printf("received %u bytes from %s\n",
705 ntohs(ip->ip_len), inet_ntoa(resp_addr.sin_addr));
707 /* obsolete. alpha on tru64 provides the necessary defines, but isn't broken */
708 /* #if defined( __alpha__ ) && __STDC__ && !defined( __GLIBC__ ) */
709 /* alpha headers are decidedly broken. Using an ansi compiler,
710 * they provide ip_vhl instead of ip_hl and ip_v, so we mask
711 * off the bottom 4 bits */
712 /* hlen = (ip->ip_vhl & 0x0f) << 2; */
713 /* #else */
714 hlen = ip->ip_hl << 2;
715 /* #endif */
717 if(n < (hlen + ICMP_MINLEN)) {
718 crash("received packet too short for ICMP (%d bytes, expected %d) from %s\n",
719 n, hlen + icmp_pkt_size, inet_ntoa(resp_addr.sin_addr));
720 }
721 /* else if(debug) { */
722 /* printf("ip header size: %u, packet size: %u (expected %u, %u)\n", */
723 /* hlen, ntohs(ip->ip_len) - hlen, */
724 /* sizeof(struct ip), icmp_pkt_size); */
725 /* } */
727 /* check the response */
728 icp = (struct icmp *)(buf + hlen);
729 sent_icmp = (struct icmp *)(buf + hlen + ICMP_MINLEN);
730 /* printf("buf: %p, icp: %p, distance: %u (expected %u)\n", */
731 /* buf, icp, */
732 /* (u_int)icp - (u_int)buf, hlen); */
733 /* printf("buf: %p, sent_icmp: %p, distance: %u (expected %u)\n", */
734 /* buf, sent_icmp, */
735 /* (u_int)sent_icmp - (u_int)buf, hlen + ICMP_MINLEN); */
737 if(icp->icmp_id != pid) {
738 handle_random_icmp(icp, &resp_addr);
739 continue;
740 }
742 if(icp->icmp_type != ICMP_ECHOREPLY || icp->icmp_seq >= targets) {
743 if(debug > 2) printf("not a proper ICMP_ECHOREPLY\n");
744 handle_random_icmp(icp, &resp_addr);
745 continue;
746 }
748 /* this is indeed a valid response */
749 data = (struct icmp_ping_data *)(icp->icmp_data);
751 host = table[icp->icmp_seq];
752 gettimeofday(&now, &tz);
753 tdiff = get_timevaldiff(&data->stime, &now);
755 host->time_waited += tdiff;
756 host->icmp_recv++;
757 icmp_recv++;
759 if(debug) {
760 printf("%0.3f ms rtt from %s, outgoing ttl: %u, incoming ttl: %u\n",
761 (float)tdiff / 1000, inet_ntoa(resp_addr.sin_addr),
762 ttl, ip->ip_ttl);
763 }
765 /* if we're in hostcheck mode, exit with limited printouts */
766 if(mode == MODE_HOSTCHECK) {
767 printf("OK - %s responds to ICMP. Packet %u, rta %0.3fms|"
768 "pkt=%u;;0;%u rta=%0.3f;%0.3f;%0.3f;;\n",
769 host->name, icmp_recv, (float)tdiff / 1000,
770 icmp_recv, packets, (float)tdiff / 1000,
771 (float)warn.rta / 1000, (float)crit.rta / 1000);
772 exit(STATE_OK);
773 }
774 }
776 return 0;
777 }
779 /* the ping functions */
780 static int
781 send_icmp_ping(int sock, struct rta_host *host)
782 {
783 static char *buf = NULL; /* re-use so we prevent leaks */
784 long int len;
785 struct icmp *icp;
786 struct icmp_ping_data *data;
787 struct timeval tv;
788 struct sockaddr *addr;
791 if(sock == -1) {
792 errno = 0;
793 crash("Attempt to send on bogus socket");
794 return -1;
795 }
796 addr = (struct sockaddr *)&host->saddr_in;
798 if(!buf) {
799 buf = (char *)malloc(icmp_pkt_size + sizeof(struct ip));
800 if(!buf) {
801 crash("send_icmp_ping(): failed to malloc %d bytes for send buffer",
802 icmp_pkt_size);
803 return -1; /* might be reached if we're in debug mode */
804 }
805 }
806 memset(buf, 0, icmp_pkt_size + sizeof(struct ip));
808 if((gettimeofday(&tv, &tz)) == -1) return -1;
810 icp = (struct icmp *)buf;
811 icp->icmp_type = ICMP_ECHO;
812 icp->icmp_code = 0;
813 icp->icmp_cksum = 0;
814 icp->icmp_id = pid;
815 icp->icmp_seq = host->id;
816 data = (struct icmp_ping_data *)icp->icmp_data;
817 data->ping_id = 10; /* host->icmp.icmp_sent; */
818 memcpy(&data->stime, &tv, sizeof(struct timeval));
819 icp->icmp_cksum = icmp_checksum((u_short *)icp, icmp_pkt_size);
821 len = sendto(sock, buf, icmp_pkt_size, 0, (struct sockaddr *)addr,
822 sizeof(struct sockaddr));
824 if(len < 0 || (unsigned int)len != icmp_pkt_size) {
825 if(debug) printf("Failed to send ping to %s\n",
826 inet_ntoa(host->saddr_in.sin_addr));
827 return -1;
828 }
830 icmp_sent++;
831 host->icmp_sent++;
833 return 0;
834 }
836 static int
837 recvfrom_wto(int sock, char *buf, unsigned int len, struct sockaddr *saddr,
838 u_int *timo)
839 {
840 u_int slen;
841 int n;
842 struct timeval to, then, now;
843 fd_set rd, wr;
845 if(!*timo) {
846 if(debug) printf("*timo is not\n");
847 return 0;
848 }
850 to.tv_sec = *timo / 1000000;
851 to.tv_usec = (*timo - (to.tv_sec * 1000000));
853 FD_ZERO(&rd);
854 FD_ZERO(&wr);
855 FD_SET(sock, &rd);
856 errno = 0;
857 gettimeofday(&then, &tz);
858 n = select(sock + 1, &rd, &wr, NULL, &to);
859 if(n < 0) crash("select() in recvfrom_wto");
860 gettimeofday(&now, &tz);
861 *timo = get_timevaldiff(&then, &now);
863 if(!n) return 0; /* timeout */
865 slen = sizeof(struct sockaddr);
867 return recvfrom(sock, buf, len, 0, saddr, &slen);
868 }
870 static void
871 finish(int sig)
872 {
873 u_int i = 0;
874 unsigned char pl;
875 double rta;
876 struct rta_host *host;
877 char *status_string[] =
878 {"OK", "WARNING", "CRITICAL", "UNKNOWN", "DEPENDENT"};
880 alarm(0);
881 if(debug > 1) printf("finish(%d) called\n", sig);
883 if(icmp_sock != -1) close(icmp_sock);
884 if(udp_sock != -1) close(udp_sock);
885 if(tcp_sock != -1) close(tcp_sock);
887 if(debug) {
888 printf("icmp_sent: %u icmp_recv: %u icmp_lost: %u\n",
889 icmp_sent, icmp_recv, icmp_lost);
890 printf("targets: %u targets_alive: %u\n", targets, targets_alive);
891 }
893 /* iterate thrice to calculate values, give output, and print perfparse */
894 host = list;
895 while(host) {
896 if(!host->icmp_recv) {
897 /* rta 0 is ofcourse not entirely correct, but will still show up
898 * conspicuosly as missing entries in perfparse and cacti */
899 pl = 100;
900 rta = 0;
901 status = STATE_CRITICAL;
902 /* up the down counter if not already counted */
903 if(!(host->flags & FLAG_LOST_CAUSE) && targets_alive) targets_down++;
904 }
905 else {
906 pl = ((host->icmp_sent - host->icmp_recv) * 100) / host->icmp_sent;
907 rta = (double)host->time_waited / host->icmp_recv;
908 }
909 host->pl = pl;
910 host->rta = rta;
911 if(!status && (pl >= warn.pl || rta >= warn.rta)) status = STATE_WARNING;
912 if(pl >= crit.pl || rta >= crit.rta) status = STATE_CRITICAL;
914 host = host->next;
915 }
916 /* this is inevitable */
917 if(!targets_alive) status = STATE_CRITICAL;
918 printf("%s - ", status_string[status]);
920 host = list;
921 while(host) {
922 if(debug) puts("");
923 if(i) {
924 if(i < targets) printf(" :: ");
925 else printf("\n");
926 }
927 i++;
928 if(!host->icmp_recv) {
929 status = STATE_CRITICAL;
930 if(host->flags & FLAG_LOST_CAUSE) {
931 printf("%s: %s @ %s. rta nan, lost %d%%",
932 host->name,
933 get_icmp_error_msg(host->icmp_type, host->icmp_code),
934 inet_ntoa(host->error_addr),
935 100);
936 }
937 else { /* not marked as lost cause, so we have no flags for it */
938 printf("%s: rta nan, lost 100%%", host->name);
939 }
940 }
941 else { /* !icmp_recv */
942 printf("%s: rta %0.3fms, lost %u%%",
943 host->name, host->rta / 1000, host->pl);
944 }
946 host = host->next;
947 }
949 /* iterate once more for pretty perfparse output */
950 printf("|");
951 i = 0;
952 host = list;
953 while(host) {
954 if(debug) puts("");
955 printf("%srta=%0.3fms;%0.3f;%0.3f;0; %spl=%u%%;%u;%u;; ",
956 (targets > 1) ? host->name : "",
957 host->rta / 1000, (float)warn.rta / 1000, (float)crit.rta / 1000,
958 (targets > 1) ? host->name : "",
959 host->pl, warn.pl, crit.pl);
961 host = host->next;
962 }
964 /* finish with an empty line */
965 puts("");
966 if(debug) printf("targets: %u, targets_alive: %u\n",
967 targets, targets_alive);
969 exit(status);
970 }
972 static u_int
973 get_timevaldiff(struct timeval *early, struct timeval *later)
974 {
975 u_int ret;
976 struct timeval now;
978 if(!later) {
979 gettimeofday(&now, &tz);
980 later = &now;
981 }
982 if(!early) early = &prog_start;
984 /* if early > later we return 0 so as to indicate a timeout */
985 if(early->tv_sec > early->tv_sec ||
986 (early->tv_sec == later->tv_sec && early->tv_usec > later->tv_usec))
987 {
988 return 0;
989 }
991 ret = (later->tv_sec - early->tv_sec) * 1000000;
992 ret += later->tv_usec - early->tv_usec;
994 return ret;
995 }
997 static int
998 add_target_ip(char *arg, struct in_addr *in)
999 {
1000 struct rta_host *host;
1002 /* disregard obviously stupid addresses */
1003 if(in->s_addr == INADDR_NONE || in->s_addr == INADDR_ANY)
1004 return -1;
1006 /* no point in adding two identical IP's, so don't. ;) */
1007 host = list;
1008 while(host) {
1009 if(host->saddr_in.sin_addr.s_addr == in->s_addr) {
1010 if(debug) printf("Identical IP already exists. Not adding %s\n", arg);
1011 return -1;
1012 }
1013 host = host->next;
1014 }
1016 /* add the fresh ip */
1017 host = malloc(sizeof(struct rta_host));
1018 if(!host) {
1019 crash("add_target_ip(%s, %s): malloc(%d) failed",
1020 arg, inet_ntoa(*in), sizeof(struct rta_host));
1021 }
1022 memset(host, 0, sizeof(struct rta_host));
1024 /* set the values. use calling name for output */
1025 host->name = strdup(arg);
1027 /* fill out the sockaddr_in struct */
1028 host->saddr_in.sin_family = AF_INET;
1029 host->saddr_in.sin_addr.s_addr = in->s_addr;
1031 if(!list) list = cursor = host;
1032 else cursor->next = host;
1034 cursor = host;
1035 targets++;
1037 return 0;
1038 }
1040 /* wrapper for add_target_ip */
1041 static int
1042 add_target(char *arg)
1043 {
1044 int i;
1045 struct hostent *he;
1046 struct in_addr *in, ip;
1048 /* don't resolve if we don't have to */
1049 if((ip.s_addr = inet_addr(arg)) != INADDR_NONE) {
1050 /* don't add all ip's if we were given a specific one */
1051 return add_target_ip(arg, &ip);
1052 /* he = gethostbyaddr((char *)in, sizeof(struct in_addr), AF_INET); */
1053 /* if(!he) return add_target_ip(arg, in); */
1054 }
1055 else {
1056 errno = 0;
1057 he = gethostbyname(arg);
1058 if(!he) {
1059 errno = 0;
1060 crash("Failed to resolve %s", arg);
1061 return -1;
1062 }
1063 }
1065 /* possibly add all the IP's as targets */
1066 for(i = 0; he->h_addr_list[i]; i++) {
1067 in = (struct in_addr *)he->h_addr_list[i];
1068 add_target_ip(arg, in);
1070 /* this is silly, but it works */
1071 if(mode == MODE_HOSTCHECK || mode == MODE_ALL) {
1072 printf("mode: %d\n", mode);
1073 continue;
1074 }
1075 break;
1076 }
1078 return 0;
1079 }
1080 /*
1081 * u = micro
1082 * m = milli
1083 * s = seconds
1084 * return value is in microseconds
1085 */
1086 static u_int
1087 get_timevar(const char *str)
1088 {
1089 char p, u, *ptr;
1090 unsigned int len;
1091 u_int i, d; /* integer and decimal, respectively */
1092 u_int factor = 1000; /* default to milliseconds */
1094 if(!str) return 0;
1095 len = strlen(str);
1096 if(!len) return 0;
1098 /* unit might be given as ms|m (millisec),
1099 * us|u (microsec) or just plain s, for seconds */
1100 u = p = '\0';
1101 u = str[len - 1];
1102 if(len >= 2 && !isdigit((int)str[len - 2])) p = str[len - 2];
1103 if(p && u == 's') u = p;
1104 else if(!p) p = u;
1105 if(debug > 2) printf("evaluating %s, u: %c, p: %c\n", str, u, p);
1107 if(u == 'u') factor = 1; /* microseconds */
1108 else if(u == 'm') factor = 1000; /* milliseconds */
1109 else if(u == 's') factor = 1000000; /* seconds */
1110 if(debug > 2) printf("factor is %u\n", factor);
1112 i = strtoul(str, &ptr, 0);
1113 if(!ptr || *ptr != '.' || strlen(ptr) < 2 || factor == 1)
1114 return i * factor;
1116 /* time specified in usecs can't have decimal points, so ignore them */
1117 if(factor == 1) return i;
1119 d = strtoul(ptr + 1, NULL, 0);
1121 /* d is decimal, so get rid of excess digits */
1122 while(d >= factor) d /= 10;
1124 /* the last parenthesis avoids floating point exceptions. */
1125 return ((i * factor) + (d * (factor / 10)));
1126 }
1128 /* not too good at checking errors, but it'll do (main() should barfe on -1) */
1129 static int
1130 get_threshold(char *str, threshold *th)
1131 {
1132 char *p = NULL, i = 0;
1134 if(!str || !strlen(str) || !th) return -1;
1136 /* pointer magic slims code by 10 lines. i is bof-stop on stupid libc's */
1137 p = &str[strlen(str) - 1];
1138 while(p != &str[1]) {
1139 if(*p == '%') *p = '\0';
1140 else if(*p == ',' && i) {
1141 *p = '\0'; /* reset it so get_timevar(str) works nicely later */
1142 th->pl = (unsigned char)strtoul(p+1, NULL, 0);
1143 break;
1144 }
1145 i = 1;
1146 p--;
1147 }
1148 th->rta = get_timevar(str);
1150 if(!th->rta) return -1;
1152 if(th->rta > MAXTTL * 1000000) th->rta = MAXTTL * 1000000;
1153 if(th->pl > 100) th->pl = 100;
1155 return 0;
1156 }
/*
 * Compute the 16-bit one's-complement checksum over the n bytes that
 * start at p.
 *
 * p: start of the data, read as 16-bit words in host byte order.
 * n: number of bytes to sum; may be odd.
 *
 * Returns the complemented sum, truncated to 16 bits.
 */
unsigned short
icmp_checksum(unsigned short *p, int n)
{
	unsigned long sum = 0;
	unsigned short last = 0;

	while(n > 1) {
		sum += *p++;
		n -= 2;
	}

	/* mop up the occasional odd byte. read exactly one byte (a full
	 * word load would touch memory past the buffer) and place it in the
	 * first byte of a zeroed word, i.e. pad the data with a zero byte */
	if(n == 1) {
		*(unsigned char *)&last = *(unsigned char *)p;
		sum += last;
	}

	/* fold the carries back into the low 16 bits until none remain */
	while(sum >> 16) sum = (sum >> 16) + (sum & 0xffff);

	/* ones-complement, trunc to 16 bits */
	return (unsigned short)~sum;
}
1179 /* make core plugin developers happy (silly, really) */
1180 static void
1181 usage(unsigned char arg, char *msg)
1182 {
1183 if(msg) printf("%s: %s\n", progname, msg);
1185 if(arg == 'V') {
1186 printf("$Id$\n");
1187 exit(STATE_UNKNOWN);
1188 }
1190 printf("Usage: %s [options] [-H] host1 host2 hostn\n\n", progname);
1192 if(arg != 'h') exit(3);
1194 printf("Where options are any combination of:\n"
1195 " * -H | --host specify a target\n"
1196 " * -w | --warn warning threshold (currently %0.3fms,%u%%)\n"
1197 " * -c | --crit critical threshold (currently %0.3fms,%u%%)\n"
1198 " * -n | --packets number of packets to send (currently %u)\n"
1199 " * -i | --interval max packet interval (currently %0.3fms)\n"
1200 " * -I | --hostint max target interval (currently %0.3fms)\n"
1201 " * -l | --ttl TTL on outgoing packets (currently %u)\n"
1202 " * -t | --timeout timeout value (seconds, currently %u)\n"
1203 " * -b | --bytes icmp packet size (currenly ignored)\n"
1204 " -v | --verbose verbosity++\n"
1205 " -h | --help this cruft\n",
1206 (float)warn.rta / 1000, warn.pl, (float)crit.rta / 1000, crit.pl,
1207 packets,
1208 (float)pkt_interval / 1000, (float)target_interval / 1000,
1209 ttl, timeout);
1211 puts("\nThe -H switch is optional. Naming a host (or several) to check is not.\n\n"
1212 "Threshold format for -w and -c is 200.25,60% for 200.25 msec RTA and 60%\n"
1213 "packet loss. The default values should work well for most users.\n"
1214 "You can specify different RTA factors using the standardized abbreviations\n"
1215 "us (microseconds), ms (milliseconds, default) or just plain s for seconds.\n\n"
1216 "Threshold format for -d is warn,crit. 12,14 means WARNING if >= 12 hops\n"
1217 "are spent and CRITICAL if >= 14 hops are spent.\n"
1218 "NOTE: Some systems decrease TTL when forming ICMP_ECHOREPLY, others do not.\n\n"
1219 "The -v switch can be specified several times for increased verbosity.\n\n"
1220 "Long options are currently unsupported.\n\n"
1221 "Options marked with * require an argument\n");
1223 puts("The latest version of this plugin can be found at http://oss.op5.se/nagios\n"
1224 "or https://devel.op5.se/oss until the day it is included in the official\n"
1225 "plugin distribution.\n");
1227 exit(3);
1228 }