1 /******************************************************************************
2 *
3 * Nagios check_icmp plugin
4 *
5 * License: GPL
6 * Copyright (c) 2005-2006 nagios-plugins team
7 *
8 * Original Author : Andreas Ericsson <ae@op5.se>
9 *
10 * Last Modified: $Date$
11 *
12 * Description:
13 *
14 * This file contains the check_icmp plugin
15 *
16 * Relevant RFC's: 792 (ICMP), 791 (IP)
17 *
18 * This program was modeled somewhat after the check_icmp program,
19 * which was in turn a hack of fping (www.fping.org) but has been
20 * completely rewritten since to generate higher precision rta values,
* and support several different modes as well as setting ttl to control
* redundant routes. The only remainders of fping are currently a few
23 * function names.
24 *
25 * License Information:
26 *
27 * This program is free software; you can redistribute it and/or modify
28 * it under the terms of the GNU General Public License as published by
29 * the Free Software Foundation; either version 2 of the License, or
30 * (at your option) any later version.
31 *
32 * This program is distributed in the hope that it will be useful,
33 * but WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35 * GNU General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, write to the Free Software
39 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
40 *
41 * $Id$
42 *
43 *****************************************************************************/
/* Program identity strings.
 * progname is assigned in main() from argv[0], because the defaults
 * chosen there depend on the name the binary was invoked under. */
char *progname;
const char *revision = "$Revision$";
const char *copyright = "2005-2006";
const char *email = "nagiosplug-devel@lists.sourceforge.net";
52 /** nagios plugins basic includes */
53 #include "common.h"
54 #include "netutils.h"
55 #include "utils.h"
57 #include <sys/time.h>
58 #include <sys/types.h>
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <stdarg.h>
62 #include <unistd.h>
63 #include <stddef.h>
64 #include <errno.h>
65 #include <string.h>
66 #include <ctype.h>
67 #include <netdb.h>
68 #include <sys/socket.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/in.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_icmp.h>
73 #include <arpa/inet.h>
74 #include <signal.h>
/** sometimes undefined system macros (quite a few, actually) **/
#ifndef MAXTTL
# define MAXTTL 255
#endif
#ifndef INADDR_NONE
/* all 32 bits set: the value inet_addr() returns for an unparsable
 * address. It must be the full 32-bit pattern, otherwise the
 * comparisons against inet_addr() results never match. */
# define INADDR_NONE 0xffffffffU
#endif

#ifndef SOL_IP
# define SOL_IP 0
#endif

/* we bundle these in one #ifndef, since they're all from BSD
 * Put individual #ifndef's around those that bother you */
#ifndef ICMP_UNREACH_NET_UNKNOWN
# define ICMP_UNREACH_NET_UNKNOWN 6
# define ICMP_UNREACH_HOST_UNKNOWN 7
# define ICMP_UNREACH_ISOLATED 8
# define ICMP_UNREACH_NET_PROHIB 9
# define ICMP_UNREACH_HOST_PROHIB 10
# define ICMP_UNREACH_TOSNET 11
# define ICMP_UNREACH_TOSHOST 12
#endif
/* tru64 has the ones above, but not these */
#ifndef ICMP_UNREACH_FILTER_PROHIB
# define ICMP_UNREACH_FILTER_PROHIB 13
# define ICMP_UNREACH_HOST_PRECEDENCE 14
# define ICMP_UNREACH_PRECEDENCE_CUTOFF 15
#endif
/* type for get_range() -- unimplemented */
typedef unsigned short range_t;

/* Bookkeeping for a single ping target. Targets are chained through
 * `next` in the order they were added. */
typedef struct rta_host {
	unsigned short id;              /* slot in table[]; also sent as the icmp sequence number */
	char *name;                     /* the argument this host was added with */
	char *msg;                      /* icmp error message, if any */
	struct sockaddr_in saddr_in;    /* address the pings are sent to */
	struct in_addr error_addr;      /* sender of the last icmp error reply */
	unsigned long long time_waited; /* accumulated round-trip time, in usecs */
	unsigned int icmp_sent;         /* per-host packet counters */
	unsigned int icmp_recv;
	unsigned int icmp_lost;
	unsigned char icmp_type;        /* type and code copied from an icmp error */
	unsigned char icmp_code;
	unsigned short flags;           /* control/status flags (FLAG_*) */
	double rta;                     /* measured round-trip average */
	unsigned char pl;               /* measured packet loss, percent */
	struct rta_host *next;          /* linked list */
} rta_host;

/* rta_host.flags: decidedly dead target. */
#define FLAG_LOST_CAUSE 0x01
/* Alert limits. A measurement that reaches the stored value already
 * counts as exceeding it (the limits are exclusive maxima). */
typedef struct threshold {
	unsigned char pl;  /* packet loss limit, in percent */
	unsigned int rta;  /* round-trip average limit, in microseconds */
} threshold;
/* Payload placed in the data area of every echo request we send. */
typedef struct icmp_ping_data {
	struct timeval stime;   /* send timestamp (saved in protocol struct as well) */
	unsigned short ping_id;
} icmp_ping_data;
/* Operating modes. main() starts out in MODE_RTA and may pick another
 * one depending on the name the program was invoked under.
 *
 * MODE_RTA:       send all packets no matter what (mimics check_ping).
 * MODE_HOSTCHECK: return immediately upon any sign of life. Packets are
 *                 sent to ALL addresses the host name resolves to, and
 *                 only one host is expected to be checked at a time.
 *                 Any response whatsoever therefore counts as a sign of
 *                 life, even when it arrives outside the crit.rta limit.
 *                 Do not misspell any additional IP's.
 * MODE_ALL:       requires packets from ALL requested IP's to return OK.
 * MODE_ICMP:      something similar to check_icmp (MODE_RTA without
 *                 tcp and udp args does this).
 */
#define MODE_RTA       0
#define MODE_HOSTCHECK 1
#define MODE_ALL       2
#define MODE_ICMP      3

/* Ping types we can do, as bit flags.
 * TODO: investigate ARP ping as well */
#define HAVE_ICMP 1
#define HAVE_UDP  2
#define HAVE_TCP  4
#define HAVE_ARP  8

/* Packet size limits */
#define MIN_PING_DATA_SIZE sizeof(struct icmp_ping_data)
#define MAX_IP_PKT_SIZE 65536 /* (theoretical) max IP packet size */
#define IP_HDR_SIZE 20
#define MAX_PING_DATA (MAX_IP_PKT_SIZE - IP_HDR_SIZE - ICMP_MINLEN)
#define DEFAULT_PING_DATA_SIZE (MIN_PING_DATA_SIZE + 44)

/* Target states, as bit flags */
#define TSTATE_INACTIVE 0x01 /* don't ping this host anymore */
#define TSTATE_WAITING  0x02 /* unanswered packets on the wire */
#define TSTATE_ALIVE    0x04 /* target is alive (has answered something) */
#define TSTATE_UNREACH  0x08
176 /** prototypes **/
177 void print_help (void);
178 void print_usage (void);
179 static u_int get_timevar(const char *);
180 static u_int get_timevaldiff(struct timeval *, struct timeval *);
181 static int wait_for_reply(int, u_int);
182 static int recvfrom_wto(int, char *, unsigned int, struct sockaddr *, u_int *);
183 static int send_icmp_ping(int, struct rta_host *);
184 static int get_threshold(char *str, threshold *th);
185 static void run_checks(void);
186 static int add_target(char *);
187 static int add_target_ip(char *, struct in_addr *);
188 static int handle_random_icmp(struct icmp *, struct sockaddr_in *);
189 static unsigned short icmp_checksum(unsigned short *, int);
190 static void finish(int);
191 static void crash(const char *, ...);
193 /** external **/
194 extern int optind, opterr, optopt;
195 extern char *optarg;
196 extern char **environ;
198 /** global variables **/
199 static struct rta_host **table, *cursor, *list;
200 static threshold crit = {80, 500000}, warn = {40, 200000};
201 static int mode, protocols, sockets, debug = 0, timeout = 10;
202 static unsigned short icmp_pkt_size, icmp_data_size = DEFAULT_PING_DATA_SIZE;
203 static unsigned int icmp_sent = 0, icmp_recv = 0, icmp_lost = 0;
204 #define icmp_pkts_en_route (icmp_sent - (icmp_recv + icmp_lost))
205 static unsigned short targets_down = 0, targets = 0, packets = 0;
206 #define targets_alive (targets - targets_down)
207 static unsigned int retry_interval, pkt_interval, target_interval;
208 static int icmp_sock, tcp_sock, udp_sock, status = STATE_OK;
209 static pid_t pid;
210 static struct timezone tz;
211 static struct timeval prog_start;
212 static unsigned long long max_completion_time = 0;
213 static unsigned char ttl = 0; /* outgoing ttl */
214 static unsigned int warn_down = 1, crit_down = 1; /* host down threshold values */
215 float pkt_backoff_factor = 1.5;
216 float target_backoff_factor = 1.5;
218 /** code start **/
219 static void
220 crash(const char *fmt, ...)
221 {
222 va_list ap;
224 printf("%s: ", progname);
226 va_start(ap, fmt);
227 vprintf(fmt, ap);
228 va_end(ap);
230 if(errno) printf(": %s", strerror(errno));
231 puts("");
233 exit(3);
234 }
237 static char *
238 get_icmp_error_msg(unsigned char icmp_type, unsigned char icmp_code)
239 {
240 char *msg = "unreachable";
242 if(debug > 1) printf("get_icmp_error_msg(%u, %u)\n", icmp_type, icmp_code);
243 switch(icmp_type) {
244 case ICMP_UNREACH:
245 switch(icmp_code) {
246 case ICMP_UNREACH_NET: msg = "Net unreachable"; break;
247 case ICMP_UNREACH_HOST: msg = "Host unreachable"; break;
248 case ICMP_UNREACH_PROTOCOL: msg = "Protocol unreachable (firewall?)"; break;
249 case ICMP_UNREACH_PORT: msg = "Port unreachable (firewall?)"; break;
250 case ICMP_UNREACH_NEEDFRAG: msg = "Fragmentation needed"; break;
251 case ICMP_UNREACH_SRCFAIL: msg = "Source route failed"; break;
252 case ICMP_UNREACH_ISOLATED: msg = "Source host isolated"; break;
253 case ICMP_UNREACH_NET_UNKNOWN: msg = "Unknown network"; break;
254 case ICMP_UNREACH_HOST_UNKNOWN: msg = "Unknown host"; break;
255 case ICMP_UNREACH_NET_PROHIB: msg = "Network denied (firewall?)"; break;
256 case ICMP_UNREACH_HOST_PROHIB: msg = "Host denied (firewall?)"; break;
257 case ICMP_UNREACH_TOSNET: msg = "Bad TOS for network (firewall?)"; break;
258 case ICMP_UNREACH_TOSHOST: msg = "Bad TOS for host (firewall?)"; break;
259 case ICMP_UNREACH_FILTER_PROHIB: msg = "Prohibited by filter (firewall)"; break;
260 case ICMP_UNREACH_HOST_PRECEDENCE: msg = "Host precedence violation"; break;
261 case ICMP_UNREACH_PRECEDENCE_CUTOFF: msg = "Precedence cutoff"; break;
262 default: msg = "Invalid code"; break;
263 }
264 break;
266 case ICMP_TIMXCEED:
267 /* really 'out of reach', or non-existant host behind a router serving
268 * two different subnets */
269 switch(icmp_code) {
270 case ICMP_TIMXCEED_INTRANS: msg = "Time to live exceeded in transit"; break;
271 case ICMP_TIMXCEED_REASS: msg = "Fragment reassembly time exceeded"; break;
272 default: msg = "Invalid code"; break;
273 }
274 break;
276 case ICMP_SOURCEQUENCH: msg = "Transmitting too fast"; break;
277 case ICMP_REDIRECT: msg = "Redirect (change route)"; break;
278 case ICMP_PARAMPROB: msg = "Bad IP header (required option absent)"; break;
280 /* the following aren't error messages, so ignore */
281 case ICMP_TSTAMP:
282 case ICMP_TSTAMPREPLY:
283 case ICMP_IREQ:
284 case ICMP_IREQREPLY:
285 case ICMP_MASKREQ:
286 case ICMP_MASKREPLY:
287 default: msg = ""; break;
288 }
290 return msg;
291 }
293 static int
294 handle_random_icmp(struct icmp *p, struct sockaddr_in *addr)
295 {
296 struct icmp *sent_icmp = NULL;
297 struct rta_host *host = NULL;
298 unsigned char *ptr;
300 if(p->icmp_type == ICMP_ECHO && p->icmp_id == pid) {
301 /* echo request from us to us (pinging localhost) */
302 return 0;
303 }
305 ptr = (unsigned char *)p;
306 if(debug) printf("handle_random_icmp(%p, %p)\n", (void *)p, (void *)addr);
308 /* only handle a few types, since others can't possibly be replies to
309 * us in a sane network (if it is anyway, it will be counted as lost
310 * at summary time, but not as quickly as a proper response */
311 /* TIMXCEED can be an unreach from a router with multiple IP's which
312 * serves two different subnets on the same interface and a dead host
313 * on one net is pinged from the other. The router will respond to
314 * itself and thus set TTL=0 so as to not loop forever. Even when
315 * TIMXCEED actually sends a proper icmp response we will have passed
316 * too many hops to have a hope of reaching it later, in which case it
317 * indicates overconfidence in the network, poor routing or both. */
318 if(p->icmp_type != ICMP_UNREACH && p->icmp_type != ICMP_TIMXCEED &&
319 p->icmp_type != ICMP_SOURCEQUENCH && p->icmp_type != ICMP_PARAMPROB)
320 {
321 return 0;
322 }
324 /* might be for us. At least it holds the original package (according
325 * to RFC 792). If it isn't, just ignore it */
326 sent_icmp = (struct icmp *)(ptr + 28);
327 if(sent_icmp->icmp_type != ICMP_ECHO || sent_icmp->icmp_id != pid ||
328 sent_icmp->icmp_seq >= targets)
329 {
330 if(debug) printf("Packet is no response to a packet we sent\n");
331 return 0;
332 }
334 /* it is indeed a response for us */
335 host = table[sent_icmp->icmp_seq];
336 if(debug) {
337 printf("Received \"%s\" from %s for ICMP ECHO sent to %s.\n",
338 get_icmp_error_msg(p->icmp_type, p->icmp_code),
339 inet_ntoa(addr->sin_addr), host->name);
340 }
342 icmp_lost++;
343 host->icmp_lost++;
344 /* don't spend time on lost hosts any more */
345 if(host->flags & FLAG_LOST_CAUSE) return 0;
347 /* source quench means we're sending too fast, so increase the
348 * interval and mark this packet lost */
349 if(p->icmp_type == ICMP_SOURCEQUENCH) {
350 pkt_interval *= pkt_backoff_factor;
351 target_interval *= target_backoff_factor;
352 }
353 else {
354 targets_down++;
355 host->flags |= FLAG_LOST_CAUSE;
356 }
357 host->icmp_type = p->icmp_type;
358 host->icmp_code = p->icmp_code;
359 host->error_addr.s_addr = addr->sin_addr.s_addr;
361 return 0;
362 }
364 int
365 main(int argc, char **argv)
366 {
367 int i;
368 char *ptr;
369 long int arg;
370 int icmp_sockerrno, udp_sockerrno, tcp_sockerrno;
371 int result;
372 struct rta_host *host;
374 /* we only need to be setsuid when we get the sockets, so do
375 * that before pointer magic (esp. on network data) */
376 icmp_sockerrno = udp_sockerrno = tcp_sockerrno = sockets = 0;
378 if((icmp_sock = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) != -1)
379 sockets |= HAVE_ICMP;
380 else icmp_sockerrno = errno;
382 /* if((udp_sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) != -1) */
383 /* sockets |= HAVE_UDP; */
384 /* else udp_sockerrno = errno; */
386 /* if((tcp_sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) */
387 /* sockets |= HAVE_TCP; */
388 /* else tcp_sockerrno = errno; */
390 /* now drop privileges (no effect if not setsuid or geteuid() == 0) */
391 setuid(getuid());
393 /* POSIXLY_CORRECT might break things, so unset it (the portable way) */
394 environ = NULL;
396 /* use the pid to mark packets as ours */
397 pid = getpid();
398 /* printf("pid = %u\n", pid); */
400 /* get calling name the old-fashioned way for portability instead
401 * of relying on the glibc-ism __progname */
402 ptr = strrchr(argv[0], '/');
403 if(ptr) progname = &ptr[1];
404 else progname = argv[0];
406 /* now set defaults. Use progname to set them initially (allows for
407 * superfast check_host program when target host is up */
408 cursor = list = NULL;
409 table = NULL;
411 mode = MODE_RTA;
412 crit.rta = 500000;
413 crit.pl = 80;
414 warn.rta = 200000;
415 warn.pl = 40;
416 protocols = HAVE_ICMP | HAVE_UDP | HAVE_TCP;
417 pkt_interval = 80000; /* 80 msec packet interval by default */
418 packets = 5;
420 if(!strcmp(progname, "check_icmp") || !strcmp(progname, "check_ping")) {
421 mode = MODE_ICMP;
422 protocols = HAVE_ICMP;
423 }
424 else if(!strcmp(progname, "check_host")) {
425 mode = MODE_HOSTCHECK;
426 pkt_interval = 1000000;
427 packets = 5;
428 crit.rta = warn.rta = 1000000;
429 crit.pl = warn.pl = 100;
430 }
431 else if(!strcmp(progname, "check_rta_multi")) {
432 mode = MODE_ALL;
433 target_interval = 0;
434 pkt_interval = 50000;
435 packets = 5;
436 }
438 /* parse the arguments */
439 for(i = 1; i < argc; i++) {
440 while((arg = getopt(argc, argv, "vhVw:c:n:p:t:H:i:b:I:l:")) != EOF) {
441 switch(arg) {
442 case 'v':
443 debug++;
444 break;
445 case 'b':
446 /* silently ignored for now */
447 break;
448 case 'i':
449 pkt_interval = get_timevar(optarg);
450 break;
451 case 'I':
452 target_interval = get_timevar(optarg);
453 break;
454 case 'w':
455 get_threshold(optarg, &warn);
456 break;
457 case 'c':
458 get_threshold(optarg, &crit);
459 break;
460 case 'n':
461 case 'p':
462 packets = strtoul(optarg, NULL, 0);
463 break;
464 case 't':
465 timeout = strtoul(optarg, NULL, 0);
466 if(!timeout) timeout = 10;
467 break;
468 case 'H':
469 add_target(optarg);
470 break;
471 case 'l':
472 ttl = (unsigned char)strtoul(optarg, NULL, 0);
473 break;
474 case 'd': /* implement later, for cluster checks */
475 warn_down = (unsigned char)strtoul(optarg, &ptr, 0);
476 if(ptr) {
477 crit_down = (unsigned char)strtoul(ptr + 1, NULL, 0);
478 }
479 break;
480 case 'V': /* version */
481 //print_revision (progname, revision);
482 exit (STATE_OK);
483 case 'h': /* help */
484 print_help ();
485 exit (STATE_OK);
486 }
487 }
488 }
490 argv = &argv[optind];
491 while(*argv) {
492 add_target(*argv);
493 argv++;
494 }
495 if(!targets) {
496 errno = 0;
497 crash("No hosts to check");
498 exit(3);
499 }
501 if(!sockets) {
502 if(icmp_sock == -1) {
503 errno = icmp_sockerrno;
504 crash("Failed to obtain ICMP socket");
505 return -1;
506 }
507 /* if(udp_sock == -1) { */
508 /* errno = icmp_sockerrno; */
509 /* crash("Failed to obtain UDP socket"); */
510 /* return -1; */
511 /* } */
512 /* if(tcp_sock == -1) { */
513 /* errno = icmp_sockerrno; */
514 /* crash("Failed to obtain TCP socker"); */
515 /* return -1; */
516 /* } */
517 }
518 if(!ttl) ttl = 64;
520 if(icmp_sock) {
521 result = setsockopt(icmp_sock, SOL_IP, IP_TTL, &ttl, sizeof(ttl));
522 if(debug) {
523 if(result == -1) printf("setsockopt failed\n");
524 else printf("ttl set to %u\n", ttl);
525 }
526 }
528 /* stupid users should be able to give whatever thresholds they want
529 * (nothing will break if they do), but some anal plugin maintainer
530 * will probably add some printf() thing here later, so it might be
531 * best to at least show them where to do it. ;) */
532 if(warn.pl > crit.pl) warn.pl = crit.pl;
533 if(warn.rta > crit.rta) warn.rta = crit.rta;
534 if(warn_down > crit_down) crit_down = warn_down;
536 signal(SIGINT, finish);
537 signal(SIGHUP, finish);
538 signal(SIGTERM, finish);
539 signal(SIGALRM, finish);
540 if(debug) printf("Setting alarm timeout to %u seconds\n", timeout);
541 alarm(timeout);
543 /* make sure we don't wait any longer than necessary */
544 gettimeofday(&prog_start, &tz);
545 max_completion_time =
546 ((targets * packets * pkt_interval) + (targets * target_interval)) +
547 (targets * packets * crit.rta) + crit.rta;
549 if(debug) {
550 printf("packets: %u, targets: %u\n"
551 "target_interval: %0.3f, pkt_interval %0.3f\n"
552 "crit.rta: %0.3f\n"
553 "max_completion_time: %0.3f\n",
554 packets, targets,
555 (float)target_interval / 1000, (float)pkt_interval / 1000,
556 (float)crit.rta / 1000,
557 (float)max_completion_time / 1000);
558 }
560 if(debug) {
561 if(max_completion_time > (u_int)timeout * 1000000) {
562 printf("max_completion_time: %llu timeout: %u\n",
563 max_completion_time, timeout);
564 printf("Timout must be at lest %llu\n",
565 max_completion_time / 1000000 + 1);
566 }
567 }
569 icmp_pkt_size = icmp_data_size + ICMP_MINLEN;
570 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
571 if(icmp_pkt_size < sizeof(struct icmp) + sizeof(struct icmp_ping_data)) {
572 icmp_pkt_size = sizeof(struct icmp) + sizeof(struct icmp_ping_data);
573 }
574 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
576 if(debug) {
577 printf("crit = {%u, %u%%}, warn = {%u, %u%%}\n",
578 crit.rta, crit.pl, warn.rta, warn.pl);
579 printf("pkt_interval: %u target_interval: %u retry_interval: %u\n",
580 pkt_interval, target_interval, retry_interval);
581 printf("icmp_pkt_size: %u timeout: %u\n",
582 icmp_pkt_size, timeout);
583 }
585 if(packets > 20) {
586 errno = 0;
587 crash("packets is > 20 (%d)", packets);
588 }
590 host = list;
591 table = malloc(sizeof(struct rta_host **) * (argc - 1));
592 i = 0;
593 while(host) {
594 host->id = i;
595 table[i] = host;
596 host = host->next;
597 i++;
598 }
600 run_checks();
602 errno = 0;
603 finish(0);
605 return(0);
606 }
608 static void
609 run_checks()
610 {
611 u_int i, t, result;
612 u_int final_wait, time_passed;
614 /* this loop might actually violate the pkt_interval or target_interval
615 * settings, but only if there aren't any packets on the wire which
616 * indicates that the target can handle an increased packet rate */
617 for(i = 0; i < packets; i++) {
618 for(t = 0; t < targets; t++) {
619 /* don't send useless packets */
620 if(!targets_alive) finish(0);
621 if(table[t]->flags & FLAG_LOST_CAUSE) {
622 if(debug) printf("%s is a lost cause. not sending any more\n",
623 table[t]->name);
624 continue;
625 }
627 /* we're still in the game, so send next packet */
628 (void)send_icmp_ping(icmp_sock, table[t]);
629 result = wait_for_reply(icmp_sock, target_interval);
630 }
631 result = wait_for_reply(icmp_sock, pkt_interval * targets);
632 }
634 if(icmp_pkts_en_route && targets_alive) {
635 time_passed = get_timevaldiff(NULL, NULL);
636 final_wait = max_completion_time - time_passed;
638 if(debug) {
639 printf("time_passed: %u final_wait: %u max_completion_time: %llu\n",
640 time_passed, final_wait, max_completion_time);
641 }
642 if(time_passed > max_completion_time) {
643 if(debug) printf("Time passed. Finishing up\n");
644 finish(0);
645 }
647 /* catch the packets that might come in within the timeframe, but
648 * haven't yet */
649 if(debug) printf("Waiting for %u micro-seconds (%0.3f msecs)\n",
650 final_wait, (float)final_wait / 1000);
651 result = wait_for_reply(icmp_sock, final_wait);
652 }
653 }
655 /* response structure:
656 * ip header : 20 bytes
657 * icmp header : 28 bytes
658 * icmp echo reply : the rest
659 */
660 static int
661 wait_for_reply(int sock, u_int t)
662 {
663 int n, hlen;
664 static char buf[4096];
665 struct sockaddr_in resp_addr;
666 struct ip *ip;
667 struct icmp *icp, *sent_icmp;
668 struct rta_host *host;
669 struct icmp_ping_data *data;
670 struct timeval wait_start, now;
671 u_int tdiff, i, per_pkt_wait;
673 /* if we can't listen or don't have anything to listen to, just return */
674 if(!t || !icmp_pkts_en_route) return 0;
676 gettimeofday(&wait_start, &tz);
678 i = t;
679 per_pkt_wait = t / icmp_pkts_en_route;
680 while(icmp_pkts_en_route && get_timevaldiff(&wait_start, NULL) < i) {
681 t = per_pkt_wait;
683 /* wrap up if all targets are declared dead */
684 if(!targets_alive ||
685 get_timevaldiff(&prog_start, NULL) >= max_completion_time ||
686 (mode == MODE_HOSTCHECK && targets_down))
687 {
688 finish(0);
689 }
691 /* reap responses until we hit a timeout */
692 n = recvfrom_wto(sock, buf, sizeof(buf),
693 (struct sockaddr *)&resp_addr, &t);
694 if(!n) {
695 if(debug > 1) {
696 printf("recvfrom_wto() timed out during a %u usecs wait\n",
697 per_pkt_wait);
698 }
699 continue; /* timeout for this one, so keep trying */
700 }
701 if(n < 0) {
702 if(debug) printf("recvfrom_wto() returned errors\n");
703 return n;
704 }
706 ip = (struct ip *)buf;
707 if(debug > 1) printf("received %u bytes from %s\n",
708 ntohs(ip->ip_len), inet_ntoa(resp_addr.sin_addr));
710 /* obsolete. alpha on tru64 provides the necessary defines, but isn't broken */
711 /* #if defined( __alpha__ ) && __STDC__ && !defined( __GLIBC__ ) */
712 /* alpha headers are decidedly broken. Using an ansi compiler,
713 * they provide ip_vhl instead of ip_hl and ip_v, so we mask
714 * off the bottom 4 bits */
715 /* hlen = (ip->ip_vhl & 0x0f) << 2; */
716 /* #else */
717 hlen = ip->ip_hl << 2;
718 /* #endif */
720 if(n < (hlen + ICMP_MINLEN)) {
721 crash("received packet too short for ICMP (%d bytes, expected %d) from %s\n",
722 n, hlen + icmp_pkt_size, inet_ntoa(resp_addr.sin_addr));
723 }
724 /* else if(debug) { */
725 /* printf("ip header size: %u, packet size: %u (expected %u, %u)\n", */
726 /* hlen, ntohs(ip->ip_len) - hlen, */
727 /* sizeof(struct ip), icmp_pkt_size); */
728 /* } */
730 /* check the response */
731 icp = (struct icmp *)(buf + hlen);
732 sent_icmp = (struct icmp *)(buf + hlen + ICMP_MINLEN);
733 /* printf("buf: %p, icp: %p, distance: %u (expected %u)\n", */
734 /* buf, icp, */
735 /* (u_int)icp - (u_int)buf, hlen); */
736 /* printf("buf: %p, sent_icmp: %p, distance: %u (expected %u)\n", */
737 /* buf, sent_icmp, */
738 /* (u_int)sent_icmp - (u_int)buf, hlen + ICMP_MINLEN); */
740 if(icp->icmp_id != pid) {
741 handle_random_icmp(icp, &resp_addr);
742 continue;
743 }
745 if(icp->icmp_type != ICMP_ECHOREPLY || icp->icmp_seq >= targets) {
746 if(debug > 2) printf("not a proper ICMP_ECHOREPLY\n");
747 handle_random_icmp(icp, &resp_addr);
748 continue;
749 }
751 /* this is indeed a valid response */
752 data = (struct icmp_ping_data *)(icp->icmp_data);
754 host = table[icp->icmp_seq];
755 gettimeofday(&now, &tz);
756 tdiff = get_timevaldiff(&data->stime, &now);
758 host->time_waited += tdiff;
759 host->icmp_recv++;
760 icmp_recv++;
762 if(debug) {
763 printf("%0.3f ms rtt from %s, outgoing ttl: %u, incoming ttl: %u\n",
764 (float)tdiff / 1000, inet_ntoa(resp_addr.sin_addr),
765 ttl, ip->ip_ttl);
766 }
768 /* if we're in hostcheck mode, exit with limited printouts */
769 if(mode == MODE_HOSTCHECK) {
770 printf("OK - %s responds to ICMP. Packet %u, rta %0.3fms|"
771 "pkt=%u;;0;%u rta=%0.3f;%0.3f;%0.3f;;\n",
772 host->name, icmp_recv, (float)tdiff / 1000,
773 icmp_recv, packets, (float)tdiff / 1000,
774 (float)warn.rta / 1000, (float)crit.rta / 1000);
775 exit(STATE_OK);
776 }
777 }
779 return 0;
780 }
782 /* the ping functions */
783 static int
784 send_icmp_ping(int sock, struct rta_host *host)
785 {
786 static char *buf = NULL; /* re-use so we prevent leaks */
787 long int len;
788 struct icmp *icp;
789 struct icmp_ping_data *data;
790 struct timeval tv;
791 struct sockaddr *addr;
794 if(sock == -1) {
795 errno = 0;
796 crash("Attempt to send on bogus socket");
797 return -1;
798 }
799 addr = (struct sockaddr *)&host->saddr_in;
801 if(!buf) {
802 buf = (char *)malloc(icmp_pkt_size + sizeof(struct ip));
803 if(!buf) {
804 crash("send_icmp_ping(): failed to malloc %d bytes for send buffer",
805 icmp_pkt_size);
806 return -1; /* might be reached if we're in debug mode */
807 }
808 }
809 memset(buf, 0, icmp_pkt_size + sizeof(struct ip));
811 if((gettimeofday(&tv, &tz)) == -1) return -1;
813 icp = (struct icmp *)buf;
814 icp->icmp_type = ICMP_ECHO;
815 icp->icmp_code = 0;
816 icp->icmp_cksum = 0;
817 icp->icmp_id = pid;
818 icp->icmp_seq = host->id;
819 data = (struct icmp_ping_data *)icp->icmp_data;
820 data->ping_id = 10; /* host->icmp.icmp_sent; */
821 memcpy(&data->stime, &tv, sizeof(struct timeval));
822 icp->icmp_cksum = icmp_checksum((u_short *)icp, icmp_pkt_size);
824 len = sendto(sock, buf, icmp_pkt_size, 0, (struct sockaddr *)addr,
825 sizeof(struct sockaddr));
827 if(len < 0 || (unsigned int)len != icmp_pkt_size) {
828 if(debug) printf("Failed to send ping to %s\n",
829 inet_ntoa(host->saddr_in.sin_addr));
830 return -1;
831 }
833 icmp_sent++;
834 host->icmp_sent++;
836 return 0;
837 }
839 static int
840 recvfrom_wto(int sock, char *buf, unsigned int len, struct sockaddr *saddr,
841 u_int *timo)
842 {
843 u_int slen;
844 int n;
845 struct timeval to, then, now;
846 fd_set rd, wr;
848 if(!*timo) {
849 if(debug) printf("*timo is not\n");
850 return 0;
851 }
853 to.tv_sec = *timo / 1000000;
854 to.tv_usec = (*timo - (to.tv_sec * 1000000));
856 FD_ZERO(&rd);
857 FD_ZERO(&wr);
858 FD_SET(sock, &rd);
859 errno = 0;
860 gettimeofday(&then, &tz);
861 n = select(sock + 1, &rd, &wr, NULL, &to);
862 if(n < 0) crash("select() in recvfrom_wto");
863 gettimeofday(&now, &tz);
864 *timo = get_timevaldiff(&then, &now);
866 if(!n) return 0; /* timeout */
868 slen = sizeof(struct sockaddr);
870 return recvfrom(sock, buf, len, 0, saddr, &slen);
871 }
873 static void
874 finish(int sig)
875 {
876 u_int i = 0;
877 unsigned char pl;
878 double rta;
879 struct rta_host *host;
880 char *status_string[] =
881 {"OK", "WARNING", "CRITICAL", "UNKNOWN", "DEPENDENT"};
883 alarm(0);
884 if(debug > 1) printf("finish(%d) called\n", sig);
886 if(icmp_sock != -1) close(icmp_sock);
887 if(udp_sock != -1) close(udp_sock);
888 if(tcp_sock != -1) close(tcp_sock);
890 if(debug) {
891 printf("icmp_sent: %u icmp_recv: %u icmp_lost: %u\n",
892 icmp_sent, icmp_recv, icmp_lost);
893 printf("targets: %u targets_alive: %u\n", targets, targets_alive);
894 }
896 /* iterate thrice to calculate values, give output, and print perfparse */
897 host = list;
898 while(host) {
899 if(!host->icmp_recv) {
900 /* rta 0 is ofcourse not entirely correct, but will still show up
901 * conspicuosly as missing entries in perfparse and cacti */
902 pl = 100;
903 rta = 0;
904 status = STATE_CRITICAL;
905 /* up the down counter if not already counted */
906 if(!(host->flags & FLAG_LOST_CAUSE) && targets_alive) targets_down++;
907 }
908 else {
909 pl = ((host->icmp_sent - host->icmp_recv) * 100) / host->icmp_sent;
910 rta = (double)host->time_waited / host->icmp_recv;
911 }
912 host->pl = pl;
913 host->rta = rta;
914 if(!status && (pl >= warn.pl || rta >= warn.rta)) status = STATE_WARNING;
915 if(pl >= crit.pl || rta >= crit.rta) status = STATE_CRITICAL;
917 host = host->next;
918 }
919 /* this is inevitable */
920 if(!targets_alive) status = STATE_CRITICAL;
921 printf("%s - ", status_string[status]);
923 host = list;
924 while(host) {
925 if(debug) puts("");
926 if(i) {
927 if(i < targets) printf(" :: ");
928 else printf("\n");
929 }
930 i++;
931 if(!host->icmp_recv) {
932 status = STATE_CRITICAL;
933 if(host->flags & FLAG_LOST_CAUSE) {
934 printf("%s: %s @ %s. rta nan, lost %d%%",
935 host->name,
936 get_icmp_error_msg(host->icmp_type, host->icmp_code),
937 inet_ntoa(host->error_addr),
938 100);
939 }
940 else { /* not marked as lost cause, so we have no flags for it */
941 printf("%s: rta nan, lost 100%%", host->name);
942 }
943 }
944 else { /* !icmp_recv */
945 printf("%s: rta %0.3fms, lost %u%%",
946 host->name, host->rta / 1000, host->pl);
947 }
949 host = host->next;
950 }
952 /* iterate once more for pretty perfparse output */
953 printf("|");
954 i = 0;
955 host = list;
956 while(host) {
957 if(debug) puts("");
958 printf("%srta=%0.3fms;%0.3f;%0.3f;0; %spl=%u%%;%u;%u;; ",
959 (targets > 1) ? host->name : "",
960 host->rta / 1000, (float)warn.rta / 1000, (float)crit.rta / 1000,
961 (targets > 1) ? host->name : "",
962 host->pl, warn.pl, crit.pl);
964 host = host->next;
965 }
967 /* finish with an empty line */
968 puts("");
969 if(debug) printf("targets: %u, targets_alive: %u\n",
970 targets, targets_alive);
972 exit(status);
973 }
975 static u_int
976 get_timevaldiff(struct timeval *early, struct timeval *later)
977 {
978 u_int ret;
979 struct timeval now;
981 if(!later) {
982 gettimeofday(&now, &tz);
983 later = &now;
984 }
985 if(!early) early = &prog_start;
987 /* if early > later we return 0 so as to indicate a timeout */
988 if(early->tv_sec > early->tv_sec ||
989 (early->tv_sec == later->tv_sec && early->tv_usec > later->tv_usec))
990 {
991 return 0;
992 }
994 ret = (later->tv_sec - early->tv_sec) * 1000000;
995 ret += later->tv_usec - early->tv_usec;
997 return ret;
998 }
1000 static int
1001 add_target_ip(char *arg, struct in_addr *in)
1002 {
1003 struct rta_host *host;
1005 /* disregard obviously stupid addresses */
1006 if(in->s_addr == INADDR_NONE || in->s_addr == INADDR_ANY)
1007 return -1;
1009 /* no point in adding two identical IP's, so don't. ;) */
1010 host = list;
1011 while(host) {
1012 if(host->saddr_in.sin_addr.s_addr == in->s_addr) {
1013 if(debug) printf("Identical IP already exists. Not adding %s\n", arg);
1014 return -1;
1015 }
1016 host = host->next;
1017 }
1019 /* add the fresh ip */
1020 host = malloc(sizeof(struct rta_host));
1021 if(!host) {
1022 crash("add_target_ip(%s, %s): malloc(%d) failed",
1023 arg, inet_ntoa(*in), sizeof(struct rta_host));
1024 }
1025 memset(host, 0, sizeof(struct rta_host));
1027 /* set the values. use calling name for output */
1028 host->name = strdup(arg);
1030 /* fill out the sockaddr_in struct */
1031 host->saddr_in.sin_family = AF_INET;
1032 host->saddr_in.sin_addr.s_addr = in->s_addr;
1034 if(!list) list = cursor = host;
1035 else cursor->next = host;
1037 cursor = host;
1038 targets++;
1040 return 0;
1041 }
1043 /* wrapper for add_target_ip */
1044 static int
1045 add_target(char *arg)
1046 {
1047 int i;
1048 struct hostent *he;
1049 struct in_addr *in, ip;
1051 /* don't resolve if we don't have to */
1052 if((ip.s_addr = inet_addr(arg)) != INADDR_NONE) {
1053 /* don't add all ip's if we were given a specific one */
1054 return add_target_ip(arg, &ip);
1055 /* he = gethostbyaddr((char *)in, sizeof(struct in_addr), AF_INET); */
1056 /* if(!he) return add_target_ip(arg, in); */
1057 }
1058 else {
1059 errno = 0;
1060 he = gethostbyname(arg);
1061 if(!he) {
1062 errno = 0;
1063 crash("Failed to resolve %s", arg);
1064 return -1;
1065 }
1066 }
1068 /* possibly add all the IP's as targets */
1069 for(i = 0; he->h_addr_list[i]; i++) {
1070 in = (struct in_addr *)he->h_addr_list[i];
1071 add_target_ip(arg, in);
1073 /* this is silly, but it works */
1074 if(mode == MODE_HOSTCHECK || mode == MODE_ALL) {
1075 printf("mode: %d\n", mode);
1076 continue;
1077 }
1078 break;
1079 }
1081 return 0;
1082 }
1083 /*
1084 * u = micro
1085 * m = milli
1086 * s = seconds
1087 * return value is in microseconds
1088 */
1089 static u_int
1090 get_timevar(const char *str)
1091 {
1092 char p, u, *ptr;
1093 unsigned int len;
1094 u_int i, d; /* integer and decimal, respectively */
1095 u_int factor = 1000; /* default to milliseconds */
1097 if(!str) return 0;
1098 len = strlen(str);
1099 if(!len) return 0;
1101 /* unit might be given as ms|m (millisec),
1102 * us|u (microsec) or just plain s, for seconds */
1103 u = p = '\0';
1104 u = str[len - 1];
1105 if(len >= 2 && !isdigit((int)str[len - 2])) p = str[len - 2];
1106 if(p && u == 's') u = p;
1107 else if(!p) p = u;
1108 if(debug > 2) printf("evaluating %s, u: %c, p: %c\n", str, u, p);
1110 if(u == 'u') factor = 1; /* microseconds */
1111 else if(u == 'm') factor = 1000; /* milliseconds */
1112 else if(u == 's') factor = 1000000; /* seconds */
1113 if(debug > 2) printf("factor is %u\n", factor);
1115 i = strtoul(str, &ptr, 0);
1116 if(!ptr || *ptr != '.' || strlen(ptr) < 2 || factor == 1)
1117 return i * factor;
1119 /* time specified in usecs can't have decimal points, so ignore them */
1120 if(factor == 1) return i;
1122 d = strtoul(ptr + 1, NULL, 0);
1124 /* d is decimal, so get rid of excess digits */
1125 while(d >= factor) d /= 10;
1127 /* the last parenthesis avoids floating point exceptions. */
1128 return ((i * factor) + (d * (factor / 10)));
1129 }
1131 /* not too good at checking errors, but it'll do (main() should barfe on -1) */
1132 static int
1133 get_threshold(char *str, threshold *th)
1134 {
1135 char *p = NULL, i = 0;
1137 if(!str || !strlen(str) || !th) return -1;
1139 /* pointer magic slims code by 10 lines. i is bof-stop on stupid libc's */
1140 p = &str[strlen(str) - 1];
1141 while(p != &str[1]) {
1142 if(*p == '%') *p = '\0';
1143 else if(*p == ',' && i) {
1144 *p = '\0'; /* reset it so get_timevar(str) works nicely later */
1145 th->pl = (unsigned char)strtoul(p+1, NULL, 0);
1146 break;
1147 }
1148 i = 1;
1149 p--;
1150 }
1151 th->rta = get_timevar(str);
1153 if(!th->rta) return -1;
1155 if(th->rta > MAXTTL * 1000000) th->rta = MAXTTL * 1000000;
1156 if(th->pl > 100) th->pl = 100;
1158 return 0;
1159 }
/*
 * Compute the 16-bit one's-complement Internet checksum over n bytes
 * starting at p.
 *
 * The words are summed in host byte order and the result is meant to be
 * stored back into the packet as-is, which yields the correct bytes on the
 * wire regardless of endianness. An odd trailing byte is treated as if it
 * were followed by a zero pad byte.
 */
unsigned short
icmp_checksum(unsigned short *p, int n)
{
	unsigned long sum = 0;
	unsigned short odd = 0;

	while(n > 1) {
		sum += *p++;
		n -= 2;
	}

	/* mop up the occasional odd byte: read exactly one byte (never the
	 * word beyond it) and place it where the first byte of a 16-bit
	 * word lives on this host */
	if(n == 1) {
		*(unsigned char *)&odd = *(unsigned char *)p;
		sum += odd;
	}

	/* fold the carries back in until the sum fits in 16 bits */
	while(sum >> 16)
		sum = (sum >> 16) + (sum & 0xffff);

	/* ones-complement, trunc to 16 bits */
	return (unsigned short)~sum;
}
1182 void
1183 print_help(void)
1184 {
1186 //print_revision (progname, revision);
1188 printf ("Copyright (c) 2005 Andreas Ericsson <ae@op5.se>\n");
1189 printf (COPYRIGHT, copyright, email);
1191 printf ("\n\n");
1193 print_usage ();
1195 printf (_(UT_HELP_VRSN));
1197 printf (" %s\n", "-H");
1198 printf (" %s\n", _("specify a target"));
1199 printf (" %s\n", "-w");
1200 printf (" %s", _("warning threshold (currently "));
1201 printf ("%0.3fms,%u%%)\n", (float)warn.rta / 1000 , warn.pl / 1000);
1202 printf (" %s\n", "-c");
1203 printf (" %s", _("critical threshold (currently "));
1204 printf ("%0.3fms,%u%%)\n", (float)crit.rta), crit.pl;
1205 printf (" %s\n", "-n");
1206 printf (" %s", _("number of packets to send (currently "));
1207 printf ("%u)\n",packets);
1208 printf (" %s\n", "-i");
1209 printf (" %s", _("max packet interval (currently "));
1210 printf ("%0.3fms)\n",(float)pkt_interval / 1000);
1211 printf (" %s\n", "-I");
1212 printf (" %s", _("max target interval (currently "));
1213 printf ("%0.3fms)\n", (float)target_interval / 1000);
1214 printf (" %s\n", "-l");
1215 printf (" %s", _("TTL on outgoing packets (currently "));
1216 printf ("%u)", ttl);
1217 printf (" %s\n", "-t");
1218 printf (" %s",_("timeout value (seconds, currently "));
1219 printf ("%u)\n", timeout);
1220 printf (" %s\n", "-b");
1221 printf (" %s\n", _("icmp packet size (currenly ignored)"));
1222 printf (" %s\n", "-v");
1223 printf (" %s\n", _("verbose"));
1225 printf ("\n");
1226 printf ("%s\n\n", _("The -H switch is optional. Naming a host (or several) to check is not."));
1227 printf ("%s\n", _("Threshold format for -w and -c is 200.25,60% for 200.25 msec RTA and 60%"));
1228 printf ("%s\n", _("packet loss. The default values should work well for most users."));
1229 printf ("%s\n", _("You can specify different RTA factors using the standardized abbreviations"));
1230 printf ("%s\n\n", _("us (microseconds), ms (milliseconds, default) or just plain s for seconds."));
1231 printf ("%s\n", _("Threshold format for -d is warn,crit. 12,14 means WARNING if >= 12 hops"));
1232 printf ("%s\n", _("are spent and CRITICAL if >= 14 hops are spent."));
1233 printf ("%s\n\n", _("NOTE: Some systems decrease TTL when forming ICMP_ECHOREPLY, others do not."));
1234 printf ("%s\n\n", _("The -v switch can be specified several times for increased verbosity."));
1235 /* printf ("%s\n", _("Long options are currently unsupported."));
1236 printf ("%s\n", _("Options marked with * require an argument"));
1237 */
1238 printf (_(UT_SUPPORT));
1240 printf (_(UT_NOWARRANTY));
1241 }
1245 void
1246 print_usage (void)
1247 {
1248 printf (_("Usage:"));
1249 printf(" %s [options] [-H] host1 host2 hostn\n", progname);
1250 }