1 /*
2 * $Id$
3 *
4 * Author: Andreas Ericsson <ae@op5.se>
5 *
6 * License: GNU GPL 2.0 or any later version.
7 *
8 * Relevant RFC's: 792 (ICMP), 791 (IP)
9 *
10 * This program was modeled somewhat after the check_icmp program,
11 * which was in turn a hack of fping (www.fping.org) but has been
12 * completely rewritten since to generate higher precision rta values,
 * and support several different modes as well as setting ttl to control
 * redundant routes. The only remnants of fping are currently a few
 * function names.
16 *
17 */
19 #include <sys/time.h>
20 #include <sys/types.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <stdarg.h>
24 #include <unistd.h>
25 #include <stddef.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include <netdb.h>
30 #include <sys/socket.h>
31 #include <netinet/in_systm.h>
32 #include <netinet/in.h>
33 #include <netinet/ip.h>
34 #include <netinet/ip_icmp.h>
35 #include <arpa/inet.h>
36 #include <signal.h>
/** fallbacks for system macros that some platforms fail to define **/
#ifndef MAXTTL
# define MAXTTL 255
#endif
#ifndef INADDR_NONE
/* the all-ones IPv4 address (32 bits); inet_addr() returns it on failure */
# define INADDR_NONE 0xffffffffU
#endif

#ifndef SOL_IP
#define SOL_IP 0
#endif

/* we bundle these in one #ifndef, since they're all from BSD
 * Put individual #ifndef's around those that bother you */
#ifndef ICMP_UNREACH_NET_UNKNOWN
# define ICMP_UNREACH_NET_UNKNOWN 6
# define ICMP_UNREACH_HOST_UNKNOWN 7
# define ICMP_UNREACH_ISOLATED 8
# define ICMP_UNREACH_NET_PROHIB 9
# define ICMP_UNREACH_HOST_PROHIB 10
# define ICMP_UNREACH_TOSNET 11
# define ICMP_UNREACH_TOSHOST 12
#endif
/* tru64 has the ones above, but not these */
#ifndef ICMP_UNREACH_FILTER_PROHIB
# define ICMP_UNREACH_FILTER_PROHIB 13
# define ICMP_UNREACH_HOST_PRECEDENCE 14
# define ICMP_UNREACH_PRECEDENCE_CUTOFF 15
#endif
/** typedefs and such **/
/* result states; the numeric value of the final state is passed to exit() */
enum states {
    STATE_OK        = 0,
    STATE_WARNING   = 1,
    STATE_CRITICAL  = 2,
    STATE_UNKNOWN   = 3,
    STATE_DEPENDENT = 4,
    STATE_OOB       = 5
};
/* type for get_range() -- unimplemented */
typedef unsigned short range_t;

/*
 * Per-target bookkeeping. One of these exists for every address being
 * pinged; they are chained through `next` and also reachable by `id`
 * through the global lookup table.
 */
typedef struct rta_host {
    unsigned short id;               /* index in **table, also sent as icmp sequence */
    char *name;                      /* the argument this host was added with */
    char *msg;                       /* icmp error message, if any */
    struct sockaddr_in saddr_in;     /* address the pings are sent to */
    struct in_addr error_addr;       /* source address of an icmp error reply */
    unsigned long long time_waited;  /* accumulated round-trip time, usecs */
    unsigned int icmp_sent;          /* packets sent to this host */
    unsigned int icmp_recv;          /* echo replies received */
    unsigned int icmp_lost;          /* packets answered by an icmp error */
    unsigned char icmp_type;         /* type of the last icmp error */
    unsigned char icmp_code;         /* code of the last icmp error */
    unsigned short flags;            /* control/status flags (FLAG_*) */
    double rta;                      /* measured round-trip average */
    unsigned char pl;                /* measured packet loss, percent */
    struct rta_host *next;           /* next host in the linked list */
} rta_host;

/* rta_host.flags bit: decidedly dead target */
#define FLAG_LOST_CAUSE 0x01
/*
 * Alert threshold pair. Both members are upper limits; a measured value
 * that reaches the limit already counts as exceeding it.
 */
typedef struct threshold {
    unsigned char pl;   /* packet loss limit, percent */
    unsigned int rta;   /* round-trip average limit, microseconds */
} threshold;
/* payload stored in the data area of every echo request we send */
typedef struct icmp_ping_data {
    struct timeval stime;       /* send timestamp, used to compute the rtt */
    unsigned short ping_id;     /* per-packet id slot */
} icmp_ping_data;
/*
 * Operating modes:
 *
 *   MODE_RTA        send every packet no matter what (mimics check_icmp
 *                   and check_ping).
 *   MODE_HOSTCHECK  return immediately upon any sign of life. Packets go
 *                   to ALL addresses the name resolves to, and only one
 *                   host is expected to be checked at a time, so any
 *                   response whatsoever counts as a sign of life, even
 *                   when it arrives outside the crit.rta limit. Do not
 *                   misspell any additional IP's.
 *   MODE_ALL        requires packets from ALL requested IP's to return OK.
 *   MODE_ICMP       something similar to check_icmp (MODE_RTA without
 *                   tcp and udp args does this).
 */
#define MODE_RTA        0
#define MODE_HOSTCHECK  1
#define MODE_ALL        2
#define MODE_ICMP       3
/* ping types, one bit each so they can be or'ed together
 * TODO: investigate ARP ping as well */
#define HAVE_ICMP (1 << 0)
#define HAVE_UDP  (1 << 1)
#define HAVE_TCP  (1 << 2)
#define HAVE_ARP  (1 << 3)
/* packet size limits */
#define MIN_PING_DATA_SIZE      (sizeof(struct icmp_ping_data))
#define MAX_IP_PKT_SIZE         65536   /* (theoretical) max IP packet size */
#define IP_HDR_SIZE             20
#define MAX_PING_DATA           (MAX_IP_PKT_SIZE - IP_HDR_SIZE - ICMP_MINLEN)
#define DEFAULT_PING_DATA_SIZE  (MIN_PING_DATA_SIZE + 44)
/* target state bits */
#define TSTATE_INACTIVE (1 << 0)    /* don't ping this host anymore */
#define TSTATE_WAITING  (1 << 1)    /* unanswered packets on the wire */
#define TSTATE_ALIVE    (1 << 2)    /* target is alive (has answered something) */
#define TSTATE_UNREACH  (1 << 3)
147 /** prototypes **/
148 static void usage(unsigned char, char *);
149 static u_int get_timevar(const char *);
150 static u_int get_timevaldiff(struct timeval *, struct timeval *);
151 static int wait_for_reply(int, u_int);
152 static int recvfrom_wto(int, char *, unsigned int, struct sockaddr *, u_int *);
153 static int send_icmp_ping(int, struct rta_host *);
154 static int get_threshold(char *str, threshold *th);
155 static void run_checks(void);
156 static int add_target(char *);
157 static int add_target_ip(char *, struct in_addr *);
158 static int handle_random_icmp(struct icmp *, struct sockaddr_in *);
159 static unsigned short icmp_checksum(unsigned short *, int);
160 static void finish(int);
161 static void crash(const char *, ...);
/** external **/
/* getopt() state */
extern int optind;
extern int opterr;
extern int optopt;
extern char *optarg;
/* process environment */
extern char **environ;
168 /** global variables **/
169 static char *progname;
170 static struct rta_host **table, *cursor, *list;
171 static threshold crit = {80, 500000}, warn = {40, 200000};
172 static int mode, protocols, sockets, debug = 0, timeout = 10;
173 static unsigned short icmp_pkt_size, icmp_data_size = DEFAULT_PING_DATA_SIZE;
174 static unsigned int icmp_sent = 0, icmp_recv = 0, icmp_lost = 0;
175 #define icmp_pkts_en_route (icmp_sent - (icmp_recv + icmp_lost))
176 static unsigned short targets_down = 0, targets = 0, packets = 0;
177 #define targets_alive (targets - targets_down)
178 static unsigned int retry_interval, pkt_interval, target_interval;
179 static int icmp_sock, tcp_sock, udp_sock, status = STATE_OK;
180 static pid_t pid;
181 static struct timezone tz;
182 static struct timeval prog_start;
183 static unsigned long long max_completion_time = 0;
184 static unsigned char ttl = 0; /* outgoing ttl */
185 static unsigned int warn_down = 1, crit_down = 1; /* host down threshold values */
186 float pkt_backoff_factor = 1.5;
187 float target_backoff_factor = 1.5;
189 /** code start **/
190 static void
191 crash(const char *fmt, ...)
192 {
193 va_list ap;
195 printf("%s: ", progname);
197 va_start(ap, fmt);
198 vprintf(fmt, ap);
199 va_end(ap);
201 if(errno) printf(": %s", strerror(errno));
202 puts("");
204 exit(3);
205 }
208 static char *
209 get_icmp_error_msg(unsigned char icmp_type, unsigned char icmp_code)
210 {
211 char *msg = "unreachable";
213 if(debug > 1) printf("get_icmp_error_msg(%u, %u)\n", icmp_type, icmp_code);
214 switch(icmp_type) {
215 case ICMP_UNREACH:
216 switch(icmp_code) {
217 case ICMP_UNREACH_NET: msg = "Net unreachable"; break;
218 case ICMP_UNREACH_HOST: msg = "Host unreachable"; break;
219 case ICMP_UNREACH_PROTOCOL: msg = "Protocol unreachable (firewall?)"; break;
220 case ICMP_UNREACH_PORT: msg = "Port unreachable (firewall?)"; break;
221 case ICMP_UNREACH_NEEDFRAG: msg = "Fragmentation needed"; break;
222 case ICMP_UNREACH_SRCFAIL: msg = "Source route failed"; break;
223 case ICMP_UNREACH_ISOLATED: msg = "Source host isolated"; break;
224 case ICMP_UNREACH_NET_UNKNOWN: msg = "Unknown network"; break;
225 case ICMP_UNREACH_HOST_UNKNOWN: msg = "Unknown host"; break;
226 case ICMP_UNREACH_NET_PROHIB: msg = "Network denied (firewall?)"; break;
227 case ICMP_UNREACH_HOST_PROHIB: msg = "Host denied (firewall?)"; break;
228 case ICMP_UNREACH_TOSNET: msg = "Bad TOS for network (firewall?)"; break;
229 case ICMP_UNREACH_TOSHOST: msg = "Bad TOS for host (firewall?)"; break;
230 case ICMP_UNREACH_FILTER_PROHIB: msg = "Prohibited by filter (firewall)"; break;
231 case ICMP_UNREACH_HOST_PRECEDENCE: msg = "Host precedence violation"; break;
232 case ICMP_UNREACH_PRECEDENCE_CUTOFF: msg = "Precedence cutoff"; break;
233 default: msg = "Invalid code"; break;
234 }
235 break;
237 case ICMP_TIMXCEED:
238 /* really 'out of reach', or non-existant host behind a router serving
239 * two different subnets */
240 switch(icmp_code) {
241 case ICMP_TIMXCEED_INTRANS: msg = "Time to live exceeded in transit"; break;
242 case ICMP_TIMXCEED_REASS: msg = "Fragment reassembly time exceeded"; break;
243 default: msg = "Invalid code"; break;
244 }
245 break;
247 case ICMP_SOURCEQUENCH: msg = "Transmitting too fast"; break;
248 case ICMP_REDIRECT: msg = "Redirect (change route)"; break;
249 case ICMP_PARAMPROB: msg = "Bad IP header (required option absent)"; break;
251 /* the following aren't error messages, so ignore */
252 case ICMP_TSTAMP:
253 case ICMP_TSTAMPREPLY:
254 case ICMP_IREQ:
255 case ICMP_IREQREPLY:
256 case ICMP_MASKREQ:
257 case ICMP_MASKREPLY:
258 default: msg = ""; break;
259 }
261 return msg;
262 }
264 static int
265 handle_random_icmp(struct icmp *p, struct sockaddr_in *addr)
266 {
267 struct icmp *sent_icmp = NULL;
268 struct rta_host *host = NULL;
269 unsigned char *ptr;
271 if(p->icmp_type == ICMP_ECHO && p->icmp_id == pid) {
272 /* echo request from us to us (pinging localhost) */
273 return 0;
274 }
276 ptr = (unsigned char *)p;
277 if(debug) printf("handle_random_icmp(%p, %p)\n", (void *)p, (void *)addr);
279 /* only handle a few types, since others can't possibly be replies to
280 * us in a sane network (if it is anyway, it will be counted as lost
281 * at summary time, but not as quickly as a proper response */
282 /* TIMXCEED can be an unreach from a router with multiple IP's which
283 * serves two different subnets on the same interface and a dead host
284 * on one net is pinged from the other. The router will respond to
285 * itself and thus set TTL=0 so as to not loop forever. Even when
286 * TIMXCEED actually sends a proper icmp response we will have passed
287 * too many hops to have a hope of reaching it later, in which case it
288 * indicates overconfidence in the network, poor routing or both. */
289 if(p->icmp_type != ICMP_UNREACH && p->icmp_type != ICMP_TIMXCEED &&
290 p->icmp_type != ICMP_SOURCEQUENCH && p->icmp_type != ICMP_PARAMPROB)
291 {
292 return 0;
293 }
295 /* might be for us. At least it holds the original package (according
296 * to RFC 792). If it isn't, just ignore it */
297 sent_icmp = (struct icmp *)(ptr + 28);
298 if(sent_icmp->icmp_type != ICMP_ECHO || sent_icmp->icmp_id != pid ||
299 sent_icmp->icmp_seq >= targets)
300 {
301 if(debug) printf("Packet is no response to a packet we sent\n");
302 return 0;
303 }
305 /* it is indeed a response for us */
306 host = table[sent_icmp->icmp_seq];
307 if(debug) {
308 printf("Received \"%s\" from %s for ICMP ECHO sent to %s.\n",
309 get_icmp_error_msg(p->icmp_type, p->icmp_code),
310 inet_ntoa(addr->sin_addr), host->name);
311 }
313 icmp_lost++;
314 host->icmp_lost++;
315 /* don't spend time on lost hosts any more */
316 if(host->flags & FLAG_LOST_CAUSE) return 0;
318 /* source quench means we're sending too fast, so increase the
319 * interval and mark this packet lost */
320 if(p->icmp_type == ICMP_SOURCEQUENCH) {
321 pkt_interval *= pkt_backoff_factor;
322 target_interval *= target_backoff_factor;
323 }
324 else {
325 targets_down++;
326 host->flags |= FLAG_LOST_CAUSE;
327 }
328 host->icmp_type = p->icmp_type;
329 host->icmp_code = p->icmp_code;
330 host->error_addr.s_addr = addr->sin_addr.s_addr;
332 return 0;
333 }
335 int
336 main(int argc, char **argv)
337 {
338 int i;
339 char *ptr;
340 long int arg;
341 int icmp_sockerrno, udp_sockerrno, tcp_sockerrno;
342 int result;
343 struct rta_host *host;
345 /* we only need to be setsuid when we get the sockets, so do
346 * that before pointer magic (esp. on network data) */
347 icmp_sockerrno = udp_sockerrno = tcp_sockerrno = sockets = 0;
349 if((icmp_sock = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) != -1)
350 sockets |= HAVE_ICMP;
351 else icmp_sockerrno = errno;
353 /* if((udp_sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) != -1) */
354 /* sockets |= HAVE_UDP; */
355 /* else udp_sockerrno = errno; */
357 /* if((tcp_sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) */
358 /* sockets |= HAVE_TCP; */
359 /* else tcp_sockerrno = errno; */
361 /* now drop privileges (no effect if not setsuid or geteuid() == 0) */
362 setuid(getuid());
364 /* POSIXLY_CORRECT might break things, so unset it (the portable way) */
365 environ = NULL;
367 /* use the pid to mark packets as ours */
368 pid = getpid();
369 /* printf("pid = %u\n", pid); */
371 /* get calling name the old-fashioned way for portability instead
372 * of relying on the glibc-ism __progname */
373 ptr = strrchr(argv[0], '/');
374 if(ptr) progname = &ptr[1];
375 else progname = argv[0];
377 /* now set defaults. Use progname to set them initially (allows for
378 * superfast check_host program when target host is up */
379 cursor = list = NULL;
380 table = NULL;
382 mode = MODE_RTA;
383 crit.rta = 500000;
384 crit.pl = 80;
385 warn.rta = 200000;
386 warn.pl = 40;
387 protocols = HAVE_ICMP | HAVE_UDP | HAVE_TCP;
388 pkt_interval = 80000; /* 80 msec packet interval by default */
389 packets = 5;
391 if(!strcmp(progname, "check_icmp") || !strcmp(progname, "check_ping")) {
392 mode = MODE_ICMP;
393 protocols = HAVE_ICMP;
394 }
395 else if(!strcmp(progname, "check_host")) {
396 mode = MODE_HOSTCHECK;
397 pkt_interval = 1000000;
398 packets = 5;
399 crit.rta = warn.rta = 1000000;
400 crit.pl = warn.pl = 100;
401 }
402 else if(!strcmp(progname, "check_rta_multi")) {
403 mode = MODE_ALL;
404 target_interval = 0;
405 pkt_interval = 50000;
406 packets = 5;
407 }
409 /* parse the arguments */
410 for(i = 1; i < argc; i++) {
411 while((arg = getopt(argc, argv, "vhVw:c:n:p:t:H:i:b:I:l:")) != EOF) {
412 switch(arg) {
413 case 'v':
414 debug++;
415 break;
416 case 'b':
417 /* silently ignored for now */
418 break;
419 case 'i':
420 pkt_interval = get_timevar(optarg);
421 break;
422 case 'I':
423 target_interval = get_timevar(optarg);
424 break;
425 case 'w':
426 get_threshold(optarg, &warn);
427 break;
428 case 'c':
429 get_threshold(optarg, &crit);
430 break;
431 case 'n':
432 case 'p':
433 packets = strtoul(optarg, NULL, 0);
434 break;
435 case 't':
436 timeout = strtoul(optarg, NULL, 0);
437 if(!timeout) timeout = 10;
438 break;
439 case 'H':
440 add_target(optarg);
441 break;
442 case 'l':
443 ttl = (unsigned char)strtoul(optarg, NULL, 0);
444 break;
445 case 'd': /* implement later, for cluster checks */
446 warn_down = (unsigned char)strtoul(optarg, &ptr, 0);
447 if(ptr) {
448 crit_down = (unsigned char)strtoul(ptr + 1, NULL, 0);
449 }
450 break;
451 case 'h': case 'V': default:
452 usage(arg, NULL);
453 break;
454 }
455 }
456 }
458 argv = &argv[optind];
459 while(*argv) {
460 add_target(*argv);
461 argv++;
462 }
463 if(!targets) {
464 errno = 0;
465 crash("No hosts to check");
466 exit(3);
467 }
469 if(!sockets) {
470 if(icmp_sock == -1) {
471 errno = icmp_sockerrno;
472 crash("Failed to obtain ICMP socket");
473 return -1;
474 }
475 /* if(udp_sock == -1) { */
476 /* errno = icmp_sockerrno; */
477 /* crash("Failed to obtain UDP socket"); */
478 /* return -1; */
479 /* } */
480 /* if(tcp_sock == -1) { */
481 /* errno = icmp_sockerrno; */
482 /* crash("Failed to obtain TCP socker"); */
483 /* return -1; */
484 /* } */
485 }
486 if(!ttl) ttl = 64;
488 if(icmp_sock) {
489 result = setsockopt(icmp_sock, SOL_IP, IP_TTL, &ttl, sizeof(ttl));
490 if(debug) {
491 if(result == -1) printf("setsockopt failed\n");
492 else printf("ttl set to %u\n", ttl);
493 }
494 }
496 /* stupid users should be able to give whatever thresholds they want
497 * (nothing will break if they do), but some anal plugin maintainer
498 * will probably add some printf() thing here later, so it might be
499 * best to at least show them where to do it. ;) */
500 if(warn.pl > crit.pl) warn.pl = crit.pl;
501 if(warn.rta > crit.rta) warn.rta = crit.rta;
502 if(warn_down > crit_down) crit_down = warn_down;
504 signal(SIGINT, finish);
505 signal(SIGHUP, finish);
506 signal(SIGTERM, finish);
507 signal(SIGALRM, finish);
508 if(debug) printf("Setting alarm timeout to %u seconds\n", timeout);
509 alarm(timeout);
511 /* make sure we don't wait any longer than necessary */
512 gettimeofday(&prog_start, &tz);
513 max_completion_time =
514 ((targets * packets * pkt_interval) + (targets * target_interval)) +
515 (targets * packets * crit.rta) + crit.rta;
517 if(debug) {
518 printf("packets: %u, targets: %u\n"
519 "target_interval: %0.3f, pkt_interval %0.3f\n"
520 "crit.rta: %0.3f\n"
521 "max_completion_time: %0.3f\n",
522 packets, targets,
523 (float)target_interval / 1000, (float)pkt_interval / 1000,
524 (float)crit.rta / 1000,
525 (float)max_completion_time / 1000);
526 }
528 if(debug) {
529 if(max_completion_time > (u_int)timeout * 1000000) {
530 printf("max_completion_time: %llu timeout: %u\n",
531 max_completion_time, timeout);
532 printf("Timout must be at lest %llu\n",
533 max_completion_time / 1000000 + 1);
534 }
535 }
537 icmp_pkt_size = icmp_data_size + ICMP_MINLEN;
538 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
539 if(icmp_pkt_size < sizeof(struct icmp) + sizeof(struct icmp_ping_data)) {
540 icmp_pkt_size = sizeof(struct icmp) + sizeof(struct icmp_ping_data);
541 }
542 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
544 if(debug) {
545 printf("crit = {%u, %u%%}, warn = {%u, %u%%}\n",
546 crit.rta, crit.pl, warn.rta, warn.pl);
547 printf("pkt_interval: %u target_interval: %u retry_interval: %u\n",
548 pkt_interval, target_interval, retry_interval);
549 printf("icmp_pkt_size: %u timeout: %u\n",
550 icmp_pkt_size, timeout);
551 }
553 if(packets > 20) {
554 errno = 0;
555 crash("packets is > 20 (%d)", packets);
556 }
558 host = list;
559 table = malloc(sizeof(struct rta_host **) * (argc - 1));
560 i = 0;
561 while(host) {
562 host->id = i;
563 table[i] = host;
564 host = host->next;
565 i++;
566 }
568 run_checks();
570 errno = 0;
571 finish(0);
573 return(0);
574 }
576 static void
577 run_checks()
578 {
579 u_int i, t, result;
580 u_int final_wait, time_passed;
582 /* this loop might actually violate the pkt_interval or target_interval
583 * settings, but only if there aren't any packets on the wire which
584 * indicates that the target can handle an increased packet rate */
585 for(i = 0; i < packets; i++) {
586 for(t = 0; t < targets; t++) {
587 /* don't send useless packets */
588 if(!targets_alive) finish(0);
589 if(table[t]->flags & FLAG_LOST_CAUSE) {
590 if(debug) printf("%s is a lost cause. not sending any more\n",
591 table[t]->name);
592 continue;
593 }
595 /* we're still in the game, so send next packet */
596 (void)send_icmp_ping(icmp_sock, table[t]);
597 result = wait_for_reply(icmp_sock, target_interval);
598 }
599 result = wait_for_reply(icmp_sock, pkt_interval * targets);
600 }
602 if(icmp_pkts_en_route && targets_alive) {
603 time_passed = get_timevaldiff(NULL, NULL);
604 final_wait = max_completion_time - time_passed;
606 if(debug) {
607 printf("time_passed: %u final_wait: %u max_completion_time: %llu\n",
608 time_passed, final_wait, max_completion_time);
609 }
610 if(time_passed > max_completion_time) {
611 if(debug) printf("Time passed. Finishing up\n");
612 finish(0);
613 }
615 /* catch the packets that might come in within the timeframe, but
616 * haven't yet */
617 if(debug) printf("Waiting for %u micro-seconds (%0.3f msecs)\n",
618 final_wait, (float)final_wait / 1000);
619 result = wait_for_reply(icmp_sock, final_wait);
620 }
621 }
623 /* response structure:
624 * ip header : 20 bytes
625 * icmp header : 28 bytes
626 * icmp echo reply : the rest
627 */
628 static int
629 wait_for_reply(int sock, u_int t)
630 {
631 int n, hlen;
632 static char buf[4096];
633 struct sockaddr_in resp_addr;
634 struct ip *ip;
635 struct icmp *icp, *sent_icmp;
636 struct rta_host *host;
637 struct icmp_ping_data *data;
638 struct timeval wait_start, now;
639 u_int tdiff, i, per_pkt_wait;
641 /* if we can't listen or don't have anything to listen to, just return */
642 if(!t || !icmp_pkts_en_route) return 0;
644 gettimeofday(&wait_start, &tz);
646 i = t;
647 per_pkt_wait = t / icmp_pkts_en_route;
648 while(icmp_pkts_en_route && get_timevaldiff(&wait_start, NULL) < i) {
649 t = per_pkt_wait;
651 /* wrap up if all targets are declared dead */
652 if(!targets_alive ||
653 get_timevaldiff(&prog_start, NULL) >= max_completion_time ||
654 (mode == MODE_HOSTCHECK && targets_down))
655 {
656 finish(0);
657 }
659 /* reap responses until we hit a timeout */
660 n = recvfrom_wto(sock, buf, sizeof(buf),
661 (struct sockaddr *)&resp_addr, &t);
662 if(!n) {
663 if(debug > 1) {
664 printf("recvfrom_wto() timed out during a %u usecs wait\n",
665 per_pkt_wait);
666 }
667 continue; /* timeout for this one, so keep trying */
668 }
669 if(n < 0) {
670 if(debug) printf("recvfrom_wto() returned errors\n");
671 return n;
672 }
674 ip = (struct ip *)buf;
675 if(debug > 1) printf("received %u bytes from %s\n",
676 ntohs(ip->ip_len), inet_ntoa(resp_addr.sin_addr));
678 /* obsolete. alpha on tru64 provides the necessary defines, but isn't broken */
679 /* #if defined( __alpha__ ) && __STDC__ && !defined( __GLIBC__ ) */
680 /* alpha headers are decidedly broken. Using an ansi compiler,
681 * they provide ip_vhl instead of ip_hl and ip_v, so we mask
682 * off the bottom 4 bits */
683 /* hlen = (ip->ip_vhl & 0x0f) << 2; */
684 /* #else */
685 hlen = ip->ip_hl << 2;
686 /* #endif */
688 if(n < (hlen + ICMP_MINLEN)) {
689 crash("received packet too short for ICMP (%d bytes, expected %d) from %s\n",
690 n, hlen + icmp_pkt_size, inet_ntoa(resp_addr.sin_addr));
691 }
692 /* else if(debug) { */
693 /* printf("ip header size: %u, packet size: %u (expected %u, %u)\n", */
694 /* hlen, ntohs(ip->ip_len) - hlen, */
695 /* sizeof(struct ip), icmp_pkt_size); */
696 /* } */
698 /* check the response */
699 icp = (struct icmp *)(buf + hlen);
700 sent_icmp = (struct icmp *)(buf + hlen + ICMP_MINLEN);
701 /* printf("buf: %p, icp: %p, distance: %u (expected %u)\n", */
702 /* buf, icp, */
703 /* (u_int)icp - (u_int)buf, hlen); */
704 /* printf("buf: %p, sent_icmp: %p, distance: %u (expected %u)\n", */
705 /* buf, sent_icmp, */
706 /* (u_int)sent_icmp - (u_int)buf, hlen + ICMP_MINLEN); */
708 if(icp->icmp_id != pid) {
709 handle_random_icmp(icp, &resp_addr);
710 continue;
711 }
713 if(icp->icmp_type != ICMP_ECHOREPLY || icp->icmp_seq >= targets) {
714 if(debug > 2) printf("not a proper ICMP_ECHOREPLY\n");
715 handle_random_icmp(icp, &resp_addr);
716 continue;
717 }
719 /* this is indeed a valid response */
720 data = (struct icmp_ping_data *)(icp->icmp_data);
722 host = table[icp->icmp_seq];
723 gettimeofday(&now, &tz);
724 tdiff = get_timevaldiff(&data->stime, &now);
726 host->time_waited += tdiff;
727 host->icmp_recv++;
728 icmp_recv++;
730 if(debug) {
731 printf("%0.3f ms rtt from %s, outgoing ttl: %u, incoming ttl: %u\n",
732 (float)tdiff / 1000, inet_ntoa(resp_addr.sin_addr),
733 ttl, ip->ip_ttl);
734 }
736 /* if we're in hostcheck mode, exit with limited printouts */
737 if(mode == MODE_HOSTCHECK) {
738 printf("OK - %s responds to ICMP. Packet %u, rta %0.3fms|"
739 "pkt=%u;;0;%u rta=%0.3f;%0.3f;%0.3f;;\n",
740 host->name, icmp_recv, (float)tdiff / 1000,
741 icmp_recv, packets, (float)tdiff / 1000,
742 (float)warn.rta / 1000, (float)crit.rta / 1000);
743 exit(STATE_OK);
744 }
745 }
747 return 0;
748 }
750 /* the ping functions */
751 static int
752 send_icmp_ping(int sock, struct rta_host *host)
753 {
754 static char *buf = NULL; /* re-use so we prevent leaks */
755 long int len;
756 struct icmp *icp;
757 struct icmp_ping_data *data;
758 struct timeval tv;
759 struct sockaddr *addr;
762 if(sock == -1) {
763 errno = 0;
764 crash("Attempt to send on bogus socket");
765 return -1;
766 }
767 addr = (struct sockaddr *)&host->saddr_in;
769 if(!buf) {
770 buf = (char *)malloc(icmp_pkt_size + sizeof(struct ip));
771 if(!buf) {
772 crash("send_icmp_ping(): failed to malloc %d bytes for send buffer",
773 icmp_pkt_size);
774 return -1; /* might be reached if we're in debug mode */
775 }
776 }
777 memset(buf, 0, icmp_pkt_size + sizeof(struct ip));
779 if((gettimeofday(&tv, &tz)) == -1) return -1;
781 icp = (struct icmp *)buf;
782 icp->icmp_type = ICMP_ECHO;
783 icp->icmp_code = 0;
784 icp->icmp_cksum = 0;
785 icp->icmp_id = pid;
786 icp->icmp_seq = host->id;
787 data = (struct icmp_ping_data *)icp->icmp_data;
788 data->ping_id = 10; /* host->icmp.icmp_sent; */
789 memcpy(&data->stime, &tv, sizeof(struct timeval));
790 icp->icmp_cksum = icmp_checksum((u_short *)icp, icmp_pkt_size);
792 len = sendto(sock, buf, icmp_pkt_size, 0, (struct sockaddr *)addr,
793 sizeof(struct sockaddr));
795 if(len < 0 || (unsigned int)len != icmp_pkt_size) {
796 if(debug) printf("Failed to send ping to %s\n",
797 inet_ntoa(host->saddr_in.sin_addr));
798 return -1;
799 }
801 icmp_sent++;
802 host->icmp_sent++;
804 return 0;
805 }
807 static int
808 recvfrom_wto(int sock, char *buf, unsigned int len, struct sockaddr *saddr,
809 u_int *timo)
810 {
811 u_int slen;
812 int n;
813 struct timeval to, then, now;
814 fd_set rd, wr;
816 if(!*timo) {
817 if(debug) printf("*timo is not\n");
818 return 0;
819 }
821 to.tv_sec = *timo / 1000000;
822 to.tv_usec = (*timo - (to.tv_sec * 1000000));
824 FD_ZERO(&rd);
825 FD_ZERO(&wr);
826 FD_SET(sock, &rd);
827 errno = 0;
828 gettimeofday(&then, &tz);
829 n = select(sock + 1, &rd, &wr, NULL, &to);
830 if(n < 0) crash("select() in recvfrom_wto");
831 gettimeofday(&now, &tz);
832 *timo = get_timevaldiff(&then, &now);
834 if(!n) return 0; /* timeout */
836 slen = sizeof(struct sockaddr);
838 return recvfrom(sock, buf, len, 0, saddr, &slen);
839 }
841 static void
842 finish(int sig)
843 {
844 u_int i = 0;
845 unsigned char pl;
846 double rta;
847 struct rta_host *host;
848 char *status_string[] =
849 {"OK", "WARNING", "CRITICAL", "UNKNOWN", "DEPENDENT"};
851 alarm(0);
852 if(debug > 1) printf("finish(%d) called\n", sig);
854 if(icmp_sock != -1) close(icmp_sock);
855 if(udp_sock != -1) close(udp_sock);
856 if(tcp_sock != -1) close(tcp_sock);
858 if(debug) {
859 printf("icmp_sent: %u icmp_recv: %u icmp_lost: %u\n",
860 icmp_sent, icmp_recv, icmp_lost);
861 printf("targets: %u targets_alive: %u\n", targets, targets_alive);
862 }
864 /* iterate thrice to calculate values, give output, and print perfparse */
865 host = list;
866 while(host) {
867 if(!host->icmp_recv) {
868 /* rta 0 is ofcourse not entirely correct, but will still show up
869 * conspicuosly as missing entries in perfparse and cacti */
870 pl = 100;
871 rta = 0;
872 status = STATE_CRITICAL;
873 /* up the down counter if not already counted */
874 if(!(host->flags & FLAG_LOST_CAUSE) && targets_alive) targets_down++;
875 }
876 else {
877 pl = ((host->icmp_sent - host->icmp_recv) * 100) / host->icmp_sent;
878 rta = (double)host->time_waited / host->icmp_recv;
879 }
880 host->pl = pl;
881 host->rta = rta;
882 if(!status && (pl >= warn.pl || rta >= warn.rta)) status = STATE_WARNING;
883 if(pl >= crit.pl || rta >= crit.rta) status = STATE_CRITICAL;
885 host = host->next;
886 }
887 /* this is inevitable */
888 if(!targets_alive) status = STATE_CRITICAL;
889 printf("%s - ", status_string[status]);
891 host = list;
892 while(host) {
893 if(debug) puts("");
894 if(i) {
895 if(i < targets) printf(" :: ");
896 else printf("\n");
897 }
898 i++;
899 if(!host->icmp_recv) {
900 status = STATE_CRITICAL;
901 if(host->flags & FLAG_LOST_CAUSE) {
902 printf("%s: %s @ %s. rta nan, lost %d%%",
903 host->name,
904 get_icmp_error_msg(host->icmp_type, host->icmp_code),
905 inet_ntoa(host->error_addr),
906 100);
907 }
908 else { /* not marked as lost cause, so we have no flags for it */
909 printf("%s: rta nan, lost 100%%", host->name);
910 }
911 }
912 else { /* !icmp_recv */
913 printf("%s: rta %0.3fms, lost %u%%",
914 host->name, host->rta / 1000, host->pl);
915 }
917 host = host->next;
918 }
920 /* iterate once more for pretty perfparse output */
921 printf("|");
922 i = 0;
923 host = list;
924 while(host) {
925 if(debug) puts("");
926 printf("%srta=%0.3fms;%0.3f;%0.3f;0; %spl=%u%%;%u;%u;; ",
927 (targets > 1) ? host->name : "",
928 host->rta / 1000, (float)warn.rta / 1000, (float)crit.rta / 1000,
929 (targets > 1) ? host->name : "",
930 host->pl, warn.pl, crit.pl);
932 host = host->next;
933 }
935 /* finish with an empty line */
936 puts("");
937 if(debug) printf("targets: %u, targets_alive: %u\n",
938 targets, targets_alive);
940 exit(status);
941 }
943 static u_int
944 get_timevaldiff(struct timeval *early, struct timeval *later)
945 {
946 u_int ret;
947 struct timeval now;
949 if(!later) {
950 gettimeofday(&now, &tz);
951 later = &now;
952 }
953 if(!early) early = &prog_start;
955 /* if early > later we return 0 so as to indicate a timeout */
956 if(early->tv_sec > early->tv_sec ||
957 (early->tv_sec == later->tv_sec && early->tv_usec > later->tv_usec))
958 {
959 return 0;
960 }
962 ret = (later->tv_sec - early->tv_sec) * 1000000;
963 ret += later->tv_usec - early->tv_usec;
965 return ret;
966 }
968 static int
969 add_target_ip(char *arg, struct in_addr *in)
970 {
971 struct rta_host *host;
973 /* disregard obviously stupid addresses */
974 if(in->s_addr == INADDR_NONE || in->s_addr == INADDR_ANY)
975 return -1;
977 /* no point in adding two identical IP's, so don't. ;) */
978 host = list;
979 while(host) {
980 if(host->saddr_in.sin_addr.s_addr == in->s_addr) {
981 if(debug) printf("Identical IP already exists. Not adding %s\n", arg);
982 return -1;
983 }
984 host = host->next;
985 }
987 /* add the fresh ip */
988 host = malloc(sizeof(struct rta_host));
989 if(!host) {
990 crash("add_target_ip(%s, %s): malloc(%d) failed",
991 arg, inet_ntoa(*in), sizeof(struct rta_host));
992 }
993 memset(host, 0, sizeof(struct rta_host));
995 /* set the values. use calling name for output */
996 host->name = strdup(arg);
998 /* fill out the sockaddr_in struct */
999 host->saddr_in.sin_family = AF_INET;
1000 host->saddr_in.sin_addr.s_addr = in->s_addr;
1002 if(!list) list = cursor = host;
1003 else cursor->next = host;
1005 cursor = host;
1006 targets++;
1008 return 0;
1009 }
1011 /* wrapper for add_target_ip */
1012 static int
1013 add_target(char *arg)
1014 {
1015 int i;
1016 struct hostent *he;
1017 struct in_addr *in, ip;
1019 /* don't resolve if we don't have to */
1020 if((ip.s_addr = inet_addr(arg)) != INADDR_NONE) {
1021 /* don't add all ip's if we were given a specific one */
1022 return add_target_ip(arg, &ip);
1023 /* he = gethostbyaddr((char *)in, sizeof(struct in_addr), AF_INET); */
1024 /* if(!he) return add_target_ip(arg, in); */
1025 }
1026 else {
1027 errno = 0;
1028 he = gethostbyname(arg);
1029 if(!he) {
1030 errno = 0;
1031 crash("Failed to resolve %s", arg);
1032 return -1;
1033 }
1034 }
1036 /* possibly add all the IP's as targets */
1037 for(i = 0; he->h_addr_list[i]; i++) {
1038 in = (struct in_addr *)he->h_addr_list[i];
1039 add_target_ip(arg, in);
1041 /* this is silly, but it works */
1042 if(mode == MODE_HOSTCHECK || mode == MODE_ALL) {
1043 printf("mode: %d\n", mode);
1044 continue;
1045 }
1046 break;
1047 }
1049 return 0;
1050 }
1051 /*
1052 * u = micro
1053 * m = milli
1054 * s = seconds
1055 * return value is in microseconds
1056 */
1057 static u_int
1058 get_timevar(const char *str)
1059 {
1060 char p, u, *ptr;
1061 unsigned int len;
1062 u_int i, d; /* integer and decimal, respectively */
1063 u_int factor = 1000; /* default to milliseconds */
1065 if(!str) return 0;
1066 len = strlen(str);
1067 if(!len) return 0;
1069 /* unit might be given as ms|m (millisec),
1070 * us|u (microsec) or just plain s, for seconds */
1071 u = p = '\0';
1072 u = str[len - 1];
1073 if(len >= 2 && !isdigit((int)str[len - 2])) p = str[len - 2];
1074 if(p && u == 's') u = p;
1075 else if(!p) p = u;
1076 if(debug > 2) printf("evaluating %s, u: %c, p: %c\n", str, u, p);
1078 if(u == 'u') factor = 1; /* microseconds */
1079 else if(u == 'm') factor = 1000; /* milliseconds */
1080 else if(u == 's') factor = 1000000; /* seconds */
1081 if(debug > 2) printf("factor is %u\n", factor);
1083 i = strtoul(str, &ptr, 0);
1084 if(!ptr || *ptr != '.' || strlen(ptr) < 2 || factor == 1)
1085 return i * factor;
1087 /* time specified in usecs can't have decimal points, so ignore them */
1088 if(factor == 1) return i;
1090 d = strtoul(ptr + 1, NULL, 0);
1092 /* d is decimal, so get rid of excess digits */
1093 while(d >= factor) d /= 10;
1095 /* the last parenthesis avoids floating point exceptions. */
1096 return ((i * factor) + (d * (factor / 10)));
1097 }
1099 /* not too good at checking errors, but it'll do (main() should barfe on -1) */
1100 static int
1101 get_threshold(char *str, threshold *th)
1102 {
1103 char *p = NULL, i = 0;
1105 if(!str || !strlen(str) || !th) return -1;
1107 /* pointer magic slims code by 10 lines. i is bof-stop on stupid libc's */
1108 p = &str[strlen(str) - 1];
1109 while(p != &str[1]) {
1110 if(*p == '%') *p = '\0';
1111 else if(*p == ',' && i) {
1112 *p = '\0'; /* reset it so get_timevar(str) works nicely later */
1113 th->pl = (unsigned char)strtoul(p+1, NULL, 0);
1114 break;
1115 }
1116 i = 1;
1117 p--;
1118 }
1119 th->rta = get_timevar(str);
1121 if(!th->rta) return -1;
1123 if(th->rta > MAXTTL * 1000000) th->rta = MAXTTL * 1000000;
1124 if(th->pl > 100) th->pl = 100;
1126 return 0;
1127 }
/*
 * Compute the 16-bit ones-complement checksum over n bytes starting at p.
 *
 * Complete 16-bit words are summed in host memory order. When n is odd,
 * the final byte is treated as the first byte of a word whose second byte
 * is zero, so only that single byte is read from the buffer.
 *
 * Returns the complemented, folded sum; 0xffff for n <= 0.
 */
unsigned short
icmp_checksum(unsigned short *p, int n)
{
	unsigned long sum = 0;
	unsigned short tail = 0;

	while(n > 1) {
		sum += *p++;
		n -= 2;
	}

	/* mop up the occasional odd byte: copy it into the first byte of a
	 * zeroed word so it lands in the right half on any byte order */
	if(n == 1) {
		*(unsigned char *)&tail = *(unsigned char *)p;
		sum += tail;
	}

	sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */
	sum += (sum >> 16);                 /* add carry */

	/* ones-complement, trunc to 16 bits */
	return (unsigned short)~sum;
}
1150 /* make core plugin developers happy (silly, really) */
1151 static void
1152 usage(unsigned char arg, char *msg)
1153 {
1154 if(msg) printf("%s: %s\n", progname, msg);
1156 if(arg == 'V') {
1157 printf("$Id$\n");
1158 exit(STATE_UNKNOWN);
1159 }
1161 printf("Usage: %s [options] [-H] host1 host2 hostn\n\n", progname);
1163 if(arg != 'h') exit(3);
1165 printf("Where options are any combination of:\n"
1166 " * -H | --host specify a target\n"
1167 " * -w | --warn warning threshold (currently %0.3fms,%u%%)\n"
1168 " * -c | --crit critical threshold (currently %0.3fms,%u%%)\n"
1169 " * -n | --packets number of packets to send (currently %u)\n"
1170 " * -i | --interval max packet interval (currently %0.3fms)\n"
1171 " * -I | --hostint max target interval (currently %0.3fms)\n"
1172 " * -l | --ttl TTL on outgoing packets (currently %u)\n"
1173 " * -t | --timeout timeout value (seconds, currently %u)\n"
1174 " * -b | --bytes icmp packet size (currenly ignored)\n"
1175 " -v | --verbose verbosity++\n"
1176 " -h | --help this cruft\n",
1177 (float)warn.rta / 1000, warn.pl, (float)crit.rta / 1000, crit.pl,
1178 packets,
1179 (float)pkt_interval / 1000, (float)target_interval / 1000,
1180 ttl, timeout);
1182 puts("\nThe -H switch is optional. Naming a host (or several) to check is not.\n\n"
1183 "Threshold format for -w and -c is 200.25,60% for 200.25 msec RTA and 60%\n"
1184 "packet loss. The default values should work well for most users.\n"
1185 "You can specify different RTA factors using the standardized abbreviations\n"
1186 "us (microseconds), ms (milliseconds, default) or just plain s for seconds.\n\n"
1187 "Threshold format for -d is warn,crit. 12,14 means WARNING if >= 12 hops\n"
1188 "are spent and CRITICAL if >= 14 hops are spent.\n"
1189 "NOTE: Some systems decrease TTL when forming ICMP_ECHOREPLY, others do not.\n\n"
1190 "The -v switch can be specified several times for increased verbosity.\n\n"
1191 "Long options are currently unsupported.\n\n"
1192 "Options marked with * require an argument\n");
1194 puts("The latest version of this plugin can be found at http://oss.op5.se/nagios\n"
1195 "or https://devel.op5.se/oss until the day it is included in the official\n"
1196 "plugin distribution.\n");
1198 exit(3);
1199 }