1 /*
2 * $Id$
3 *
4 * Author: Andreas Ericsson <ae@op5.se>
5 *
6 * License: GNU GPL 2.0 or any later version.
7 *
8 * Relevant RFC's: 792 (ICMP), 791 (IP)
9 *
10 * This program was modeled somewhat after the check_icmp program,
11 * which was in turn a hack of fping (www.fping.org) but has been
12 * completely rewritten since to generate higher precision rta values,
 * and support several different modes as well as setting ttl to control
 * redundant routes. The only remnants of fping are currently a few
 * function names.
16 *
17 */
19 #include <sys/time.h>
20 #include <sys/types.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <stdarg.h>
24 #include <unistd.h>
25 #include <stddef.h>
26 #include <errno.h>
27 #include <string.h>
28 #include <ctype.h>
29 #include <netdb.h>
30 #include <sys/socket.h>
31 #include <netinet/in_systm.h>
32 #include <netinet/in.h>
33 #include <netinet/ip.h>
34 #include <netinet/ip_icmp.h>
35 #include <arpa/inet.h>
36 #include <signal.h>
38 #include "common.h"
39 #include "runcmd.h"
40 #include "utils.h"
41 #include "regex.h"
/** sometimes undefined system macros (quite a few, actually) **/
#ifndef MAXTTL
# define MAXTTL 255
#endif
#ifndef INADDR_NONE
/* all 32 address bits set; add_target() compares the result of
 * inet_addr() against this to detect a string that is not an IP */
# define INADDR_NONE 0xffffffffU
#endif

#ifndef SOL_IP
/* "level" argument used with setsockopt() when setting IP_TTL */
# define SOL_IP 0
#endif
/* Fallback values for the BSD-style ICMP "destination unreachable" codes.
 * They are guarded as one group: if the first one is missing, all seven
 * are defined. Put individual #ifndef's around those that bother you. */
#ifndef ICMP_UNREACH_NET_UNKNOWN
#  define ICMP_UNREACH_NET_UNKNOWN        6
#  define ICMP_UNREACH_HOST_UNKNOWN       7
#  define ICMP_UNREACH_ISOLATED           8
#  define ICMP_UNREACH_NET_PROHIB         9
#  define ICMP_UNREACH_HOST_PROHIB        10
#  define ICMP_UNREACH_TOSNET             11
#  define ICMP_UNREACH_TOSHOST            12
#endif

/* tru64 has the ones above, but not these */
#ifndef ICMP_UNREACH_FILTER_PROHIB
#  define ICMP_UNREACH_FILTER_PROHIB      13
#  define ICMP_UNREACH_HOST_PRECEDENCE    14
#  define ICMP_UNREACH_PRECEDENCE_CUTOFF  15
#endif
/** typedefs and such **/

/* result states; finish() stores one of these in `status`, uses it to
 * pick the label it prints and passes it to exit() */
enum states {
	STATE_OK        = 0,
	STATE_WARNING   = 1,
	STATE_CRITICAL  = 2,
	STATE_UNKNOWN   = 3,
	STATE_DEPENDENT = 4,
	STATE_OOB       = 5
};

/* type for get_range() -- unimplemented */
typedef unsigned short range_t;
/* Book-keeping for one target address. Nodes are chained through `next`
 * (see add_target_ip()) and indexed through the global `table`. */
typedef struct rta_host {
	/* id in **table, and icmp pkts */
	unsigned short id;
	/* arg used for adding this host */
	char *name;
	/* icmp error message, if any */
	char *msg;
	/* the address of this host */
	struct sockaddr_in saddr_in;
	/* stores address of error replies */
	struct in_addr error_addr;
	/* total time waited, in usecs */
	unsigned long long time_waited;
	/* counters */
	unsigned int icmp_sent;
	unsigned int icmp_recv;
	unsigned int icmp_lost;
	/* type and code from errors */
	unsigned char icmp_type;
	unsigned char icmp_code;
	/* control/status flags */
	unsigned short flags;
	/* measured RTA */
	double rta;
	/* measured packet loss */
	unsigned char pl;
	/* linked list */
	struct rta_host *next;
} rta_host;

/* bit in rta_host.flags: decidedly dead target. */
#define FLAG_LOST_CAUSE 0x01
/* A warning or critical limit pair. All values are maximum allowed,
 * exclusive: finish() raises the state as soon as a measurement is
 * greater than or equal to the stored value. */
typedef struct threshold {
	/* max allowed packet loss in percent */
	unsigned char pl;
	/* roundtrip time average, microseconds */
	unsigned int rta;
} threshold;
/* Payload placed in the data area of every echo request by
 * send_icmp_ping() and read back from the reply by wait_for_reply(). */
typedef struct icmp_ping_data {
	/* timestamp (saved in protocol struct as well) */
	struct timeval stime;
	unsigned short ping_id;
} icmp_ping_data;
/* Operating modes, stored in the global `mode`. main() picks one from
 * the name the program was invoked under:
 *
 * MODE_RTA:       send all packets no matter what (mimic check_icmp and
 *                 check_ping). This is what main() starts out with.
 * MODE_HOSTCHECK: return immediately upon any sign of life. In addition,
 *                 sends packets to ALL addresses assigned to this host
 *                 (as returned by gethostbyname() or gethostbyaddr())
 *                 and expects one host only to be checked at a time.
 *                 Therefore, any packet response whatsoever will count
 *                 as a sign of life, even when received outside the
 *                 crit.rta limit. Do not misspell any additional IP's.
 * MODE_ALL:       requires packets from ALL requested IP's to return OK
 *                 (selected when invoked as check_rta_multi).
 * MODE_ICMP:      implement something similar to check_icmp (MODE_RTA
 *                 without tcp and udp args does this).
 */
#define MODE_RTA        0
#define MODE_HOSTCHECK  1
#define MODE_ALL        2
#define MODE_ICMP       3
/* the different ping types we can do (bit flags, or'ed together in the
 * globals `protocols` and `sockets`)
 * TODO: investigate ARP ping as well */
#define HAVE_ICMP 1
#define HAVE_UDP 2
#define HAVE_TCP 4
#define HAVE_ARP 8

/* smallest payload: it has to hold our own timestamp structure */
#define MIN_PING_DATA_SIZE sizeof(struct icmp_ping_data)
/* (theoretical) max IP packet size: the total-length field of the IP
 * header is 16 bits wide, so 65535 is the largest value it can carry */
#define MAX_IP_PKT_SIZE 65535
/* IP header without options */
#define IP_HDR_SIZE 20
#define MAX_PING_DATA (MAX_IP_PKT_SIZE - IP_HDR_SIZE - ICMP_MINLEN)
#define DEFAULT_PING_DATA_SIZE (MIN_PING_DATA_SIZE + 44)
/* various target states (bit flags) */
/* don't ping this host anymore */
#define TSTATE_INACTIVE  0x01
/* unanswered packets on the wire */
#define TSTATE_WAITING   0x02
/* target is alive (has answered something) */
#define TSTATE_ALIVE     0x04
#define TSTATE_UNREACH   0x08
152 /** prototypes **/
153 static void usage(unsigned char, char *);
154 static u_int get_timevar(const char *);
155 static u_int get_timevaldiff(struct timeval *, struct timeval *);
156 static int wait_for_reply(int, u_int);
157 static int recvfrom_wto(int, char *, unsigned int, struct sockaddr *, u_int *);
158 static int send_icmp_ping(int, struct rta_host *);
159 static int get_threshold(char *str, threshold *th);
160 static void run_checks(void);
161 static int add_target(char *);
162 static int add_target_ip(char *, struct in_addr *);
163 static int handle_random_icmp(struct icmp *, struct sockaddr_in *);
164 static unsigned short icmp_checksum(unsigned short *, int);
165 static void finish(int);
166 static void crash(const char *, ...);
/** external **/
/* process environment; main() sets it to NULL */
extern char **environ;
/* getopt() state, read by main() while parsing the command line */
extern char *optarg;
extern int optind;
extern int opterr;
extern int optopt;
173 /** global variables **/
174 static char *progname;
175 static struct rta_host **table, *cursor, *list;
176 static threshold crit = {80, 500000}, warn = {40, 200000};
177 static int mode, protocols, sockets, debug = 0, timeout = 10;
178 static unsigned short icmp_pkt_size, icmp_data_size = DEFAULT_PING_DATA_SIZE;
179 static unsigned int icmp_sent = 0, icmp_recv = 0, icmp_lost = 0;
180 #define icmp_pkts_en_route (icmp_sent - (icmp_recv + icmp_lost))
181 static unsigned short targets_down = 0, targets = 0, packets = 0;
182 #define targets_alive (targets - targets_down)
183 static unsigned int retry_interval, pkt_interval, target_interval;
184 static int icmp_sock, tcp_sock, udp_sock, status = STATE_OK;
185 static pid_t pid;
186 static struct timezone tz;
187 static struct timeval prog_start;
188 static unsigned long long max_completion_time = 0;
189 static unsigned char ttl = 0; /* outgoing ttl */
190 static unsigned int warn_down = 1, crit_down = 1; /* host down threshold values */
191 float pkt_backoff_factor = 1.5;
192 float target_backoff_factor = 1.5;
194 /** code start **/
195 static void
196 crash(const char *fmt, ...)
197 {
198 va_list ap;
200 printf("%s: ", progname);
202 va_start(ap, fmt);
203 vprintf(fmt, ap);
204 va_end(ap);
206 if(errno) printf(": %s", strerror(errno));
207 puts("");
209 exit(3);
210 }
213 static char *
214 get_icmp_error_msg(unsigned char icmp_type, unsigned char icmp_code)
215 {
216 char *msg = "unreachable";
218 if(debug > 1) printf("get_icmp_error_msg(%u, %u)\n", icmp_type, icmp_code);
219 switch(icmp_type) {
220 case ICMP_UNREACH:
221 switch(icmp_code) {
222 case ICMP_UNREACH_NET: msg = "Net unreachable"; break;
223 case ICMP_UNREACH_HOST: msg = "Host unreachable"; break;
224 case ICMP_UNREACH_PROTOCOL: msg = "Protocol unreachable (firewall?)"; break;
225 case ICMP_UNREACH_PORT: msg = "Port unreachable (firewall?)"; break;
226 case ICMP_UNREACH_NEEDFRAG: msg = "Fragmentation needed"; break;
227 case ICMP_UNREACH_SRCFAIL: msg = "Source route failed"; break;
228 case ICMP_UNREACH_ISOLATED: msg = "Source host isolated"; break;
229 case ICMP_UNREACH_NET_UNKNOWN: msg = "Unknown network"; break;
230 case ICMP_UNREACH_HOST_UNKNOWN: msg = "Unknown host"; break;
231 case ICMP_UNREACH_NET_PROHIB: msg = "Network denied (firewall?)"; break;
232 case ICMP_UNREACH_HOST_PROHIB: msg = "Host denied (firewall?)"; break;
233 case ICMP_UNREACH_TOSNET: msg = "Bad TOS for network (firewall?)"; break;
234 case ICMP_UNREACH_TOSHOST: msg = "Bad TOS for host (firewall?)"; break;
235 case ICMP_UNREACH_FILTER_PROHIB: msg = "Prohibited by filter (firewall)"; break;
236 case ICMP_UNREACH_HOST_PRECEDENCE: msg = "Host precedence violation"; break;
237 case ICMP_UNREACH_PRECEDENCE_CUTOFF: msg = "Precedence cutoff"; break;
238 default: msg = "Invalid code"; break;
239 }
240 break;
242 case ICMP_TIMXCEED:
243 /* really 'out of reach', or non-existant host behind a router serving
244 * two different subnets */
245 switch(icmp_code) {
246 case ICMP_TIMXCEED_INTRANS: msg = "Time to live exceeded in transit"; break;
247 case ICMP_TIMXCEED_REASS: msg = "Fragment reassembly time exceeded"; break;
248 default: msg = "Invalid code"; break;
249 }
250 break;
252 case ICMP_SOURCEQUENCH: msg = "Transmitting too fast"; break;
253 case ICMP_REDIRECT: msg = "Redirect (change route)"; break;
254 case ICMP_PARAMPROB: msg = "Bad IP header (required option absent)"; break;
256 /* the following aren't error messages, so ignore */
257 case ICMP_TSTAMP:
258 case ICMP_TSTAMPREPLY:
259 case ICMP_IREQ:
260 case ICMP_IREQREPLY:
261 case ICMP_MASKREQ:
262 case ICMP_MASKREPLY:
263 default: msg = ""; break;
264 }
266 return msg;
267 }
269 static int
270 handle_random_icmp(struct icmp *p, struct sockaddr_in *addr)
271 {
272 struct icmp *sent_icmp = NULL;
273 struct rta_host *host = NULL;
274 unsigned char *ptr;
276 if(p->icmp_type == ICMP_ECHO && p->icmp_id == pid) {
277 /* echo request from us to us (pinging localhost) */
278 return 0;
279 }
281 ptr = (unsigned char *)p;
282 if(debug) printf("handle_random_icmp(%p, %p)\n", (void *)p, (void *)addr);
284 /* only handle a few types, since others can't possibly be replies to
285 * us in a sane network (if it is anyway, it will be counted as lost
286 * at summary time, but not as quickly as a proper response */
287 /* TIMXCEED can be an unreach from a router with multiple IP's which
288 * serves two different subnets on the same interface and a dead host
289 * on one net is pinged from the other. The router will respond to
290 * itself and thus set TTL=0 so as to not loop forever. Even when
291 * TIMXCEED actually sends a proper icmp response we will have passed
292 * too many hops to have a hope of reaching it later, in which case it
293 * indicates overconfidence in the network, poor routing or both. */
294 if(p->icmp_type != ICMP_UNREACH && p->icmp_type != ICMP_TIMXCEED &&
295 p->icmp_type != ICMP_SOURCEQUENCH && p->icmp_type != ICMP_PARAMPROB)
296 {
297 return 0;
298 }
300 /* might be for us. At least it holds the original package (according
301 * to RFC 792). If it isn't, just ignore it */
302 sent_icmp = (struct icmp *)(ptr + 28);
303 if(sent_icmp->icmp_type != ICMP_ECHO || sent_icmp->icmp_id != pid ||
304 sent_icmp->icmp_seq >= targets)
305 {
306 if(debug) printf("Packet is no response to a packet we sent\n");
307 return 0;
308 }
310 /* it is indeed a response for us */
311 host = table[sent_icmp->icmp_seq];
312 if(debug) {
313 printf("Received \"%s\" from %s for ICMP ECHO sent to %s.\n",
314 get_icmp_error_msg(p->icmp_type, p->icmp_code),
315 inet_ntoa(addr->sin_addr), host->name);
316 }
318 icmp_lost++;
319 host->icmp_lost++;
320 /* don't spend time on lost hosts any more */
321 if(host->flags & FLAG_LOST_CAUSE) return 0;
323 /* source quench means we're sending too fast, so increase the
324 * interval and mark this packet lost */
325 if(p->icmp_type == ICMP_SOURCEQUENCH) {
326 pkt_interval *= pkt_backoff_factor;
327 target_interval *= target_backoff_factor;
328 }
329 else {
330 targets_down++;
331 host->flags |= FLAG_LOST_CAUSE;
332 }
333 host->icmp_type = p->icmp_type;
334 host->icmp_code = p->icmp_code;
335 host->error_addr.s_addr = addr->sin_addr.s_addr;
337 return 0;
338 }
340 int
341 main(int argc, char **argv)
342 {
343 int i;
344 char *ptr;
345 long int arg;
346 int icmp_sockerrno, udp_sockerrno, tcp_sockerrno;
347 int result;
348 struct rta_host *host;
350 /* we only need to be setsuid when we get the sockets, so do
351 * that before pointer magic (esp. on network data) */
352 icmp_sockerrno = udp_sockerrno = tcp_sockerrno = sockets = 0;
354 if((icmp_sock = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) != -1)
355 sockets |= HAVE_ICMP;
356 else icmp_sockerrno = errno;
358 /* if((udp_sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) != -1) */
359 /* sockets |= HAVE_UDP; */
360 /* else udp_sockerrno = errno; */
362 /* if((tcp_sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) */
363 /* sockets |= HAVE_TCP; */
364 /* else tcp_sockerrno = errno; */
366 /* now drop privileges (no effect if not setsuid or geteuid() == 0) */
367 setuid(getuid());
369 /* POSIXLY_CORRECT might break things, so unset it (the portable way) */
370 environ = NULL;
372 /* use the pid to mark packets as ours */
373 pid = getpid();
374 /* printf("pid = %u\n", pid); */
376 /* get calling name the old-fashioned way for portability instead
377 * of relying on the glibc-ism __progname */
378 ptr = strrchr(argv[0], '/');
379 if(ptr) progname = &ptr[1];
380 else progname = argv[0];
382 /* now set defaults. Use progname to set them initially (allows for
383 * superfast check_host program when target host is up */
384 cursor = list = NULL;
385 table = NULL;
387 mode = MODE_RTA;
388 crit.rta = 500000;
389 crit.pl = 80;
390 warn.rta = 200000;
391 warn.pl = 40;
392 protocols = HAVE_ICMP | HAVE_UDP | HAVE_TCP;
393 pkt_interval = 80000; /* 80 msec packet interval by default */
394 packets = 5;
396 if(!strcmp(progname, "check_icmp") || !strcmp(progname, "check_ping")) {
397 mode = MODE_ICMP;
398 protocols = HAVE_ICMP;
399 }
400 else if(!strcmp(progname, "check_host")) {
401 mode = MODE_HOSTCHECK;
402 pkt_interval = 1000000;
403 packets = 5;
404 crit.rta = warn.rta = 1000000;
405 crit.pl = warn.pl = 100;
406 }
407 else if(!strcmp(progname, "check_rta_multi")) {
408 mode = MODE_ALL;
409 target_interval = 0;
410 pkt_interval = 50000;
411 packets = 5;
412 }
414 /* parse the arguments */
415 for(i = 1; i < argc; i++) {
416 while((arg = getopt(argc, argv, "vhVw:c:n:p:t:H:i:b:I:l:")) != EOF) {
417 switch(arg) {
418 case 'v':
419 debug++;
420 break;
421 case 'b':
422 /* silently ignored for now */
423 break;
424 case 'i':
425 pkt_interval = get_timevar(optarg);
426 break;
427 case 'I':
428 target_interval = get_timevar(optarg);
429 break;
430 case 'w':
431 get_threshold(optarg, &warn);
432 break;
433 case 'c':
434 get_threshold(optarg, &crit);
435 break;
436 case 'n':
437 case 'p':
438 packets = strtoul(optarg, NULL, 0);
439 break;
440 case 't':
441 timeout = strtoul(optarg, NULL, 0);
442 if(!timeout) timeout = 10;
443 break;
444 case 'H':
445 add_target(optarg);
446 break;
447 case 'l':
448 ttl = (unsigned char)strtoul(optarg, NULL, 0);
449 break;
450 case 'd': /* implement later, for cluster checks */
451 warn_down = (unsigned char)strtoul(optarg, &ptr, 0);
452 if(ptr) {
453 crit_down = (unsigned char)strtoul(ptr + 1, NULL, 0);
454 }
455 break;
456 case 'h': case 'V': default:
457 usage(arg, NULL);
458 break;
459 }
460 }
461 }
463 argv = &argv[optind];
464 while(*argv) {
465 add_target(*argv);
466 argv++;
467 }
468 if(!targets) {
469 errno = 0;
470 crash("No hosts to check");
471 exit(3);
472 }
474 if(!sockets) {
475 if(icmp_sock == -1) {
476 errno = icmp_sockerrno;
477 crash("Failed to obtain ICMP socket");
478 return -1;
479 }
480 /* if(udp_sock == -1) { */
481 /* errno = icmp_sockerrno; */
482 /* crash("Failed to obtain UDP socket"); */
483 /* return -1; */
484 /* } */
485 /* if(tcp_sock == -1) { */
486 /* errno = icmp_sockerrno; */
487 /* crash("Failed to obtain TCP socker"); */
488 /* return -1; */
489 /* } */
490 }
491 if(!ttl) ttl = 64;
493 if(icmp_sock) {
494 result = setsockopt(icmp_sock, SOL_IP, IP_TTL, &ttl, sizeof(ttl));
495 if(debug) {
496 if(result == -1) printf("setsockopt failed\n");
497 else printf("ttl set to %u\n", ttl);
498 }
499 }
501 /* stupid users should be able to give whatever thresholds they want
502 * (nothing will break if they do), but some anal plugin maintainer
503 * will probably add some printf() thing here later, so it might be
504 * best to at least show them where to do it. ;) */
505 if(warn.pl > crit.pl) warn.pl = crit.pl;
506 if(warn.rta > crit.rta) warn.rta = crit.rta;
507 if(warn_down > crit_down) crit_down = warn_down;
509 signal(SIGINT, finish);
510 signal(SIGHUP, finish);
511 signal(SIGTERM, finish);
512 signal(SIGALRM, finish);
513 if(debug) printf("Setting alarm timeout to %u seconds\n", timeout);
514 alarm(timeout);
516 /* make sure we don't wait any longer than necessary */
517 gettimeofday(&prog_start, &tz);
518 max_completion_time =
519 ((targets * packets * pkt_interval) + (targets * target_interval)) +
520 (targets * packets * crit.rta) + crit.rta;
522 if(debug) {
523 printf("packets: %u, targets: %u\n"
524 "target_interval: %0.3f, pkt_interval %0.3f\n"
525 "crit.rta: %0.3f\n"
526 "max_completion_time: %0.3f\n",
527 packets, targets,
528 (float)target_interval / 1000, (float)pkt_interval / 1000,
529 (float)crit.rta / 1000,
530 (float)max_completion_time / 1000);
531 }
533 if(debug) {
534 if(max_completion_time > (u_int)timeout * 1000000) {
535 printf("max_completion_time: %llu timeout: %u\n",
536 max_completion_time, timeout);
537 printf("Timout must be at lest %llu\n",
538 max_completion_time / 1000000 + 1);
539 }
540 }
542 icmp_pkt_size = icmp_data_size + ICMP_MINLEN;
543 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
544 if(icmp_pkt_size < sizeof(struct icmp) + sizeof(struct icmp_ping_data)) {
545 icmp_pkt_size = sizeof(struct icmp) + sizeof(struct icmp_ping_data);
546 }
547 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
549 if(debug) {
550 printf("crit = {%u, %u%%}, warn = {%u, %u%%}\n",
551 crit.rta, crit.pl, warn.rta, warn.pl);
552 printf("pkt_interval: %u target_interval: %u retry_interval: %u\n",
553 pkt_interval, target_interval, retry_interval);
554 printf("icmp_pkt_size: %u timeout: %u\n",
555 icmp_pkt_size, timeout);
556 }
558 if(packets > 20) {
559 errno = 0;
560 crash("packets is > 20 (%d)", packets);
561 }
563 host = list;
564 table = malloc(sizeof(struct rta_host **) * (argc - 1));
565 i = 0;
566 while(host) {
567 host->id = i;
568 table[i] = host;
569 host = host->next;
570 i++;
571 }
573 run_checks();
575 errno = 0;
576 finish(0);
578 return(0);
579 }
581 static void
582 run_checks()
583 {
584 u_int i, t, result;
585 u_int final_wait, time_passed;
587 /* this loop might actually violate the pkt_interval or target_interval
588 * settings, but only if there aren't any packets on the wire which
589 * indicates that the target can handle an increased packet rate */
590 for(i = 0; i < packets; i++) {
591 for(t = 0; t < targets; t++) {
592 /* don't send useless packets */
593 if(!targets_alive) finish(0);
594 if(table[t]->flags & FLAG_LOST_CAUSE) {
595 if(debug) printf("%s is a lost cause. not sending any more\n",
596 table[t]->name);
597 continue;
598 }
600 /* we're still in the game, so send next packet */
601 (void)send_icmp_ping(icmp_sock, table[t]);
602 result = wait_for_reply(icmp_sock, target_interval);
603 }
604 result = wait_for_reply(icmp_sock, pkt_interval * targets);
605 }
607 if(icmp_pkts_en_route && targets_alive) {
608 time_passed = get_timevaldiff(NULL, NULL);
609 final_wait = max_completion_time - time_passed;
611 if(debug) {
612 printf("time_passed: %u final_wait: %u max_completion_time: %llu\n",
613 time_passed, final_wait, max_completion_time);
614 }
615 if(time_passed > max_completion_time) {
616 if(debug) printf("Time passed. Finishing up\n");
617 finish(0);
618 }
620 /* catch the packets that might come in within the timeframe, but
621 * haven't yet */
622 if(debug) printf("Waiting for %u micro-seconds (%0.3f msecs)\n",
623 final_wait, (float)final_wait / 1000);
624 result = wait_for_reply(icmp_sock, final_wait);
625 }
626 }
628 /* response structure:
629 * ip header : 20 bytes
630 * icmp header : 28 bytes
631 * icmp echo reply : the rest
632 */
633 static int
634 wait_for_reply(int sock, u_int t)
635 {
636 int n, hlen;
637 static char buf[4096];
638 struct sockaddr_in resp_addr;
639 struct ip *ip;
640 struct icmp *icp, *sent_icmp;
641 struct rta_host *host;
642 struct icmp_ping_data *data;
643 struct timeval wait_start, now;
644 u_int tdiff, i, per_pkt_wait;
646 /* if we can't listen or don't have anything to listen to, just return */
647 if(!t || !icmp_pkts_en_route) return 0;
649 gettimeofday(&wait_start, &tz);
651 i = t;
652 per_pkt_wait = t / icmp_pkts_en_route;
653 while(icmp_pkts_en_route && get_timevaldiff(&wait_start, NULL) < i) {
654 t = per_pkt_wait;
656 /* wrap up if all targets are declared dead */
657 if(!targets_alive ||
658 get_timevaldiff(&prog_start, NULL) >= max_completion_time ||
659 (mode == MODE_HOSTCHECK && targets_down))
660 {
661 finish(0);
662 }
664 /* reap responses until we hit a timeout */
665 n = recvfrom_wto(sock, buf, sizeof(buf),
666 (struct sockaddr *)&resp_addr, &t);
667 if(!n) {
668 if(debug > 1) {
669 printf("recvfrom_wto() timed out during a %u usecs wait\n",
670 per_pkt_wait);
671 }
672 continue; /* timeout for this one, so keep trying */
673 }
674 if(n < 0) {
675 if(debug) printf("recvfrom_wto() returned errors\n");
676 return n;
677 }
679 ip = (struct ip *)buf;
680 if(debug > 1) printf("received %u bytes from %s\n",
681 ntohs(ip->ip_len), inet_ntoa(resp_addr.sin_addr));
683 /* obsolete. alpha on tru64 provides the necessary defines, but isn't broken */
684 /* #if defined( __alpha__ ) && __STDC__ && !defined( __GLIBC__ ) */
685 /* alpha headers are decidedly broken. Using an ansi compiler,
686 * they provide ip_vhl instead of ip_hl and ip_v, so we mask
687 * off the bottom 4 bits */
688 /* hlen = (ip->ip_vhl & 0x0f) << 2; */
689 /* #else */
690 hlen = ip->ip_hl << 2;
691 /* #endif */
693 if(n < (hlen + ICMP_MINLEN)) {
694 crash("received packet too short for ICMP (%d bytes, expected %d) from %s\n",
695 n, hlen + icmp_pkt_size, inet_ntoa(resp_addr.sin_addr));
696 }
697 /* else if(debug) { */
698 /* printf("ip header size: %u, packet size: %u (expected %u, %u)\n", */
699 /* hlen, ntohs(ip->ip_len) - hlen, */
700 /* sizeof(struct ip), icmp_pkt_size); */
701 /* } */
703 /* check the response */
704 icp = (struct icmp *)(buf + hlen);
705 sent_icmp = (struct icmp *)(buf + hlen + ICMP_MINLEN);
706 /* printf("buf: %p, icp: %p, distance: %u (expected %u)\n", */
707 /* buf, icp, */
708 /* (u_int)icp - (u_int)buf, hlen); */
709 /* printf("buf: %p, sent_icmp: %p, distance: %u (expected %u)\n", */
710 /* buf, sent_icmp, */
711 /* (u_int)sent_icmp - (u_int)buf, hlen + ICMP_MINLEN); */
713 if(icp->icmp_id != pid) {
714 handle_random_icmp(icp, &resp_addr);
715 continue;
716 }
718 if(icp->icmp_type != ICMP_ECHOREPLY || icp->icmp_seq >= targets) {
719 if(debug > 2) printf("not a proper ICMP_ECHOREPLY\n");
720 handle_random_icmp(icp, &resp_addr);
721 continue;
722 }
724 /* this is indeed a valid response */
725 data = (struct icmp_ping_data *)(icp->icmp_data);
727 host = table[icp->icmp_seq];
728 gettimeofday(&now, &tz);
729 tdiff = get_timevaldiff(&data->stime, &now);
731 host->time_waited += tdiff;
732 host->icmp_recv++;
733 icmp_recv++;
735 if(debug) {
736 printf("%0.3f ms rtt from %s, outgoing ttl: %u, incoming ttl: %u\n",
737 (float)tdiff / 1000, inet_ntoa(resp_addr.sin_addr),
738 ttl, ip->ip_ttl);
739 }
741 /* if we're in hostcheck mode, exit with limited printouts */
742 if(mode == MODE_HOSTCHECK) {
743 printf("OK - %s responds to ICMP. Packet %u, rta %0.3fms|"
744 "pkt=%u;;0;%u rta=%0.3f;%0.3f;%0.3f;;\n",
745 host->name, icmp_recv, (float)tdiff / 1000,
746 icmp_recv, packets, (float)tdiff / 1000,
747 (float)warn.rta / 1000, (float)crit.rta / 1000);
748 exit(STATE_OK);
749 }
750 }
752 return 0;
753 }
755 /* the ping functions */
756 static int
757 send_icmp_ping(int sock, struct rta_host *host)
758 {
759 static char *buf = NULL; /* re-use so we prevent leaks */
760 long int len;
761 struct icmp *icp;
762 struct icmp_ping_data *data;
763 struct timeval tv;
764 struct sockaddr *addr;
767 if(sock == -1) {
768 errno = 0;
769 crash("Attempt to send on bogus socket");
770 return -1;
771 }
772 addr = (struct sockaddr *)&host->saddr_in;
774 if(!buf) {
775 buf = (char *)malloc(icmp_pkt_size + sizeof(struct ip));
776 if(!buf) {
777 crash("send_icmp_ping(): failed to malloc %d bytes for send buffer",
778 icmp_pkt_size);
779 return -1; /* might be reached if we're in debug mode */
780 }
781 }
782 memset(buf, 0, icmp_pkt_size + sizeof(struct ip));
784 if((gettimeofday(&tv, &tz)) == -1) return -1;
786 icp = (struct icmp *)buf;
787 icp->icmp_type = ICMP_ECHO;
788 icp->icmp_code = 0;
789 icp->icmp_cksum = 0;
790 icp->icmp_id = pid;
791 icp->icmp_seq = host->id;
792 data = (struct icmp_ping_data *)icp->icmp_data;
793 data->ping_id = 10; /* host->icmp.icmp_sent; */
794 memcpy(&data->stime, &tv, sizeof(struct timeval));
795 icp->icmp_cksum = icmp_checksum((u_short *)icp, icmp_pkt_size);
797 len = sendto(sock, buf, icmp_pkt_size, 0, (struct sockaddr *)addr,
798 sizeof(struct sockaddr));
800 if(len < 0 || (unsigned int)len != icmp_pkt_size) {
801 if(debug) printf("Failed to send ping to %s\n",
802 inet_ntoa(host->saddr_in.sin_addr));
803 return -1;
804 }
806 icmp_sent++;
807 host->icmp_sent++;
809 return 0;
810 }
812 static int
813 recvfrom_wto(int sock, char *buf, unsigned int len, struct sockaddr *saddr,
814 u_int *timo)
815 {
816 u_int slen;
817 int n;
818 struct timeval to, then, now;
819 fd_set rd, wr;
821 if(!*timo) {
822 if(debug) printf("*timo is not\n");
823 return 0;
824 }
826 to.tv_sec = *timo / 1000000;
827 to.tv_usec = (*timo - (to.tv_sec * 1000000));
829 FD_ZERO(&rd);
830 FD_ZERO(&wr);
831 FD_SET(sock, &rd);
832 errno = 0;
833 gettimeofday(&then, &tz);
834 n = select(sock + 1, &rd, &wr, NULL, &to);
835 if(n < 0) crash("select() in recvfrom_wto");
836 gettimeofday(&now, &tz);
837 *timo = get_timevaldiff(&then, &now);
839 if(!n) return 0; /* timeout */
841 slen = sizeof(struct sockaddr);
843 return recvfrom(sock, buf, len, 0, saddr, &slen);
844 }
846 static void
847 finish(int sig)
848 {
849 u_int i = 0;
850 unsigned char pl;
851 double rta;
852 struct rta_host *host;
853 char *status_string[] =
854 {"OK", "WARNING", "CRITICAL", "UNKNOWN", "DEPENDENT"};
856 alarm(0);
857 if(debug > 1) printf("finish(%d) called\n", sig);
859 if(icmp_sock != -1) close(icmp_sock);
860 if(udp_sock != -1) close(udp_sock);
861 if(tcp_sock != -1) close(tcp_sock);
863 if(debug) {
864 printf("icmp_sent: %u icmp_recv: %u icmp_lost: %u\n",
865 icmp_sent, icmp_recv, icmp_lost);
866 printf("targets: %u targets_alive: %u\n", targets, targets_alive);
867 }
869 /* iterate thrice to calculate values, give output, and print perfparse */
870 host = list;
871 while(host) {
872 if(!host->icmp_recv) {
873 /* rta 0 is ofcourse not entirely correct, but will still show up
874 * conspicuosly as missing entries in perfparse and cacti */
875 pl = 100;
876 rta = 0;
877 status = STATE_CRITICAL;
878 /* up the down counter if not already counted */
879 if(!(host->flags & FLAG_LOST_CAUSE) && targets_alive) targets_down++;
880 }
881 else {
882 pl = ((host->icmp_sent - host->icmp_recv) * 100) / host->icmp_sent;
883 rta = (double)host->time_waited / host->icmp_recv;
884 }
885 host->pl = pl;
886 host->rta = rta;
887 if(!status && (pl >= warn.pl || rta >= warn.rta)) status = STATE_WARNING;
888 if(pl >= crit.pl || rta >= crit.rta) status = STATE_CRITICAL;
890 host = host->next;
891 }
892 /* this is inevitable */
893 if(!targets_alive) status = STATE_CRITICAL;
894 printf("%s - ", status_string[status]);
896 host = list;
897 while(host) {
898 if(debug) puts("");
899 if(i) {
900 if(i < targets) printf(" :: ");
901 else printf("\n");
902 }
903 i++;
904 if(!host->icmp_recv) {
905 status = STATE_CRITICAL;
906 if(host->flags & FLAG_LOST_CAUSE) {
907 printf("%s: %s @ %s. rta nan, lost %d%%",
908 host->name,
909 get_icmp_error_msg(host->icmp_type, host->icmp_code),
910 inet_ntoa(host->error_addr),
911 100);
912 }
913 else { /* not marked as lost cause, so we have no flags for it */
914 printf("%s: rta nan, lost 100%%", host->name);
915 }
916 }
917 else { /* !icmp_recv */
918 printf("%s: rta %0.3fms, lost %u%%",
919 host->name, host->rta / 1000, host->pl);
920 }
922 host = host->next;
923 }
925 /* iterate once more for pretty perfparse output */
926 printf("|");
927 i = 0;
928 host = list;
929 while(host) {
930 if(debug) puts("");
931 printf("%srta=%0.3fms;%0.3f;%0.3f;0; %spl=%u%%;%u;%u;; ",
932 (targets > 1) ? host->name : "",
933 host->rta / 1000, (float)warn.rta / 1000, (float)crit.rta / 1000,
934 (targets > 1) ? host->name : "",
935 host->pl, warn.pl, crit.pl);
937 host = host->next;
938 }
940 /* finish with an empty line */
941 puts("");
942 if(debug) printf("targets: %u, targets_alive: %u\n",
943 targets, targets_alive);
945 exit(status);
946 }
948 static u_int
949 get_timevaldiff(struct timeval *early, struct timeval *later)
950 {
951 u_int ret;
952 struct timeval now;
954 if(!later) {
955 gettimeofday(&now, &tz);
956 later = &now;
957 }
958 if(!early) early = &prog_start;
960 /* if early > later we return 0 so as to indicate a timeout */
961 if(early->tv_sec > early->tv_sec ||
962 (early->tv_sec == later->tv_sec && early->tv_usec > later->tv_usec))
963 {
964 return 0;
965 }
967 ret = (later->tv_sec - early->tv_sec) * 1000000;
968 ret += later->tv_usec - early->tv_usec;
970 return ret;
971 }
973 static int
974 add_target_ip(char *arg, struct in_addr *in)
975 {
976 struct rta_host *host;
978 /* disregard obviously stupid addresses */
979 if(in->s_addr == INADDR_NONE || in->s_addr == INADDR_ANY)
980 return -1;
982 /* no point in adding two identical IP's, so don't. ;) */
983 host = list;
984 while(host) {
985 if(host->saddr_in.sin_addr.s_addr == in->s_addr) {
986 if(debug) printf("Identical IP already exists. Not adding %s\n", arg);
987 return -1;
988 }
989 host = host->next;
990 }
992 /* add the fresh ip */
993 host = malloc(sizeof(struct rta_host));
994 if(!host) {
995 crash("add_target_ip(%s, %s): malloc(%d) failed",
996 arg, inet_ntoa(*in), sizeof(struct rta_host));
997 }
998 memset(host, 0, sizeof(struct rta_host));
1000 /* set the values. use calling name for output */
1001 host->name = strdup(arg);
1003 /* fill out the sockaddr_in struct */
1004 host->saddr_in.sin_family = AF_INET;
1005 host->saddr_in.sin_addr.s_addr = in->s_addr;
1007 if(!list) list = cursor = host;
1008 else cursor->next = host;
1010 cursor = host;
1011 targets++;
1013 return 0;
1014 }
1016 /* wrapper for add_target_ip */
1017 static int
1018 add_target(char *arg)
1019 {
1020 int i;
1021 struct hostent *he;
1022 struct in_addr *in, ip;
1024 /* don't resolve if we don't have to */
1025 if((ip.s_addr = inet_addr(arg)) != INADDR_NONE) {
1026 /* don't add all ip's if we were given a specific one */
1027 return add_target_ip(arg, &ip);
1028 /* he = gethostbyaddr((char *)in, sizeof(struct in_addr), AF_INET); */
1029 /* if(!he) return add_target_ip(arg, in); */
1030 }
1031 else {
1032 errno = 0;
1033 he = gethostbyname(arg);
1034 if(!he) {
1035 errno = 0;
1036 crash("Failed to resolve %s", arg);
1037 return -1;
1038 }
1039 }
1041 /* possibly add all the IP's as targets */
1042 for(i = 0; he->h_addr_list[i]; i++) {
1043 in = (struct in_addr *)he->h_addr_list[i];
1044 add_target_ip(arg, in);
1046 /* this is silly, but it works */
1047 if(mode == MODE_HOSTCHECK || mode == MODE_ALL) {
1048 printf("mode: %d\n", mode);
1049 continue;
1050 }
1051 break;
1052 }
1054 return 0;
1055 }
1056 /*
1057 * u = micro
1058 * m = milli
1059 * s = seconds
1060 * return value is in microseconds
1061 */
1062 static u_int
1063 get_timevar(const char *str)
1064 {
1065 char p, u, *ptr;
1066 unsigned int len;
1067 u_int i, d; /* integer and decimal, respectively */
1068 u_int factor = 1000; /* default to milliseconds */
1070 if(!str) return 0;
1071 len = strlen(str);
1072 if(!len) return 0;
1074 /* unit might be given as ms|m (millisec),
1075 * us|u (microsec) or just plain s, for seconds */
1076 u = p = '\0';
1077 u = str[len - 1];
1078 if(len >= 2 && !isdigit((int)str[len - 2])) p = str[len - 2];
1079 if(p && u == 's') u = p;
1080 else if(!p) p = u;
1081 if(debug > 2) printf("evaluating %s, u: %c, p: %c\n", str, u, p);
1083 if(u == 'u') factor = 1; /* microseconds */
1084 else if(u == 'm') factor = 1000; /* milliseconds */
1085 else if(u == 's') factor = 1000000; /* seconds */
1086 if(debug > 2) printf("factor is %u\n", factor);
1088 i = strtoul(str, &ptr, 0);
1089 if(!ptr || *ptr != '.' || strlen(ptr) < 2 || factor == 1)
1090 return i * factor;
1092 /* time specified in usecs can't have decimal points, so ignore them */
1093 if(factor == 1) return i;
1095 d = strtoul(ptr + 1, NULL, 0);
1097 /* d is decimal, so get rid of excess digits */
1098 while(d >= factor) d /= 10;
1100 /* the last parenthesis avoids floating point exceptions. */
1101 return ((i * factor) + (d * (factor / 10)));
1102 }
1104 /* not too good at checking errors, but it'll do (main() should barfe on -1) */
1105 static int
1106 get_threshold(char *str, threshold *th)
1107 {
1108 char *p = NULL, i = 0;
1110 if(!str || !strlen(str) || !th) return -1;
1112 /* pointer magic slims code by 10 lines. i is bof-stop on stupid libc's */
1113 p = &str[strlen(str) - 1];
1114 while(p != &str[1]) {
1115 if(*p == '%') *p = '\0';
1116 else if(*p == ',' && i) {
1117 *p = '\0'; /* reset it so get_timevar(str) works nicely later */
1118 th->pl = (unsigned char)strtoul(p+1, NULL, 0);
1119 break;
1120 }
1121 i = 1;
1122 p--;
1123 }
1124 th->rta = get_timevar(str);
1126 if(!th->rta) return -1;
1128 if(th->rta > MAXTTL * 1000000) th->rta = MAXTTL * 1000000;
1129 if(th->pl > 100) th->pl = 100;
1131 return 0;
1132 }
/*
 * Compute the 16-bit ones-complement checksum over the n bytes at p.
 *
 * The buffer is summed as 16-bit words in host byte order. If n is odd,
 * the final byte is treated as the first byte of a word whose second
 * byte is zero. A non-positive n yields 0xffff.
 */
unsigned short
icmp_checksum(unsigned short *p, int n)
{
	/* unsigned, so a large buffer can't overflow a signed 32-bit long */
	unsigned long sum = 0;

	while(n > 1) {
		sum += *p++;
		n -= 2;
	}

	/* mop up the occasional odd byte: read exactly one byte (a plain *p
	 * would read one byte past the buffer) and keep it in the position
	 * it has in memory, which is right for either byte order */
	if(n == 1) {
		unsigned short tail = 0;

		*(unsigned char *)&tail = *(unsigned char *)p;
		sum += tail;
	}

	sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */
	sum += (sum >> 16); /* add carry */

	/* ones-complement, trunc to 16 bits */
	return (unsigned short)~sum;
}
1155 /* make core plugin developers happy (silly, really) */
1156 static void
1157 usage(unsigned char arg, char *msg)
1158 {
1159 if(msg) printf("%s: %s\n", progname, msg);
1161 if(arg == 'V') {
1162 printf("$Id$\n");
1163 exit(STATE_UNKNOWN);
1164 }
1166 printf("Usage: %s [options] [-H] host1 host2 hostn\n\n", progname);
1168 if(arg != 'h') exit(3);
1170 printf("Where options are any combination of:\n"
1171 " * -H | --host specify a target\n"
1172 " * -w | --warn warning threshold (currently %0.3fms,%u%%)\n"
1173 " * -c | --crit critical threshold (currently %0.3fms,%u%%)\n"
1174 " * -n | --packets number of packets to send (currently %u)\n"
1175 " * -i | --interval max packet interval (currently %0.3fms)\n"
1176 " * -I | --hostint max target interval (currently %0.3fms)\n"
1177 " * -l | --ttl TTL on outgoing packets (currently %u)\n"
1178 " * -t | --timeout timeout value (seconds, currently %u)\n"
1179 " * -b | --bytes icmp packet size (currenly ignored)\n"
1180 " -v | --verbose verbosity++\n"
1181 " -h | --help this cruft\n",
1182 (float)warn.rta / 1000, warn.pl, (float)crit.rta / 1000, crit.pl,
1183 packets,
1184 (float)pkt_interval / 1000, (float)target_interval / 1000,
1185 ttl, timeout);
1187 puts("\nThe -H switch is optional. Naming a host (or several) to check is not.\n\n"
1188 "Threshold format for -w and -c is 200.25,60% for 200.25 msec RTA and 60%\n"
1189 "packet loss. The default values should work well for most users.\n"
1190 "You can specify different RTA factors using the standardized abbreviations\n"
1191 "us (microseconds), ms (milliseconds, default) or just plain s for seconds.\n\n"
1192 "Threshold format for -d is warn,crit. 12,14 means WARNING if >= 12 hops\n"
1193 "are spent and CRITICAL if >= 14 hops are spent.\n"
1194 "NOTE: Some systems decrease TTL when forming ICMP_ECHOREPLY, others do not.\n\n"
1195 "The -v switch can be specified several times for increased verbosity.\n\n"
1196 "Long options are currently unsupported.\n\n"
1197 "Options marked with * require an argument\n");
1199 puts("The latest version of this plugin can be found at http://oss.op5.se/nagios\n"
1200 "or https://devel.op5.se/oss until the day it is included in the official\n"
1201 "plugin distribution.\n");
1203 exit(3);
1204 }