1 /*****************************************************************************
2 *
3 * Nagios check_icmp plugin
4 *
5 * License: GPL
6 * Copyright (c) 2005-2008 Nagios Plugins Development Team
7 * Original Author : Andreas Ericsson <ae@op5.se>
8 *
9 * Last Modified: $Date$
10 *
11 * Description:
12 *
13 * This file contains the check_icmp plugin
14 *
15 * Relevant RFC's: 792 (ICMP), 791 (IP)
16 *
17 * This program was modeled somewhat after the check_icmp program,
18 * which was in turn a hack of fping (www.fping.org) but has been
19 * completely rewritten since to generate higher precision rta values,
* and support several different modes as well as setting ttl to control
* redundant routes. The only remainders of fping are currently a few
* function names.
23 *
24 *
25 * This program is free software: you can redistribute it and/or modify
26 * it under the terms of the GNU General Public License as published by
27 * the Free Software Foundation, either version 3 of the License, or
28 * (at your option) any later version.
29 *
30 * This program is distributed in the hope that it will be useful,
31 * but WITHOUT ANY WARRANTY; without even the implied warranty of
32 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 * GNU General Public License for more details.
34 *
35 * You should have received a copy of the GNU General Public License
36 * along with this program. If not, see <http://www.gnu.org/licenses/>.
37 *
38 * $Id$
39 *
40 *****************************************************************************/
/* Name under which the plugin was invoked. It is assigned at startup
 * from argv[0], so it is not necessarily "check_icmp". */
char *progname;

/* plugin metadata */
const char *revision = "$Revision$";
const char *copyright = "2005-2008";
const char *email = "nagiosplug-devel@lists.sourceforge.net";
49 /** nagios plugins basic includes */
50 #include "common.h"
51 #include "netutils.h"
52 #include "utils.h"
54 #if HAVE_SYS_SOCKIO_H
55 #include <sys/sockio.h>
56 #endif
57 #include <sys/ioctl.h>
58 #include <sys/time.h>
59 #include <sys/types.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <stdarg.h>
63 #include <unistd.h>
64 #include <stddef.h>
65 #include <errno.h>
66 #include <string.h>
67 #include <ctype.h>
68 #include <netdb.h>
69 #include <sys/socket.h>
70 #include <net/if.h>
71 #include <netinet/in_systm.h>
72 #include <netinet/in.h>
73 #include <netinet/ip.h>
74 #include <netinet/ip_icmp.h>
75 #include <arpa/inet.h>
76 #include <signal.h>
/** fallbacks for system macros that quite a few platforms leave undefined **/
#if !defined(MAXTTL)
# define MAXTTL 255
#endif
#if !defined(INADDR_NONE)
# define INADDR_NONE (in_addr_t)(-1)
#endif

#if !defined(SOL_IP)
#define SOL_IP 0
#endif

/* All of the following come from BSD, so one guard covers the lot.
 * Wrap an individual guard around any single one that causes trouble. */
#if !defined(ICMP_UNREACH_NET_UNKNOWN)
# define ICMP_UNREACH_NET_UNKNOWN 6
# define ICMP_UNREACH_HOST_UNKNOWN 7
# define ICMP_UNREACH_ISOLATED 8
# define ICMP_UNREACH_NET_PROHIB 9
# define ICMP_UNREACH_HOST_PROHIB 10
# define ICMP_UNREACH_TOSNET 11
# define ICMP_UNREACH_TOSHOST 12
#endif
/* tru64 provides the group above, but lacks these */
#if !defined(ICMP_UNREACH_FILTER_PROHIB)
# define ICMP_UNREACH_FILTER_PROHIB 13
# define ICMP_UNREACH_HOST_PRECEDENCE 14
# define ICMP_UNREACH_PRECEDENCE_CUTOFF 15
#endif
/* type for get_range() -- unimplemented */
typedef unsigned short range_t;

/* Bookkeeping for one ping target. Targets are chained through `next`. */
typedef struct rta_host {
	unsigned short id;              /* id in **table, and icmp pkts */
	char *name;                     /* arg used for adding this host */
	char *msg;                      /* icmp error message, if any */
	struct sockaddr_in saddr_in;    /* the address of this host */
	struct in_addr error_addr;      /* stores address of error replies */
	unsigned long long time_waited; /* total time waited, in usecs */
	unsigned int icmp_sent;         /* counter: packets sent */
	unsigned int icmp_recv;         /* counter: replies received */
	unsigned int icmp_lost;         /* counter: packets known lost */
	unsigned char icmp_type;        /* type taken from an icmp error */
	unsigned char icmp_code;        /* code taken from an icmp error */
	unsigned short flags;           /* control/status flags */
	double rta;                     /* measured RTA */
	unsigned char pl;               /* measured packet loss */
	struct rta_host *next;          /* linked list */
} rta_host;

/* rta_host.flags bit: decidedly dead target */
#define FLAG_LOST_CAUSE 0x01

/* Threshold pair. All values are maximum allowed, exclusive. */
typedef struct threshold {
	unsigned char pl;  /* max allowed packet loss in percent */
	unsigned int rta;  /* roundtrip time average, microseconds */
} threshold;

/* Payload carried inside each echo request. */
typedef struct icmp_ping_data {
	struct timeval stime;   /* timestamp (saved in protocol struct as well) */
	unsigned short ping_id;
} icmp_ping_data;
/* Operating modes:
 *  MODE_RTA:       send all packets no matter what (mimic check_icmp and
 *                  check_ping).
 *  MODE_HOSTCHECK: return immediately upon any sign of life. Packets go
 *                  to ALL addresses assigned to the host (as returned by
 *                  gethostbyname() or gethostbyaddr()), and only one host
 *                  is expected to be checked at a time. Any response at
 *                  all therefore counts as a sign of life, even one that
 *                  arrives outside the crit.rta limit. Do not misspell
 *                  any additional IP's.
 *  MODE_ALL:       requires packets from ALL requested IP's to return OK.
 *  MODE_ICMP:      something similar to check_icmp (MODE_RTA without tcp
 *                  and udp args does this).
 */
#define MODE_RTA 0
#define MODE_HOSTCHECK 1
#define MODE_ALL 2
#define MODE_ICMP 3

/* Ping types we can do, usable as bit flags.
 * TODO: investigate ARP ping as well */
#define HAVE_ICMP 1
#define HAVE_UDP 2
#define HAVE_TCP 4
#define HAVE_ARP 8

/* packet and payload sizes */
#define MIN_PING_DATA_SIZE sizeof(struct icmp_ping_data)
#define MAX_IP_PKT_SIZE 65536 /* (theoretical) max IP packet size */
#define IP_HDR_SIZE 20
#define MAX_PING_DATA (MAX_IP_PKT_SIZE - IP_HDR_SIZE - ICMP_MINLEN)
#define DEFAULT_PING_DATA_SIZE (MIN_PING_DATA_SIZE + 44)

/* target states */
#define TSTATE_INACTIVE 0x01 /* don't ping this host anymore */
#define TSTATE_WAITING 0x02 /* unanswered packets on the wire */
#define TSTATE_ALIVE 0x04 /* target is alive (has answered something) */
#define TSTATE_UNREACH 0x08
178 /** prototypes **/
179 void print_help (void);
180 void print_usage (void);
181 static u_int get_timevar(const char *);
182 static u_int get_timevaldiff(struct timeval *, struct timeval *);
183 static in_addr_t get_ip_address(const char *);
184 static int wait_for_reply(int, u_int);
185 static int recvfrom_wto(int, char *, unsigned int, struct sockaddr *, u_int *);
186 static int send_icmp_ping(int, struct rta_host *);
187 static int get_threshold(char *str, threshold *th);
188 static void run_checks(void);
189 static void set_source_ip(char *);
190 static int add_target(char *);
191 static int add_target_ip(char *, struct in_addr *);
192 static int handle_random_icmp(char *, struct sockaddr_in *);
193 static unsigned short icmp_checksum(unsigned short *, int);
194 static void finish(int);
195 static void crash(const char *, ...);
197 /** external **/
198 extern int optind, opterr, optopt;
199 extern char *optarg;
200 extern char **environ;
202 /** global variables **/
203 static struct rta_host **table, *cursor, *list;
204 static threshold crit = {80, 500000}, warn = {40, 200000};
205 static int mode, protocols, sockets, debug = 0, timeout = 10;
206 static unsigned short icmp_pkt_size, icmp_data_size = DEFAULT_PING_DATA_SIZE;
207 static unsigned int icmp_sent = 0, icmp_recv = 0, icmp_lost = 0;
208 #define icmp_pkts_en_route (icmp_sent - (icmp_recv + icmp_lost))
209 static unsigned short targets_down = 0, targets = 0, packets = 0;
210 #define targets_alive (targets - targets_down)
211 static unsigned int retry_interval, pkt_interval, target_interval;
212 static int icmp_sock, tcp_sock, udp_sock, status = STATE_OK;
213 static pid_t pid;
214 static struct timezone tz;
215 static struct timeval prog_start;
216 static unsigned long long max_completion_time = 0;
217 static unsigned char ttl = 0; /* outgoing ttl */
218 static unsigned int warn_down = 1, crit_down = 1; /* host down threshold values */
219 static int min_hosts_alive = -1;
220 float pkt_backoff_factor = 1.5;
221 float target_backoff_factor = 1.5;
223 /** code start **/
224 static void
225 crash(const char *fmt, ...)
226 {
227 va_list ap;
229 printf("%s: ", progname);
231 va_start(ap, fmt);
232 vprintf(fmt, ap);
233 va_end(ap);
235 if(errno) printf(": %s", strerror(errno));
236 puts("");
238 exit(3);
239 }
242 static const char *
243 get_icmp_error_msg(unsigned char icmp_type, unsigned char icmp_code)
244 {
245 const char *msg = "unreachable";
247 if(debug > 1) printf("get_icmp_error_msg(%u, %u)\n", icmp_type, icmp_code);
248 switch(icmp_type) {
249 case ICMP_UNREACH:
250 switch(icmp_code) {
251 case ICMP_UNREACH_NET: msg = "Net unreachable"; break;
252 case ICMP_UNREACH_HOST: msg = "Host unreachable"; break;
253 case ICMP_UNREACH_PROTOCOL: msg = "Protocol unreachable (firewall?)"; break;
254 case ICMP_UNREACH_PORT: msg = "Port unreachable (firewall?)"; break;
255 case ICMP_UNREACH_NEEDFRAG: msg = "Fragmentation needed"; break;
256 case ICMP_UNREACH_SRCFAIL: msg = "Source route failed"; break;
257 case ICMP_UNREACH_ISOLATED: msg = "Source host isolated"; break;
258 case ICMP_UNREACH_NET_UNKNOWN: msg = "Unknown network"; break;
259 case ICMP_UNREACH_HOST_UNKNOWN: msg = "Unknown host"; break;
260 case ICMP_UNREACH_NET_PROHIB: msg = "Network denied (firewall?)"; break;
261 case ICMP_UNREACH_HOST_PROHIB: msg = "Host denied (firewall?)"; break;
262 case ICMP_UNREACH_TOSNET: msg = "Bad TOS for network (firewall?)"; break;
263 case ICMP_UNREACH_TOSHOST: msg = "Bad TOS for host (firewall?)"; break;
264 case ICMP_UNREACH_FILTER_PROHIB: msg = "Prohibited by filter (firewall)"; break;
265 case ICMP_UNREACH_HOST_PRECEDENCE: msg = "Host precedence violation"; break;
266 case ICMP_UNREACH_PRECEDENCE_CUTOFF: msg = "Precedence cutoff"; break;
267 default: msg = "Invalid code"; break;
268 }
269 break;
271 case ICMP_TIMXCEED:
272 /* really 'out of reach', or non-existant host behind a router serving
273 * two different subnets */
274 switch(icmp_code) {
275 case ICMP_TIMXCEED_INTRANS: msg = "Time to live exceeded in transit"; break;
276 case ICMP_TIMXCEED_REASS: msg = "Fragment reassembly time exceeded"; break;
277 default: msg = "Invalid code"; break;
278 }
279 break;
281 case ICMP_SOURCEQUENCH: msg = "Transmitting too fast"; break;
282 case ICMP_REDIRECT: msg = "Redirect (change route)"; break;
283 case ICMP_PARAMPROB: msg = "Bad IP header (required option absent)"; break;
285 /* the following aren't error messages, so ignore */
286 case ICMP_TSTAMP:
287 case ICMP_TSTAMPREPLY:
288 case ICMP_IREQ:
289 case ICMP_IREQREPLY:
290 case ICMP_MASKREQ:
291 case ICMP_MASKREPLY:
292 default: msg = ""; break;
293 }
295 return msg;
296 }
298 static int
299 handle_random_icmp(char *packet, struct sockaddr_in *addr)
300 {
301 struct icmp p, sent_icmp;
302 struct rta_host *host = NULL;
304 memcpy(&p, packet, sizeof(p));
305 if(p.icmp_type == ICMP_ECHO && p.icmp_id == pid) {
306 /* echo request from us to us (pinging localhost) */
307 return 0;
308 }
310 if(debug) printf("handle_random_icmp(%p, %p)\n", (void *)&p, (void *)addr);
312 /* only handle a few types, since others can't possibly be replies to
313 * us in a sane network (if it is anyway, it will be counted as lost
314 * at summary time, but not as quickly as a proper response */
315 /* TIMXCEED can be an unreach from a router with multiple IP's which
316 * serves two different subnets on the same interface and a dead host
317 * on one net is pinged from the other. The router will respond to
318 * itself and thus set TTL=0 so as to not loop forever. Even when
319 * TIMXCEED actually sends a proper icmp response we will have passed
320 * too many hops to have a hope of reaching it later, in which case it
321 * indicates overconfidence in the network, poor routing or both. */
322 if(p.icmp_type != ICMP_UNREACH && p.icmp_type != ICMP_TIMXCEED &&
323 p.icmp_type != ICMP_SOURCEQUENCH && p.icmp_type != ICMP_PARAMPROB)
324 {
325 return 0;
326 }
328 /* might be for us. At least it holds the original package (according
329 * to RFC 792). If it isn't, just ignore it */
330 memcpy(&sent_icmp, packet + 28, sizeof(sent_icmp));
331 if(sent_icmp.icmp_type != ICMP_ECHO || sent_icmp.icmp_id != pid ||
332 sent_icmp.icmp_seq >= targets)
333 {
334 if(debug) printf("Packet is no response to a packet we sent\n");
335 return 0;
336 }
338 /* it is indeed a response for us */
339 host = table[sent_icmp.icmp_seq];
340 if(debug) {
341 printf("Received \"%s\" from %s for ICMP ECHO sent to %s.\n",
342 get_icmp_error_msg(p.icmp_type, p.icmp_code),
343 inet_ntoa(addr->sin_addr), host->name);
344 }
346 icmp_lost++;
347 host->icmp_lost++;
348 /* don't spend time on lost hosts any more */
349 if(host->flags & FLAG_LOST_CAUSE) return 0;
351 /* source quench means we're sending too fast, so increase the
352 * interval and mark this packet lost */
353 if(p.icmp_type == ICMP_SOURCEQUENCH) {
354 pkt_interval *= pkt_backoff_factor;
355 target_interval *= target_backoff_factor;
356 }
357 else {
358 targets_down++;
359 host->flags |= FLAG_LOST_CAUSE;
360 }
361 host->icmp_type = p.icmp_type;
362 host->icmp_code = p.icmp_code;
363 host->error_addr.s_addr = addr->sin_addr.s_addr;
365 return 0;
366 }
368 int
369 main(int argc, char **argv)
370 {
371 int i;
372 char *ptr;
373 long int arg;
374 int icmp_sockerrno, udp_sockerrno, tcp_sockerrno;
375 int result;
376 struct rta_host *host;
378 setlocale (LC_ALL, "");
379 bindtextdomain (PACKAGE, LOCALEDIR);
380 textdomain (PACKAGE);
382 /* print a helpful error message if geteuid != 0 */
383 np_warn_if_not_root();
385 /* we only need to be setsuid when we get the sockets, so do
386 * that before pointer magic (esp. on network data) */
387 icmp_sockerrno = udp_sockerrno = tcp_sockerrno = sockets = 0;
389 if((icmp_sock = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP)) != -1)
390 sockets |= HAVE_ICMP;
391 else icmp_sockerrno = errno;
393 /* if((udp_sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP)) != -1) */
394 /* sockets |= HAVE_UDP; */
395 /* else udp_sockerrno = errno; */
397 /* if((tcp_sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) */
398 /* sockets |= HAVE_TCP; */
399 /* else tcp_sockerrno = errno; */
401 /* now drop privileges (no effect if not setsuid or geteuid() == 0) */
402 setuid(getuid());
404 /* POSIXLY_CORRECT might break things, so unset it (the portable way) */
405 environ = NULL;
407 /* use the pid to mark packets as ours */
408 /* Some systems have 32-bit pid_t so mask off only 16 bits */
409 pid = getpid() & 0xffff;
410 /* printf("pid = %u\n", pid); */
412 /* get calling name the old-fashioned way for portability instead
413 * of relying on the glibc-ism __progname */
414 ptr = strrchr(argv[0], '/');
415 if(ptr) progname = &ptr[1];
416 else progname = argv[0];
418 /* now set defaults. Use progname to set them initially (allows for
419 * superfast check_host program when target host is up */
420 cursor = list = NULL;
421 table = NULL;
423 mode = MODE_RTA;
424 crit.rta = 500000;
425 crit.pl = 80;
426 warn.rta = 200000;
427 warn.pl = 40;
428 protocols = HAVE_ICMP | HAVE_UDP | HAVE_TCP;
429 pkt_interval = 80000; /* 80 msec packet interval by default */
430 packets = 5;
432 if(!strcmp(progname, "check_icmp") || !strcmp(progname, "check_ping")) {
433 mode = MODE_ICMP;
434 protocols = HAVE_ICMP;
435 }
436 else if(!strcmp(progname, "check_host")) {
437 mode = MODE_HOSTCHECK;
438 pkt_interval = 1000000;
439 packets = 5;
440 crit.rta = warn.rta = 1000000;
441 crit.pl = warn.pl = 100;
442 }
443 else if(!strcmp(progname, "check_rta_multi")) {
444 mode = MODE_ALL;
445 target_interval = 0;
446 pkt_interval = 50000;
447 packets = 5;
448 }
450 /* parse the arguments */
451 for(i = 1; i < argc; i++) {
452 while((arg = getopt(argc, argv, "vhVw:c:n:p:t:H:s:i:b:I:l:m:")) != EOF) {
453 switch(arg) {
454 case 'v':
455 debug++;
456 break;
457 case 'b':
458 /* silently ignored for now */
459 break;
460 case 'i':
461 pkt_interval = get_timevar(optarg);
462 break;
463 case 'I':
464 target_interval = get_timevar(optarg);
465 break;
466 case 'w':
467 get_threshold(optarg, &warn);
468 break;
469 case 'c':
470 get_threshold(optarg, &crit);
471 break;
472 case 'n':
473 case 'p':
474 packets = strtoul(optarg, NULL, 0);
475 break;
476 case 't':
477 timeout = strtoul(optarg, NULL, 0);
478 if(!timeout) timeout = 10;
479 break;
480 case 'H':
481 add_target(optarg);
482 break;
483 case 'l':
484 ttl = (unsigned char)strtoul(optarg, NULL, 0);
485 break;
486 case 'm':
487 min_hosts_alive = (int)strtoul(optarg, NULL, 0);
488 break;
489 case 'd': /* implement later, for cluster checks */
490 warn_down = (unsigned char)strtoul(optarg, &ptr, 0);
491 if(ptr) {
492 crit_down = (unsigned char)strtoul(ptr + 1, NULL, 0);
493 }
494 break;
495 case 's': /* specify source IP address */
496 set_source_ip(optarg);
497 break;
498 case 'V': /* version */
499 /*print_revision (progname, revision);*/ /* FIXME: Why? */
500 exit (STATE_OK);
501 case 'h': /* help */
502 print_help ();
503 exit (STATE_OK);
504 }
505 }
506 }
508 argv = &argv[optind];
509 while(*argv) {
510 add_target(*argv);
511 argv++;
512 }
513 if(!targets) {
514 errno = 0;
515 crash("No hosts to check");
516 exit(3);
517 }
519 if(!sockets) {
520 if(icmp_sock == -1) {
521 errno = icmp_sockerrno;
522 crash("Failed to obtain ICMP socket");
523 return -1;
524 }
525 /* if(udp_sock == -1) { */
526 /* errno = icmp_sockerrno; */
527 /* crash("Failed to obtain UDP socket"); */
528 /* return -1; */
529 /* } */
530 /* if(tcp_sock == -1) { */
531 /* errno = icmp_sockerrno; */
532 /* crash("Failed to obtain TCP socker"); */
533 /* return -1; */
534 /* } */
535 }
536 if(!ttl) ttl = 64;
538 if(icmp_sock) {
539 result = setsockopt(icmp_sock, SOL_IP, IP_TTL, &ttl, sizeof(ttl));
540 if(debug) {
541 if(result == -1) printf("setsockopt failed\n");
542 else printf("ttl set to %u\n", ttl);
543 }
544 }
546 /* stupid users should be able to give whatever thresholds they want
547 * (nothing will break if they do), but some anal plugin maintainer
548 * will probably add some printf() thing here later, so it might be
549 * best to at least show them where to do it. ;) */
550 if(warn.pl > crit.pl) warn.pl = crit.pl;
551 if(warn.rta > crit.rta) warn.rta = crit.rta;
552 if(warn_down > crit_down) crit_down = warn_down;
554 signal(SIGINT, finish);
555 signal(SIGHUP, finish);
556 signal(SIGTERM, finish);
557 signal(SIGALRM, finish);
558 if(debug) printf("Setting alarm timeout to %u seconds\n", timeout);
559 alarm(timeout);
561 /* make sure we don't wait any longer than necessary */
562 gettimeofday(&prog_start, &tz);
563 max_completion_time =
564 ((targets * packets * pkt_interval) + (targets * target_interval)) +
565 (targets * packets * crit.rta) + crit.rta;
567 if(debug) {
568 printf("packets: %u, targets: %u\n"
569 "target_interval: %0.3f, pkt_interval %0.3f\n"
570 "crit.rta: %0.3f\n"
571 "max_completion_time: %0.3f\n",
572 packets, targets,
573 (float)target_interval / 1000, (float)pkt_interval / 1000,
574 (float)crit.rta / 1000,
575 (float)max_completion_time / 1000);
576 }
578 if(debug) {
579 if(max_completion_time > (u_int)timeout * 1000000) {
580 printf("max_completion_time: %llu timeout: %u\n",
581 max_completion_time, timeout);
582 printf("Timout must be at lest %llu\n",
583 max_completion_time / 1000000 + 1);
584 }
585 }
587 icmp_pkt_size = icmp_data_size + ICMP_MINLEN;
588 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
589 if(icmp_pkt_size < sizeof(struct icmp) + sizeof(struct icmp_ping_data)) {
590 icmp_pkt_size = sizeof(struct icmp) + sizeof(struct icmp_ping_data);
591 }
592 if(debug > 2) printf("icmp_pkt_size = %u\n", icmp_pkt_size);
594 if(debug) {
595 printf("crit = {%u, %u%%}, warn = {%u, %u%%}\n",
596 crit.rta, crit.pl, warn.rta, warn.pl);
597 printf("pkt_interval: %u target_interval: %u retry_interval: %u\n",
598 pkt_interval, target_interval, retry_interval);
599 printf("icmp_pkt_size: %u timeout: %u\n",
600 icmp_pkt_size, timeout);
601 }
603 if(packets > 20) {
604 errno = 0;
605 crash("packets is > 20 (%d)", packets);
606 }
608 if(min_hosts_alive < -1) {
609 errno = 0;
610 crash("minimum alive hosts is negative (%i)", min_hosts_alive);
611 }
613 host = list;
614 table = malloc(sizeof(struct rta_host **) * (argc - 1));
615 i = 0;
616 while(host) {
617 host->id = i;
618 table[i] = host;
619 host = host->next;
620 i++;
621 }
623 run_checks();
625 errno = 0;
626 finish(0);
628 return(0);
629 }
631 static void
632 run_checks()
633 {
634 u_int i, t, result;
635 u_int final_wait, time_passed;
637 /* this loop might actually violate the pkt_interval or target_interval
638 * settings, but only if there aren't any packets on the wire which
639 * indicates that the target can handle an increased packet rate */
640 for(i = 0; i < packets; i++) {
641 for(t = 0; t < targets; t++) {
642 /* don't send useless packets */
643 if(!targets_alive) finish(0);
644 if(table[t]->flags & FLAG_LOST_CAUSE) {
645 if(debug) printf("%s is a lost cause. not sending any more\n",
646 table[t]->name);
647 continue;
648 }
650 /* we're still in the game, so send next packet */
651 (void)send_icmp_ping(icmp_sock, table[t]);
652 result = wait_for_reply(icmp_sock, target_interval);
653 }
654 result = wait_for_reply(icmp_sock, pkt_interval * targets);
655 }
657 if(icmp_pkts_en_route && targets_alive) {
658 time_passed = get_timevaldiff(NULL, NULL);
659 final_wait = max_completion_time - time_passed;
661 if(debug) {
662 printf("time_passed: %u final_wait: %u max_completion_time: %llu\n",
663 time_passed, final_wait, max_completion_time);
664 }
665 if(time_passed > max_completion_time) {
666 if(debug) printf("Time passed. Finishing up\n");
667 finish(0);
668 }
670 /* catch the packets that might come in within the timeframe, but
671 * haven't yet */
672 if(debug) printf("Waiting for %u micro-seconds (%0.3f msecs)\n",
673 final_wait, (float)final_wait / 1000);
674 result = wait_for_reply(icmp_sock, final_wait);
675 }
676 }
678 /* response structure:
679 * ip header : 20 bytes
680 * icmp header : 28 bytes
681 * icmp echo reply : the rest
682 */
683 static int
684 wait_for_reply(int sock, u_int t)
685 {
686 int n, hlen;
687 static char buf[4096];
688 struct sockaddr_in resp_addr;
689 struct ip *ip;
690 struct icmp icp;
691 struct rta_host *host;
692 struct icmp_ping_data data;
693 struct timeval wait_start, now;
694 u_int tdiff, i, per_pkt_wait;
696 /* if we can't listen or don't have anything to listen to, just return */
697 if(!t || !icmp_pkts_en_route) return 0;
699 gettimeofday(&wait_start, &tz);
701 i = t;
702 per_pkt_wait = t / icmp_pkts_en_route;
703 while(icmp_pkts_en_route && get_timevaldiff(&wait_start, NULL) < i) {
704 t = per_pkt_wait;
706 /* wrap up if all targets are declared dead */
707 if(!targets_alive ||
708 get_timevaldiff(&prog_start, NULL) >= max_completion_time ||
709 (mode == MODE_HOSTCHECK && targets_down))
710 {
711 finish(0);
712 }
714 /* reap responses until we hit a timeout */
715 n = recvfrom_wto(sock, buf, sizeof(buf),
716 (struct sockaddr *)&resp_addr, &t);
717 if(!n) {
718 if(debug > 1) {
719 printf("recvfrom_wto() timed out during a %u usecs wait\n",
720 per_pkt_wait);
721 }
722 continue; /* timeout for this one, so keep trying */
723 }
724 if(n < 0) {
725 if(debug) printf("recvfrom_wto() returned errors\n");
726 return n;
727 }
729 ip = (struct ip *)buf;
730 if(debug > 1) printf("received %u bytes from %s\n",
731 ntohs(ip->ip_len), inet_ntoa(resp_addr.sin_addr));
733 /* obsolete. alpha on tru64 provides the necessary defines, but isn't broken */
734 /* #if defined( __alpha__ ) && __STDC__ && !defined( __GLIBC__ ) */
735 /* alpha headers are decidedly broken. Using an ansi compiler,
736 * they provide ip_vhl instead of ip_hl and ip_v, so we mask
737 * off the bottom 4 bits */
738 /* hlen = (ip->ip_vhl & 0x0f) << 2; */
739 /* #else */
740 hlen = ip->ip_hl << 2;
741 /* #endif */
743 if(n < (hlen + ICMP_MINLEN)) {
744 crash("received packet too short for ICMP (%d bytes, expected %d) from %s\n",
745 n, hlen + icmp_pkt_size, inet_ntoa(resp_addr.sin_addr));
746 }
747 /* else if(debug) { */
748 /* printf("ip header size: %u, packet size: %u (expected %u, %u)\n", */
749 /* hlen, ntohs(ip->ip_len) - hlen, */
750 /* sizeof(struct ip), icmp_pkt_size); */
751 /* } */
753 /* check the response */
754 memcpy(&icp, buf + hlen, sizeof(icp));
756 if(icp.icmp_id != pid) {
757 handle_random_icmp(buf + hlen, &resp_addr);
758 continue;
759 }
761 if(icp.icmp_type != ICMP_ECHOREPLY || icp.icmp_seq >= targets) {
762 if(debug > 2) printf("not a proper ICMP_ECHOREPLY\n");
763 handle_random_icmp(buf + hlen, &resp_addr);
764 continue;
765 }
767 /* this is indeed a valid response */
768 memcpy(&data, icp.icmp_data, sizeof(data));
770 host = table[icp.icmp_seq];
771 gettimeofday(&now, &tz);
772 tdiff = get_timevaldiff(&data.stime, &now);
774 host->time_waited += tdiff;
775 host->icmp_recv++;
776 icmp_recv++;
778 if(debug) {
779 printf("%0.3f ms rtt from %s, outgoing ttl: %u, incoming ttl: %u\n",
780 (float)tdiff / 1000, inet_ntoa(resp_addr.sin_addr),
781 ttl, ip->ip_ttl);
782 }
784 /* if we're in hostcheck mode, exit with limited printouts */
785 if(mode == MODE_HOSTCHECK) {
786 printf("OK - %s responds to ICMP. Packet %u, rta %0.3fms|"
787 "pkt=%u;;0;%u rta=%0.3f;%0.3f;%0.3f;;\n",
788 host->name, icmp_recv, (float)tdiff / 1000,
789 icmp_recv, packets, (float)tdiff / 1000,
790 (float)warn.rta / 1000, (float)crit.rta / 1000);
791 exit(STATE_OK);
792 }
793 }
795 return 0;
796 }
798 /* the ping functions */
799 static int
800 send_icmp_ping(int sock, struct rta_host *host)
801 {
802 static union {
803 char *buf; /* re-use so we prevent leaks */
804 struct icmp *icp;
805 u_short *cksum_in;
806 } packet = { NULL };
807 long int len;
808 struct icmp_ping_data data;
809 struct timeval tv;
810 struct sockaddr *addr;
812 if(sock == -1) {
813 errno = 0;
814 crash("Attempt to send on bogus socket");
815 return -1;
816 }
817 addr = (struct sockaddr *)&host->saddr_in;
819 if(!packet.buf) {
820 if (!(packet.buf = malloc(icmp_pkt_size))) {
821 crash("send_icmp_ping(): failed to malloc %d bytes for send buffer",
822 icmp_pkt_size);
823 return -1; /* might be reached if we're in debug mode */
824 }
825 }
826 memset(packet.buf, 0, icmp_pkt_size);
828 if((gettimeofday(&tv, &tz)) == -1) return -1;
830 data.ping_id = 10; /* host->icmp.icmp_sent; */
831 memcpy(&data.stime, &tv, sizeof(tv));
832 memcpy(&packet.icp->icmp_data, &data, sizeof(data));
833 packet.icp->icmp_type = ICMP_ECHO;
834 packet.icp->icmp_code = 0;
835 packet.icp->icmp_cksum = 0;
836 packet.icp->icmp_id = pid;
837 packet.icp->icmp_seq = host->id;
838 packet.icp->icmp_cksum = icmp_checksum(packet.cksum_in, icmp_pkt_size);
840 len = sendto(sock, packet.buf, icmp_pkt_size, 0, (struct sockaddr *)addr,
841 sizeof(struct sockaddr));
843 if(len < 0 || (unsigned int)len != icmp_pkt_size) {
844 if(debug) printf("Failed to send ping to %s\n",
845 inet_ntoa(host->saddr_in.sin_addr));
846 return -1;
847 }
849 icmp_sent++;
850 host->icmp_sent++;
852 return 0;
853 }
855 static int
856 recvfrom_wto(int sock, char *buf, unsigned int len, struct sockaddr *saddr,
857 u_int *timo)
858 {
859 u_int slen;
860 int n;
861 struct timeval to, then, now;
862 fd_set rd, wr;
864 if(!*timo) {
865 if(debug) printf("*timo is not\n");
866 return 0;
867 }
869 to.tv_sec = *timo / 1000000;
870 to.tv_usec = (*timo - (to.tv_sec * 1000000));
872 FD_ZERO(&rd);
873 FD_ZERO(&wr);
874 FD_SET(sock, &rd);
875 errno = 0;
876 gettimeofday(&then, &tz);
877 n = select(sock + 1, &rd, &wr, NULL, &to);
878 if(n < 0) crash("select() in recvfrom_wto");
879 gettimeofday(&now, &tz);
880 *timo = get_timevaldiff(&then, &now);
882 if(!n) return 0; /* timeout */
884 slen = sizeof(struct sockaddr);
886 return recvfrom(sock, buf, len, 0, saddr, &slen);
887 }
889 static void
890 finish(int sig)
891 {
892 u_int i = 0;
893 unsigned char pl;
894 double rta;
895 struct rta_host *host;
896 const char *status_string[] =
897 {"OK", "WARNING", "CRITICAL", "UNKNOWN", "DEPENDENT"};
898 int hosts_ok = 0;
899 int hosts_warn = 0;
901 alarm(0);
902 if(debug > 1) printf("finish(%d) called\n", sig);
904 if(icmp_sock != -1) close(icmp_sock);
905 if(udp_sock != -1) close(udp_sock);
906 if(tcp_sock != -1) close(tcp_sock);
908 if(debug) {
909 printf("icmp_sent: %u icmp_recv: %u icmp_lost: %u\n",
910 icmp_sent, icmp_recv, icmp_lost);
911 printf("targets: %u targets_alive: %u\n", targets, targets_alive);
912 }
914 /* iterate thrice to calculate values, give output, and print perfparse */
915 host = list;
916 while(host) {
917 if(!host->icmp_recv) {
918 /* rta 0 is ofcourse not entirely correct, but will still show up
919 * conspicuosly as missing entries in perfparse and cacti */
920 pl = 100;
921 rta = 0;
922 status = STATE_CRITICAL;
923 /* up the down counter if not already counted */
924 if(!(host->flags & FLAG_LOST_CAUSE) && targets_alive) targets_down++;
925 }
926 else {
927 pl = ((host->icmp_sent - host->icmp_recv) * 100) / host->icmp_sent;
928 rta = (double)host->time_waited / host->icmp_recv;
929 }
930 host->pl = pl;
931 host->rta = rta;
932 if(pl >= crit.pl || rta >= crit.rta) {
933 status = STATE_CRITICAL;
934 }
935 else if(!status && (pl >= warn.pl || rta >= warn.rta)) {
936 status = STATE_WARNING;
937 hosts_warn++;
938 }
939 else {
940 hosts_ok++;
941 }
943 host = host->next;
944 }
945 /* this is inevitable */
946 if(!targets_alive) status = STATE_CRITICAL;
947 if(min_hosts_alive > -1) {
948 if(hosts_ok >= min_hosts_alive) status = STATE_OK;
949 else if((hosts_ok + hosts_warn) >= min_hosts_alive) status = STATE_WARNING;
950 }
951 printf("%s - ", status_string[status]);
953 host = list;
954 while(host) {
955 if(debug) puts("");
956 if(i) {
957 if(i < targets) printf(" :: ");
958 else printf("\n");
959 }
960 i++;
961 if(!host->icmp_recv) {
962 status = STATE_CRITICAL;
963 if(host->flags & FLAG_LOST_CAUSE) {
964 printf("%s: %s @ %s. rta nan, lost %d%%",
965 host->name,
966 get_icmp_error_msg(host->icmp_type, host->icmp_code),
967 inet_ntoa(host->error_addr),
968 100);
969 }
970 else { /* not marked as lost cause, so we have no flags for it */
971 printf("%s: rta nan, lost 100%%", host->name);
972 }
973 }
974 else { /* !icmp_recv */
975 printf("%s: rta %0.3fms, lost %u%%",
976 host->name, host->rta / 1000, host->pl);
977 }
979 host = host->next;
980 }
982 /* iterate once more for pretty perfparse output */
983 printf("|");
984 i = 0;
985 host = list;
986 while(host) {
987 if(debug) puts("");
988 printf("%srta=%0.3fms;%0.3f;%0.3f;0; %spl=%u%%;%u;%u;; ",
989 (targets > 1) ? host->name : "",
990 host->rta / 1000, (float)warn.rta / 1000, (float)crit.rta / 1000,
991 (targets > 1) ? host->name : "",
992 host->pl, warn.pl, crit.pl);
994 host = host->next;
995 }
997 if(min_hosts_alive > -1) {
998 if(hosts_ok >= min_hosts_alive) status = STATE_OK;
999 else if((hosts_ok + hosts_warn) >= min_hosts_alive) status = STATE_WARNING;
1000 }
1002 /* finish with an empty line */
1003 puts("");
1004 if(debug) printf("targets: %u, targets_alive: %u, hosts_ok: %u, hosts_warn: %u, min_hosts_alive: %i\n",
1005 targets, targets_alive, hosts_ok, hosts_warn, min_hosts_alive);
1007 exit(status);
1008 }
1010 static u_int
1011 get_timevaldiff(struct timeval *early, struct timeval *later)
1012 {
1013 u_int ret;
1014 struct timeval now;
1016 if(!later) {
1017 gettimeofday(&now, &tz);
1018 later = &now;
1019 }
1020 if(!early) early = &prog_start;
1022 /* if early > later we return 0 so as to indicate a timeout */
1023 if(early->tv_sec > early->tv_sec ||
1024 (early->tv_sec == later->tv_sec && early->tv_usec > later->tv_usec))
1025 {
1026 return 0;
1027 }
1029 ret = (later->tv_sec - early->tv_sec) * 1000000;
1030 ret += later->tv_usec - early->tv_usec;
1032 return ret;
1033 }
1035 static int
1036 add_target_ip(char *arg, struct in_addr *in)
1037 {
1038 struct rta_host *host;
1040 /* disregard obviously stupid addresses */
1041 if(in->s_addr == INADDR_NONE || in->s_addr == INADDR_ANY)
1042 return -1;
1044 /* no point in adding two identical IP's, so don't. ;) */
1045 host = list;
1046 while(host) {
1047 if(host->saddr_in.sin_addr.s_addr == in->s_addr) {
1048 if(debug) printf("Identical IP already exists. Not adding %s\n", arg);
1049 return -1;
1050 }
1051 host = host->next;
1052 }
1054 /* add the fresh ip */
1055 host = malloc(sizeof(struct rta_host));
1056 if(!host) {
1057 crash("add_target_ip(%s, %s): malloc(%d) failed",
1058 arg, inet_ntoa(*in), sizeof(struct rta_host));
1059 }
1060 memset(host, 0, sizeof(struct rta_host));
1062 /* set the values. use calling name for output */
1063 host->name = strdup(arg);
1065 /* fill out the sockaddr_in struct */
1066 host->saddr_in.sin_family = AF_INET;
1067 host->saddr_in.sin_addr.s_addr = in->s_addr;
1069 if(!list) list = cursor = host;
1070 else cursor->next = host;
1072 cursor = host;
1073 targets++;
1075 return 0;
1076 }
1078 /* wrapper for add_target_ip */
1079 static int
1080 add_target(char *arg)
1081 {
1082 int i;
1083 struct hostent *he;
1084 struct in_addr *in, ip;
1086 /* don't resolve if we don't have to */
1087 if((ip.s_addr = inet_addr(arg)) != INADDR_NONE) {
1088 /* don't add all ip's if we were given a specific one */
1089 return add_target_ip(arg, &ip);
1090 /* he = gethostbyaddr((char *)in, sizeof(struct in_addr), AF_INET); */
1091 /* if(!he) return add_target_ip(arg, in); */
1092 }
1093 else {
1094 errno = 0;
1095 he = gethostbyname(arg);
1096 if(!he) {
1097 errno = 0;
1098 crash("Failed to resolve %s", arg);
1099 return -1;
1100 }
1101 }
1103 /* possibly add all the IP's as targets */
1104 for(i = 0; he->h_addr_list[i]; i++) {
1105 in = (struct in_addr *)he->h_addr_list[i];
1106 add_target_ip(arg, in);
1108 /* this is silly, but it works */
1109 if(mode == MODE_HOSTCHECK || mode == MODE_ALL) {
1110 if(debug > 2) printf("mode: %d\n", mode);
1111 continue;
1112 }
1113 break;
1114 }
1116 return 0;
1117 }
1119 static void
1120 set_source_ip(char *arg)
1121 {
1122 struct sockaddr_in src;
1124 memset(&src, 0, sizeof(src));
1125 src.sin_family = AF_INET;
1126 if((src.sin_addr.s_addr = inet_addr(arg)) == INADDR_NONE)
1127 src.sin_addr.s_addr = get_ip_address(arg);
1128 if(bind(icmp_sock, (struct sockaddr *)&src, sizeof(src)) == -1)
1129 crash("Cannot bind to IP address %s", arg);
1130 }
1132 /* TODO: Move this to netutils.c and also change check_dhcp to use that. */
1133 static in_addr_t
1134 get_ip_address(const char *ifname)
1135 {
1136 #if defined(SIOCGIFADDR)
1137 struct ifreq ifr;
1138 struct sockaddr_in ip;
1140 strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
1141 ifr.ifr_name[sizeof(ifr.ifr_name) - 1] = '\0';
1142 if(ioctl(icmp_sock, SIOCGIFADDR, &ifr) == -1)
1143 crash("Cannot determine IP address of interface %s", ifname);
1144 memcpy(&ip, &ifr.ifr_addr, sizeof(ip));
1145 return ip.sin_addr.s_addr;
1146 #else
1147 errno = 0;
1148 crash("Cannot get interface IP address on this platform.");
1149 #endif
1150 }
1152 /*
1153 * u = micro
1154 * m = milli
1155 * s = seconds
1156 * return value is in microseconds
1157 */
1158 static u_int
1159 get_timevar(const char *str)
1160 {
1161 char p, u, *ptr;
1162 unsigned int len;
1163 u_int i, d; /* integer and decimal, respectively */
1164 u_int factor = 1000; /* default to milliseconds */
1166 if(!str) return 0;
1167 len = strlen(str);
1168 if(!len) return 0;
1170 /* unit might be given as ms|m (millisec),
1171 * us|u (microsec) or just plain s, for seconds */
1172 u = p = '\0';
1173 u = str[len - 1];
1174 if(len >= 2 && !isdigit((int)str[len - 2])) p = str[len - 2];
1175 if(p && u == 's') u = p;
1176 else if(!p) p = u;
1177 if(debug > 2) printf("evaluating %s, u: %c, p: %c\n", str, u, p);
1179 if(u == 'u') factor = 1; /* microseconds */
1180 else if(u == 'm') factor = 1000; /* milliseconds */
1181 else if(u == 's') factor = 1000000; /* seconds */
1182 if(debug > 2) printf("factor is %u\n", factor);
1184 i = strtoul(str, &ptr, 0);
1185 if(!ptr || *ptr != '.' || strlen(ptr) < 2 || factor == 1)
1186 return i * factor;
1188 /* time specified in usecs can't have decimal points, so ignore them */
1189 if(factor == 1) return i;
1191 d = strtoul(ptr + 1, NULL, 0);
1193 /* d is decimal, so get rid of excess digits */
1194 while(d >= factor) d /= 10;
1196 /* the last parenthesis avoids floating point exceptions. */
1197 return ((i * factor) + (d * (factor / 10)));
1198 }
1200 /* not too good at checking errors, but it'll do (main() should barfe on -1) */
1201 static int
1202 get_threshold(char *str, threshold *th)
1203 {
1204 char *p = NULL, i = 0;
1206 if(!str || !strlen(str) || !th) return -1;
1208 /* pointer magic slims code by 10 lines. i is bof-stop on stupid libc's */
1209 p = &str[strlen(str) - 1];
1210 while(p != &str[1]) {
1211 if(*p == '%') *p = '\0';
1212 else if(*p == ',' && i) {
1213 *p = '\0'; /* reset it so get_timevar(str) works nicely later */
1214 th->pl = (unsigned char)strtoul(p+1, NULL, 0);
1215 break;
1216 }
1217 i = 1;
1218 p--;
1219 }
1220 th->rta = get_timevar(str);
1222 if(!th->rta) return -1;
1224 if(th->rta > MAXTTL * 1000000) th->rta = MAXTTL * 1000000;
1225 if(th->pl > 100) th->pl = 100;
1227 return 0;
1228 }
/*
 * Compute the 16-bit ones-complement checksum over a buffer.
 *
 * p: start of the data, read as a sequence of 16-bit words.
 * n: length of the data in bytes; an odd trailing byte is treated as a
 *    16-bit word padded with a zero byte.
 *
 * Returns the complemented, folded sum, ready to be stored in the
 * checksum field of the packet.
 */
unsigned short
icmp_checksum(unsigned short *p, int n)
{
	unsigned short cksum;
	unsigned long sum = 0;

	while(n > 1) {
		sum += *p++;
		n -= 2;
	}

	/* mop up the occasional odd byte. Only that single byte is read:
	 * copying it into the first byte of a zeroed word avoids touching
	 * memory past the buffer and pads correctly on either byte order. */
	if(n == 1) {
		unsigned short last = 0;

		memcpy(&last, p, 1);
		sum += last;
	}

	sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */
	sum += (sum >> 16);                 /* add carry */
	cksum = (unsigned short)~sum;       /* ones-complement, trunc to 16 bits */

	return cksum;
}
1251 void
1252 print_help(void)
1253 {
1255 /*print_revision (progname, revision);*/ /* FIXME: Why? */
1257 printf ("Copyright (c) 2005 Andreas Ericsson <ae@op5.se>\n");
1258 printf (COPYRIGHT, copyright, email);
1260 printf ("\n\n");
1262 print_usage ();
1264 printf (_(UT_HELP_VRSN));
1266 printf (" %s\n", "-H");
1267 printf (" %s\n", _("specify a target"));
1268 printf (" %s\n", "-w");
1269 printf (" %s", _("warning threshold (currently "));
1270 printf ("%0.3fms,%u%%)\n", (float)warn.rta / 1000 , warn.pl / 1000);
1271 printf (" %s\n", "-c");
1272 printf (" %s", _("critical threshold (currently "));
1273 printf ("%0.3fms,%u%%)\n", (float)crit.rta, crit.pl);
1274 printf (" %s\n", "-s");
1275 printf (" %s\n", _("specify a source IP address or device name"));
1276 printf (" %s\n", "-n");
1277 printf (" %s", _("number of packets to send (currently "));
1278 printf ("%u)\n",packets);
1279 printf (" %s\n", "-i");
1280 printf (" %s", _("max packet interval (currently "));
1281 printf ("%0.3fms)\n",(float)pkt_interval / 1000);
1282 printf (" %s\n", "-I");
1283 printf (" %s", _("max target interval (currently "));
1284 printf ("%0.3fms)\n", (float)target_interval / 1000);
1285 printf (" %s\n", "-m");
1286 printf (" %s",_("number of alive hosts required for success"));
1287 printf ("\n");
1288 printf (" %s\n", "-l");
1289 printf (" %s", _("TTL on outgoing packets (currently "));
1290 printf ("%u)", ttl);
1291 printf (" %s\n", "-t");
1292 printf (" %s",_("timeout value (seconds, currently "));
1293 printf ("%u)\n", timeout);
1294 printf (" %s\n", "-b");
1295 printf (" %s\n", _("icmp packet size (currenly ignored)"));
1296 printf (" %s\n", "-v");
1297 printf (" %s\n", _("verbose"));
1299 printf ("\n");
1300 printf ("%s\n\n", _("The -H switch is optional. Naming a host (or several) to check is not."));
1301 printf ("%s\n", _("Threshold format for -w and -c is 200.25,60% for 200.25 msec RTA and 60%"));
1302 printf ("%s\n", _("packet loss. The default values should work well for most users."));
1303 printf ("%s\n", _("You can specify different RTA factors using the standardized abbreviations"));
1304 printf ("%s\n\n", _("us (microseconds), ms (milliseconds, default) or just plain s for seconds."));
1305 /* -d not yet implemented */
1306 /* printf ("%s\n", _("Threshold format for -d is warn,crit. 12,14 means WARNING if >= 12 hops"));
1307 printf ("%s\n", _("are spent and CRITICAL if >= 14 hops are spent."));
1308 printf ("%s\n\n", _("NOTE: Some systems decrease TTL when forming ICMP_ECHOREPLY, others do not."));*/
1309 printf ("%s\n\n", _("The -v switch can be specified several times for increased verbosity."));
1311 /* printf ("%s\n", _("Long options are currently unsupported."));
1312 printf ("%s\n", _("Options marked with * require an argument"));
1313 */
1314 printf (_(UT_SUPPORT));
1316 printf (_(UT_NOWARRANTY));
1317 }
1321 void
1322 print_usage (void)
1323 {
1324 printf (_("Usage:"));
1325 printf(" %s [options] [-H] host1 host2 hostn\n", progname);
1326 }