From e667553b97c358f5d80608f62c291deffc0328d5 Mon Sep 17 00:00:00 2001 From: "M. Sean Finney" Date: Mon, 1 May 2006 21:52:42 +0000 Subject: [PATCH] - check_ntp: - now roughly feature-complete. - various bugfixes, esp. offset calculation. - enhanced the asynchronous offset polling to set requests that haven't received a response in >= 1 second to stale and retransmit them, which results in much better performance on unreliable networks. - we only spend timeout/2 seconds polling offsets, and if we don't get everything by that point we work with what we have and set status to warning/critical depending on how much data we have. - set the same defaults as the perl script. - commit changes to configure.in to support automatic building of check_apt (if apt-get is installed and regex libraries available) and check_ntp (unconditionally), now defaulting to check_ntp.c instead of the perl script. if this is an issue we can back out the commit of course. an eye should be kept on check_ntp building and running correctly in different environments, esp. 64-bit and big-endian platforms, and those with more "esoteric" APIs (do any of the platforms not have poll()?). - similar changes to Makefile.am's. - common.h: add statement to include sys/poll.h - runcmd.c: exit STATE_UNKNOWN if execve() fails. 
git-svn-id: https://nagiosplug.svn.sourceforge.net/svnroot/nagiosplug/nagiosplug/trunk@1386 f882894a-f735-0410-b71e-b25c423dba1c --- configure.in | 34 +++--- plugins-scripts/Makefile.am | 2 +- plugins/Makefile.am | 10 +- plugins/check_ntp.c | 220 +++++++++++++++++++++++------------- plugins/common.h | 4 + plugins/runcmd.c | 2 +- 6 files changed, 174 insertions(+), 98 deletions(-) diff --git a/configure.in b/configure.in index c143905..6556ecd 100644 --- a/configure.in +++ b/configure.in @@ -529,8 +529,9 @@ dnl AC_HEADER_STDC AC_HEADER_TIME AC_HEADER_SYS_WAIT -AC_CHECK_HEADERS(signal.h strings.h string.h syslog.h uio.h errno.h regex.h sys/types.h sys/time.h sys/socket.h sys/loadavg.h sys/un.h) +AC_CHECK_HEADERS(signal.h strings.h string.h syslog.h uio.h errno.h sys/types.h sys/time.h sys/socket.h sys/loadavg.h sys/un.h sys/poll.h) AC_CHECK_HEADERS(features.h stdarg.h sys/unistd.h ctype.h stdlib.h) +AC_CHECK_HEADERS(regex.h,FOUNDREGEX=yes,FOUNDREGEX=no) dnl Checks for typedefs, structures, and compiler characteristics. AC_C_CONST @@ -601,7 +602,7 @@ AC_TRY_COMPILE([#include ], dnl Checks for library functions. 
AC_CHECK_FUNCS(memmove select socket strdup strstr strtod strtol strtoul floor) -AC_CHECK_FUNCS(basename) +AC_CHECK_FUNCS(basename poll) AC_MSG_CHECKING(return type of socket size) AC_TRY_COMPILE([#include @@ -1263,20 +1264,6 @@ AC_ARG_WITH(rpcinfo_command, [sets path to rpcinfo]), PATH_TO_RPCINFO=$withval) AC_DEFINE_UNQUOTED(PATH_TO_RPCINFO,"$PATH_TO_RPCINFO",[path to rpcinfo binary]) -AC_PATH_PROG(PATH_TO_NTPDATE,ntpdate) -AC_ARG_WITH(ntpdate_command, - ACX_HELP_STRING([--with-ntpdate-command=PATH], - [sets path to ntpdate]), PATH_TO_NTPDATE=$withval) -AC_PATH_PROGS(PATH_TO_NTPDC,ntpdc xntpdc) -AC_PATH_PROGS(PATH_TO_NTPQ,ntpq) -if (test -n "$PATH_TO_NTPDATE" || test -n "$PATH_TO_NTPQ") -then - AC_DEFINE_UNQUOTED(PATH_TO_NTPQ,"$PATH_TO_NTPQ",[path to ntpq binary]) - AC_DEFINE_UNQUOTED(PATH_TO_NTPDATE,"$PATH_TO_NTPDATE",[path to ntpdate binary]) -else - AC_MSG_WARN([Install NTP programs (http://www.ntp.org) if you want to monitor time synchronization]) -fi - AC_PATH_PROG(PATH_TO_LMSTAT,lmstat) if test -x "$PATH_TO_LMSTAT" then @@ -1621,6 +1608,19 @@ if test -n "$PATH_TO_DIG"; then AC_DEFINE_UNQUOTED(PATH_TO_DIG,"$PATH_TO_DIG",[Path to dig command, if present]) fi +AC_PATH_PROG(PATH_TO_APTGET,apt-get) +AC_ARG_WITH(apt-get_command, + ACX_HELP_STRING([--with-apt-get-command=PATH], + [Path to apt-get command]), + with_apt_get_command=$withval, + with_apt_get_command=$PATH_TO_APTGET) +AC_DEFINE_UNQUOTED(PATH_TO_APTGET,"$PATH_TO_APTGET",[Path to apt-get command, if present]) +# check_apt needs regex support +if test -n "$PATH_TO_APTGET" && test "$FOUNDREGEX" = "yes"; then + EXTRAS="$EXTRAS check_apt" +fi + + if test -f plugins/check_nt.c ; then EXTRAS="$EXTRAS check_nt" elif test -f ../plugins/check_nt.c ; then @@ -1718,11 +1718,11 @@ dnl the ones below that are commented out need to be cleaned up dnl in the configure code above to use with_foo instead of ac_cv_foo dnl if we want them to show up here. it'd also make the code cleaner. 
dnl i'll get to that on another rainy day :) -sf +ACX_FEATURE([with],[apt-get-command]) dnl ACX_FEATURE([with],[dig-command]) dnl ACX_FEATURE([with],[fping-command]) dnl ACX_FEATURE([with],[mailq-command]) dnl ACX_FEATURE([with],[nslookup-command]) -dnl ACX_FEATURE([with],[ntpdate-command]) ACX_FEATURE([with],[ping6-command]) ACX_FEATURE([with],[ping-command]) dnl ACX_FEATURE([with],[qstat-command]) diff --git a/plugins-scripts/Makefile.am b/plugins-scripts/Makefile.am index 5556cf6..9de45d5 100644 --- a/plugins-scripts/Makefile.am +++ b/plugins-scripts/Makefile.am @@ -5,7 +5,7 @@ SUFFIXES = .pl .sh VPATH=$(top_srcdir) $(top_srcdir)/plugins-scripts $(top_srcdir)/plugins-scripts/t libexec_SCRIPTS = check_breeze check_disk_smb check_flexlm check_ircd \ - check_log check_ntp check_oracle check_rpc check_sensors check_wave \ + check_log check_oracle check_rpc check_sensors check_wave \ check_ifstatus check_ifoperstatus check_mailq check_file_age \ utils.sh utils.pm diff --git a/plugins/Makefile.am b/plugins/Makefile.am index dc01ff2..183f4f1 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -13,8 +13,8 @@ LIBS = @LIBINTL@ @LIBS@ @SSLLIBS@ MATHLIBS = @MATHLIBS@ AM_CFLAGS = -Wall -libexec_PROGRAMS = check_disk check_dummy check_http check_load \ - check_mrtg check_mrtgtraf check_nwstat check_overcr check_ping \ +libexec_PROGRAMS = check_apt check_disk check_dummy check_http check_load \ + check_mrtg check_mrtgtraf check_ntp check_nwstat check_overcr check_ping \ check_real check_smtp check_ssh check_tcp check_time \ check_udp check_ups check_users negate \ urlize @EXTRAS@ @@ -25,7 +25,7 @@ check_tcp_programs = check_ftp check_imap check_nntp check_pop \ EXTRA_PROGRAMS = check_mysql check_radius check_pgsql check_snmp check_hpjd \ check_swap check_fping check_ldap check_game check_dig \ check_nagios check_by_ssh check_dns check_nt check_ide_smart \ - check_procs check_mysql_query + check_procs check_mysql_query check_apt EXTRA_DIST = t utils.c netutils.c 
sslutils.c popen.c utils.h netutils.h \ popen.h common.h getaddrinfo.c getaddrinfo.h \ @@ -51,6 +51,7 @@ AM_INSTALL_PROGRAM_FLAGS = @INSTALL_OPTS@ ############################################################################## # the actual targets +check_apt_LDADD = $(BASEOBJS) runcmd.o check_dig_LDADD = $(NETLIBS) runcmd.o check_disk_LDADD = $(BASEOBJS) popen.o check_dns_LDADD = $(NETLIBS) runcmd.o @@ -71,6 +72,7 @@ check_mysql_query_CPPFLAGS = $(MYSQLINCLUDE) check_mysql_query_LDADD = $(NETLIBS) $(MYSQLLIBS) check_nagios_LDADD = $(BASEOBJS) runcmd.o check_nt_LDADD = $(NETLIBS) +check_ntp_LDADD = $(NETLIBS) $(MATHLIBS) check_nwstat_LDADD = $(NETLIBS) check_overcr_LDADD = $(NETLIBS) check_pgsql_LDADD = $(NETLIBS) $(PGLIBS) @@ -92,6 +94,7 @@ check_ide_smart_LDADD = $(BASEOBJS) negate_LDADD = $(BASEOBJS) popen.o urlize_LDADD = $(BASEOBJS) popen.o +check_apt_DEPENDENCIES = check_apt.c $(BASEOBJS) runcmd.o $(DEPLIBS) check_dig_DEPENDENCIES = check_dig.c $(NETOBJS) runcmd.o $(DEPLIBS) check_disk_DEPENDENCIES = check_disk.c $(BASEOBJS) popen.o $(DEPLIBS) check_dns_DEPENDENCIES = check_dns.c $(NETOBJS) runcmd.o $(DEPLIBS) @@ -109,6 +112,7 @@ check_mysql_DEPENDENCIES = check_mysql.c $(NETOBJS) $(DEPLIBS) check_mysql_query_DEPENDENCIES = check_mysql_query.c $(NETOBJS) $(DEPLIBS) check_nagios_DEPENDENCIES = check_nagios.c $(BASEOBJS) runcmd.o $(DEPLIBS) check_nt_DEPENDENCIES = check_nt.c $(NETOBJS) $(DEPLIBS) +check_ntp_DEPENDENCIES = check_ntp.c $(NETOBJS) $(DEPLIBS) check_nwstat_DEPENDENCIES = check_nwstat.c $(NETOBJS) $(DEPLIBS) check_overcr_DEPENDENCIES = check_overcr.c $(NETOBJS) $(DEPLIBS) check_pgsql_DEPENDENCIES = check_pgsql.c $(NETOBJS) $(DEPLIBS) diff --git a/plugins/check_ntp.c b/plugins/check_ntp.c index 149ca98..655dd4f 100644 --- a/plugins/check_ntp.c +++ b/plugins/check_ntp.c @@ -29,16 +29,15 @@ const char *email = "nagiosplug-devel@lists.sourceforge.net"; #include "common.h" #include "netutils.h" #include "utils.h" -#include static char *server_address=NULL; 
static int verbose=0; static int zero_offset_bad=0; -static double owarn=0; -static double ocrit=0; +static double owarn=60; +static double ocrit=120; static short do_jitter=0; -static double jwarn=0; -static double jcrit=0; +static double jwarn=5000; +static double jcrit=10000; int process_arguments (int, char **); void print_help (void); @@ -67,8 +66,11 @@ typedef struct { /* this structure holds data about results from querying offset from a peer */ typedef struct { - int waiting; /* we set to 1 to signal waiting for a response */ + time_t waiting; /* ts set when we started waiting for a response */ int num_responses; /* number of successfully recieved responses */ + uint8_t stratum; /* copied verbatim from the ntp_message */ + double rtdelay; /* converted from the ntp_message */ + double rtdisp; /* converted from the ntp_message */ double offset[AVG_NUM]; /* offsets from each response */ } ntp_server_results; @@ -192,13 +194,12 @@ typedef struct { /* calculate the offset of the local clock */ static inline double calc_offset(const ntp_message *m, const struct timeval *t){ - double client_tx, peer_rx, peer_tx, client_rx, rtdelay; + double client_tx, peer_rx, peer_tx, client_rx; client_tx = NTP64asDOUBLE(m->origts); peer_rx = NTP64asDOUBLE(m->rxts); peer_tx = NTP64asDOUBLE(m->txts); client_rx=TVasDOUBLE((*t)); - rtdelay=NTP32asDOUBLE(m->rtdelay); - return (.5*((peer_tx-client_rx)+(peer_rx-client_tx)))-rtdelay; + return (.5*((peer_tx-client_rx)+(peer_rx-client_tx))); } /* print out a ntp packet in human readable/debuggable format */ @@ -279,14 +280,63 @@ void setup_request(ntp_message *p){ TVtoNTP64(t,p->txts); } +/* select the "best" server from a list of servers, and return its index. + * this is done by filtering servers based on stratum, dispersion, and + * finally round-trip delay. 
*/ +int best_offset_server(const ntp_server_results *slist, int nservers){ + int i=0, j=0, cserver=0, candidates[5], csize=0; + + /* for each server */ + for(cserver=0; cserveri; j--){ + candidates[j]=candidates[j-1]; + } + } + /* regardless, if they should be on the list... */ + if(i<5) { + candidates[i]=cserver; + if(csize<5) csize++; + /* otherwise discard the server */ + } else { + DBG(printf("discarding peer id %d\n", cserver)); + } + } + + if(csize>0) { + DBG(printf("best server selected: peer %d\n", candidates[0])); + return candidates[0]; + } else { + DBG(printf("no peers meeting synchronization criteria :(\n")); + return -1; + } +} + /* do everything we need to get the total average offset * - we use a certain amount of parallelization with poll() to ensure * we don't waste time sitting around waiting for single packets. * - we also "manually" handle resolving host names and connecting, because * we have to do it in a way that our lazy macros don't handle currently :( */ -double offset_request(const char *host){ +double offset_request(const char *host, int *status){ int i=0, j=0, ga_result=0, num_hosts=0, *socklist=NULL, respnum=0; - int servers_completed=0, one_written=0, servers_readable=0, offsets_recvd=0; + int servers_completed=0, one_written=0, servers_readable=0, best_index=-1; + time_t now_time=0, start_ts=0; ntp_message *req=NULL; double avg_offset=0.; struct timeval recv_time; @@ -337,28 +387,24 @@ double offset_request(const char *host){ ai_tmp = ai_tmp->ai_next; } - /* now do AVG_NUM checks to each host. 
*/ - while(servers_completed= min_peer_sel){ + num_selected++; setup_control_request(&req, OP_READVAR, 2); req.assoc = peers[i].assoc; /* By spec, putting the variable name "jitter" in the request @@ -514,11 +571,12 @@ double jitter_request(const char *host){ printf("parsing jitter from peer %.2x: ", peers[i].assoc); } startofvalue = strchr(req.data, '=') + 1; - jitter = strtod(startofvalue, &nptr); - num_selected++; - if(jitter == 0 && startofvalue==nptr){ - printf("warning: unable to parse server response.\n"); - /* XXX errors value ... */ + if(startofvalue != NULL) { + jitter = strtod(startofvalue, &nptr); + } + if(startofvalue == NULL || startofvalue==nptr){ + printf("warning: unable to read server jitter response.\n"); + *status = STATE_WARNING; } else { if(verbose) printf("%g\n", jitter); num_valid++; @@ -527,7 +585,7 @@ double jitter_request(const char *host){ } } if(verbose){ - printf("jitter parsed from %d/%d peers\n", num_selected, num_valid); + printf("jitter parsed from %d/%d peers\n", num_valid, num_selected); } } @@ -637,9 +695,11 @@ int process_arguments(int argc, char **argv){ } int main(int argc, char *argv[]){ - int result = STATE_UNKNOWN; + int result, offset_result, jitter_result; double offset=0, jitter=0; + result=offset_result=jitter_result=STATE_UNKNOWN; + if (process_arguments (argc, argv) == ERROR) usage4 (_("Could not parse arguments")); @@ -649,14 +709,15 @@ int main(int argc, char *argv[]){ /* set socket timeout */ alarm (socket_timeout); - offset = offset_request(server_address); - if(offset > ocrit){ + offset = offset_request(server_address, &offset_result); + if(fabs(offset) > ocrit){ result = STATE_CRITICAL; - } else if(offset > owarn) { + } else if(fabs(offset) > owarn) { result = STATE_WARNING; } else { result = STATE_OK; } + result=max_state(result, offset_result); /* If not told to check the jitter, we don't even send packets. 
* jitter is checked using NTP control packets, which not all @@ -664,7 +725,7 @@ int main(int argc, char *argv[]){ * (for example) will result in an error */ if(do_jitter){ - jitter=jitter_request(server_address); + jitter=jitter_request(server_address, &jitter_result); if(jitter > jcrit){ result = max_state(result, STATE_CRITICAL); } else if(jitter > jwarn) { @@ -675,6 +736,7 @@ int main(int argc, char *argv[]){ result = STATE_UNKNOWN; } } + result=max_state(result, jitter_result); switch (result) { case STATE_CRITICAL : @@ -690,9 +752,15 @@ int main(int argc, char *argv[]){ printf("NTP UNKNOWN: "); break; } - - printf("Offset %g secs|offset=%g", offset, offset); - if (do_jitter) printf("|jitter=%f", jitter); + if(offset_result==STATE_CRITICAL){ + printf("Offset unknown|offset=unknown"); + } else { + if(offset_result==STATE_WARNING){ + printf("Unable to fully sample sync server. "); + } + printf("Offset %.10g secs|offset=%.10g", offset, offset); + } + if (do_jitter) printf(", jitter=%f", jitter); printf("\n"); if(server_address!=NULL) free(server_address); diff --git a/plugins/common.h b/plugins/common.h index 7438e40..57f4f93 100644 --- a/plugins/common.h +++ b/plugins/common.h @@ -119,6 +119,10 @@ # define SWAP_CONVERSION 1 #endif +#ifdef HAVE_SYS_POLL_H +# include "sys/poll.h" +#endif + /* * * Missing Functions diff --git a/plugins/runcmd.c b/plugins/runcmd.c index 4155796..bc4ee08 100644 --- a/plugins/runcmd.c +++ b/plugins/runcmd.c @@ -198,7 +198,7 @@ np_runcmd_open(const char *cmdstring, int *pfd, int *pfderr) close (i); execve (argv[0], argv, env); - _exit (0); + _exit (STATE_UNKNOWN); } /* parent picks up execution here */ -- 2.30.2