1 #!/usr/bin/perl -w
3 # (c)1999 Ian Cass, Knowledge Matters Ltd.
4 # Read the GNU copyright stuff for all the legalese
5 #
6 # Check NTP time servers plugin. This plugin requires the ntpdate utility to
7 # be installed on the system, however since it's part of the ntp suite, you
8 # should already have it installed.
9 #
10 # $Id$
11 #
12 # Nothing clever done in this program - its a very simple bare basics hack to
13 # get the job done.
14 #
15 # Things to do...
16 # check @words[9] for time differences greater than +/- x secs & return a
17 # warning.
18 #
19 # (c) 1999 Mark Jewiss, Knowledge Matters Limited
20 # 22-9-1999, 12:45
21 #
22 # Modified script to accept 2 parameters or set defaults.
# Now issues a warning or critical alert if the time difference is greater than
# the time passed.
25 #
26 # These changes have not been tested completely due to the unavailability of a
27 # server with the incorrect time.
28 #
29 # (c) 1999 Bo Kersey, VirCIO - Managed Server Solutions <bo@vircio.com>
30 # 22-10-99, 12:17
31 #
# Modified the script to give usage if no parameters are input.
33 #
34 # Modified the script to check for negative as well as positive
35 # time differences.
36 #
37 # Modified the script to work with ntpdate 3-5.93e Wed Apr 14 20:23:03 EDT 1999
38 #
39 # Modified the script to work with ntpdate's that return adjust or offset...
40 #
41 #
42 # Script modified 2000 June 01 by William Pietri <william@bianca.com>
43 #
44 # Modified script to handle weird cases:
45 # o NTP server doesn't respond (e.g., has died)
46 # o Server has correct time but isn't suitable synchronization
47 # source. This happens while starting up and if contact
48 # with master has been lost.
49 #
# Modified to run under Embedded Perl (sghosh@users.sf.net)
51 # - combined logic some blocks together..
52 #
53 # Added ntpdate check for stratum 16 desynch peer (James Fidell) Feb 03, 2003
54 #
55 # ntpdate - offset is in seconds
56 # changed ntpdc to ntpq - jitter/dispersion is in milliseconds
57 #
# Patch for regex for stratum1 refid.
60 require 5.004;
61 use POSIX;
62 use strict;
63 use Getopt::Long;
64 use vars qw($opt_V $opt_h $opt_H $opt_t $opt_w $opt_c $opt_j $opt_k $verbose $PROGNAME $def_jitter);
65 use lib utils.pm;
66 use utils qw($TIMEOUT %ERRORS &print_revision &support);
# Name under which the plugin identifies itself in usage and revision output.
$PROGNAME = "check_ntp";

# Forward declarations; both subs are defined at the bottom of the file.
sub print_usage ();
sub print_help ();

# Empty PATH, BASH_ENV and ENV before any external command is started.
@ENV{'PATH', 'BASH_ENV', 'ENV'} = ('', '', '');

# Default offset thresholds, in seconds.
my $DEFAULT_OFFSET_CRIT = 120;    # 2 minutes
my $DEFAULT_OFFSET_WARN = 60;     # 1 minute

# Default jitter thresholds, in milliseconds.
my $DEFAULT_JITTER_CRIT = 10000;  # 10 sec
my $DEFAULT_JITTER_WARN = 5000;   # 5 sec

# Command-line specification.  Every option has a short and a long spelling
# and both spellings store into the same variable.
my @option_spec = (
    "V"   => \$opt_V,   "version"    => \$opt_V,
    "h"   => \$opt_h,   "help"       => \$opt_h,
    "v"   => \$verbose, "verbose"    => \$verbose,
    "w=f" => \$opt_w,   "warning=f"  => \$opt_w,   # offset|adjust warning if above this number
    "c=f" => \$opt_c,   "critical=f" => \$opt_c,   # offset|adjust critical if above this number
    "j=s" => \$opt_j,   "jwarn=i"    => \$opt_j,   # jitter warning if above this number
    "k=s" => \$opt_k,   "jcrit=i"    => \$opt_k,   # jitter critical if above this number
    "t=s" => \$opt_t,   "timeout=i"  => \$opt_t,
    "H=s" => \$opt_H,   "hostname=s" => \$opt_H,
);
Getopt::Long::Configure('bundling');
GetOptions(@option_spec);

# -V and -h only print information and then leave with the OK status;
# -V takes precedence when both are given.
if ($opt_V || $opt_h) {
    if ($opt_V) {
        print_revision($PROGNAME,'$Revision$ ');
    }
    else {
        print_help();
    }
    exit $ERRORS{'OK'};
}

# Remember whether a jitter threshold was requested explicitly (-j and/or -k).
$def_jitter = 1 if (defined $opt_j || defined $opt_k);
# Target host: -H/--hostname wins, otherwise the first remaining command-line
# argument is taken.
$opt_H = shift unless ($opt_H);

# Accept only a dotted-quad address or a dot-separated host name and keep the
# captured copy.  The value is later interpolated into a shell command line.
# (Declared separately from the conditional assignment: "my $x = ... if ..."
# has undefined behaviour in Perl.)
my $host;
if ($opt_H && $opt_H =~ m/^([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+|[a-zA-Z][-a-zA-Z0-9]+(\.[a-zA-Z][-a-zA-Z0-9]+)*)$/) {
    $host = $1;
}
unless ($host) {
    print "No target host specified\n";
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}

my ($timeout, $owarn, $ocrit, $jwarn, $jcrit);

# Plugin timeout in seconds.  A value of 0 is deliberately not accepted here:
# it would make the later alarm() call a no-op.
$timeout = $TIMEOUT;
($opt_t) && ($opt_t =~ /^([0-9]+)$/) && ($timeout = $1);

# Offset thresholds (seconds) and jitter thresholds (milliseconds).  Each one
# starts from its default and is replaced by a well-formed command-line value.
# "defined" is used instead of a truth test so that an explicit 0 is honoured.
$owarn = $DEFAULT_OFFSET_WARN;
$owarn = $1 if (defined $opt_w && $opt_w =~ /^([0-9.]+)$/);

$ocrit = $DEFAULT_OFFSET_CRIT;
$ocrit = $1 if (defined $opt_c && $opt_c =~ /^([0-9.]+)$/);

$jwarn = $DEFAULT_JITTER_WARN;
$jwarn = $1 if (defined $opt_j && $opt_j =~ /^([0-9]+)$/);

$jcrit = $DEFAULT_JITTER_CRIT;
$jcrit = $1 if (defined $opt_k && $opt_k =~ /^([0-9]+)$/);

if ($ocrit < $owarn ) {
    print "Critical offset should be larger than warning offset\n";
    print_usage();
    exit $ERRORS{"UNKNOWN"};
}

# Compare the resolved thresholds, not the raw options: when only one of
# -j/-k is given the other option is undefined, and the value that actually
# applies is the default.
if ($def_jitter && $jcrit < $jwarn) {
    print "Critical jitter should be larger than warning jitter\n";
    print_usage();
    exit $ERRORS{'UNKNOWN'};
}
# Values collected while scanning the ntpdate and ntpq output.
my $stratum   = -1;
my $ignoreret = 0;
my ($answer, $offset, $jitter, $syspeer);
my $candidate = 0;
my @candidates;
my $msg;    # first line of output to print if format is invalid

# Overall result and the two partial results all start out as UNKNOWN.
my ($state, $ntpdate_error, $jitter_error) = ($ERRORS{'UNKNOWN'}) x 3;

# some systems don't have a proper ntpq (migrated from ntpdc)
my $have_ntpq = ($utils::PATH_TO_NTPQ && -x $utils::PATH_TO_NTPQ) ? 1 : 0;

# Just in case of problems, let's not hang Nagios: give up with UNKNOWN
# once $timeout seconds have passed.
$SIG{'ALRM'} = sub {
    print "ERROR: No response from ntp server (alarm)\n";
    exit $ERRORS{"UNKNOWN"};
};
alarm($timeout);
181 ###
182 ###
183 ### First, check ntpdate
184 ###
185 ###
# Query the host with "ntpdate -q".  stderr is folded into the same pipe so
# that error messages are scanned together with the regular output.
open(NTPDATE, "$utils::PATH_TO_NTPDATE -q $host 2>&1 |") or do {
    print "Could not open ntpdate\n";
    exit $ERRORS{"UNKNOWN"};
};

while (defined(my $line = <NTPDATE>)) {
    print $line if ($verbose);

    # The first output line doubles as the default message text.
    $msg ||= $line;

    $stratum = $1 if ($line =~ /stratum\s(\d+)/);

    if ($line =~ /(offset|adjust)\s+([-.\d]+)/i) {
        # An offset of 0.000000 with an error is probably bogus.  Actually,
        # it's probably always bogus, but let's be paranoid here: a zero
        # offset is discarded and counted as a critical ntpdate result.
        $offset = ($2 == 0) ? undef : $2;
        $ntpdate_error = $ERRORS{ defined($offset) ? "OK" : "CRITICAL" };
        print "ntperr = $ntpdate_error \n" if $verbose;
    }

    if ($line =~ /no server suitable for synchronization found/) {
        if ($stratum != 16) {
            $ntpdate_error = $ERRORS{"CRITICAL"};
            $msg = "No suitable peer server found - ";
        }
        else {
            # Stratum 16: only a warning, and the exit status of ntpdate
            # is not evaluated afterwards.
            $ntpdate_error = $ERRORS{"WARNING"};
            $msg = "Desynchronized peer server found";
            $ignoreret = 1;
        }
    }
}

close(NTPDATE);

# A non-zero return code from ntpdate is an error as well; it becomes
# UNKNOWN unless the result has already been set to CRITICAL.
if (!$ignoreret && $?) {
    print "stderr = $? : $! \n" if $verbose;
    $ntpdate_error = $ERRORS{"UNKNOWN"} unless ($ntpdate_error == $ERRORS{"CRITICAL"});
    print "ntperr = $ntpdate_error : $!\n" if $verbose;
}
235 ###
236 ###
237 ### Then scan xntpq/ntpq if it exists
238 ### and look in the 11th column for jitter
239 ###
240 # Field 1: Tally Code ( Space, 'x','.','-','+','#','*','o')
241 # Only match for '*' which implies sys.peer
242 # or 'o' which implies pps.peer
243 # If both exist, the last one is picked.
244 # Field 2: address of the remote peer
245 # Field 3: Refid of the clock (0.0.0.0 if unknown, WWWV/PPS/GPS/ACTS/USNO/PCS/... if Stratum1)
246 # Field 4: stratum (0-15)
247 # Field 5: Type of the peer: local (l), unicast (u), multicast (m)
248 # broadcast (b); not sure about multicast/broadcast
249 # Field 6: last packet receive (in seconds)
250 # Field 7: polling interval
# Field 8: reachability register (octal)
252 # Field 9: delay
253 # Field 10: offset
254 # Field 11: dispersion/jitter
255 #
# According to bug 773588 some Solaris xntpd implementations seem to match on
# "#" even though the docs say it exceeds maximum distance. Providing patch
# here which will generate a warning.
# Scan "ntpq -np <host>" for the jitter of the peer the server is synchronised
# to.  Results are left in $syspeer, $stratum, $jitter and $jitter_error;
# $have_ntpq is cleared when ntpq reports a timeout.
if ($have_ntpq) {

    # Columns that follow the one-character tally code on a peer line.
    # With the tally code as capture 1 they are: 2 remote address, 3 refid,
    # 4 stratum, 5 peer type, 6 when, 7 poll, 8 reach, 9 delay, 10 offset,
    # 11 jitter/dispersion.
    my $peer_fields = '([-0-9.\s]+)\s+([-0-9A-Za-z.]+)\s+([-0-9.]+)\s+([lumb-]+)'
                    . '\s+([-0-9m.]+)\s+([-0-9.]+)\s+([-0-9.]+)\s+([-0-9.]+)'
                    . '\s+([-0-9.]+)\s+([-0-9.]+)';

    if ( open(NTPQ,"$utils::PATH_TO_NTPQ -np $host 2>&1 |") ) {
        while (<NTPQ>) {
            print $_ if ($verbose);
            if ( /timed out/ ) {
                $have_ntpq = 0;
                last;
            }

            # number of candidates on <host> for sys.peer
            # (tally codes '*', '+', '#' and 'o')
            if (/^[*+\#o]/) {
                ++$candidate;
                push (@candidates, $_);
                print "Candiate count= $candidate\n" if ($verbose);
            }

            # match sys.peer or pps.peer; if both exist the last one wins
            if (/^(\*|o)${peer_fields}/) {
                $syspeer = $2;
                $stratum = $4;
                $jitter  = $11;
                print "match $_ \n" if $verbose;
                if ($jitter > $jcrit) {
                    print "Jitter_crit = $11 :$jcrit\n" if ($verbose);
                    $jitter_error = $ERRORS{'CRITICAL'};
                } elsif ($jitter > $jwarn ) {
                    print "Jitter_warn = $11 :$jwarn \n" if ($verbose);
                    $jitter_error = $ERRORS{'WARNING'};
                } else {
                    $jitter_error = $ERRORS{'OK'};
                }
            }
        }
        close NTPQ;

        # If no sys.peer or pps.peer was matched but other candidates were
        # seen, generate at least a warning (based on bug id 773588).
        # A single candidate is enough, and a candidate line is only used
        # when it really parses, so no stale capture values are picked up.
        unless (defined $syspeer) {
            foreach my $c (@candidates) {
                next unless ($c =~ /^([+\#])${peer_fields}/);
                $syspeer = $2;
                $stratum = $4;
                $jitter  = $11;
                print "candidate match $c \n" if $verbose;
                if ($jitter > $jcrit) {
                    print "Candidate match - Jitter_crit = $11 :$jcrit\n" if ($verbose);
                    $jitter_error = $ERRORS{'CRITICAL'};
                } elsif ($jitter > $jwarn ) {
                    print "Candidate match - Jitter_warn = $11 :$jwarn \n" if ($verbose);
                    $jitter_error = $ERRORS{'WARNING'};
                } else {
                    # Never better than a warning: the server has no
                    # sys.peer.
                    $jitter_error = $ERRORS{'WARNING'};
                }
            }
        }
    }
}
# Combine the ntpdate result ($ntpdate_error, $offset) and the ntpq result
# ($jitter_error, $jitter) into the final $state and the message in $answer.
if ($ntpdate_error != $ERRORS{'OK'}) {
    # ntpdate itself reported a problem; its status is the plugin status.
    $state = $ntpdate_error;

    # ntpdate may have printed nothing at all, in which case $msg is unset.
    my $first_line = defined($msg) ? $msg : '';
    if ($ntpdate_error == $ERRORS{'WARNING'} ) {
        $answer = $first_line . "\n";
    }
    else {
        $answer = $first_line . "Server for ntp probably down\n";
    }

    # A threshold violation replaces the message; only a critical offset
    # also changes the state.
    if (defined($offset) && abs($offset) > $ocrit) {
        $state = $ERRORS{'CRITICAL'};
        $answer = "Server Error and offset $offset sec > +/- $ocrit sec\n";
    } elsif (defined($offset) && abs($offset) > $owarn) {
        $answer = "Server error and offset $offset sec > +/- $owarn sec\n";
    } elsif (defined($jitter) && abs($jitter) > $jcrit) {
        $answer = "Server error and jitter $jitter msec > +/- $jcrit msec\n";
    } elsif (defined($jitter) && abs($jitter) > $jwarn) {
        $answer = "Server error and jitter $jitter msec > +/- $jwarn msec\n";
    }

} elsif ($have_ntpq && $jitter_error != $ERRORS{'OK'}) {
    # ntpdate was fine but the ntpq scan was not.
    $state = $jitter_error;

    # $jitter stays undefined when ntpq produced no usable peer line.
    $answer = defined($jitter) ? "Jitter $jitter too high\n"
                               : "Jitter not available from ntpq\n";
    if (defined($offset) && abs($offset) > $ocrit) {
        $state = $ERRORS{'CRITICAL'};
        $answer = "Jitter error and offset $offset sec > +/- $ocrit sec\n";
    } elsif (defined($offset) && abs($offset) > $owarn) {
        $answer = "Jitter error and offset $offset sec > +/- $owarn sec\n";
    } elsif (defined($jitter) && abs($jitter) > $jcrit) {
        $answer = "Jitter error and jitter $jitter msec > +/- $jcrit msec\n";
    } elsif (defined($jitter) && abs($jitter) > $jwarn) {
        $answer = "Jitter error and jitter $jitter msec > +/- $jwarn msec\n";
    }

} elsif( !$have_ntpq ) { # no errors from ntpdate and no ntpq or ntpq timed out
    if (abs($offset) > $ocrit) {
        $state = $ERRORS{'CRITICAL'};
        $answer = "Offset $offset sec > +/- $ocrit sec\n";
    } elsif (abs($offset) > $owarn) {
        $state = $ERRORS{'WARNING'};
        # When jitter checking was requested, also say that ntpq gave no
        # data.  (This used to be a separate, unreachable branch.)
        if ($def_jitter) {
            $answer = "Offset $offset sec > +/- $owarn sec, ntpq timed out\n";
        } else {
            $answer = "Offset $offset sec > +/- $owarn sec\n";
        }
    } elsif ( $def_jitter ) {
        # Jitter thresholds were given but could not be checked.
        $state = $ERRORS{'WARNING'};
        $answer = "Offset $offset secs, ntpq timed out\n";
    } else{
        $state = $ERRORS{'OK'};
        $answer = "Offset $offset secs \n";
    }

} else { # no errors from ntpdate or ntpq
    # Critical conditions are tested before warning conditions.
    if (abs($offset) > $ocrit) {
        $state = $ERRORS{'CRITICAL'};
        $answer = "Offset $offset sec > +/- $ocrit sec, jitter $jitter msec\n";
    } elsif (abs($jitter) > $jcrit ) {
        $state = $ERRORS{'CRITICAL'};
        $answer = "Jitter $jitter msec> +/- $jcrit msec, offset $offset sec \n";
    } elsif (abs($offset) > $owarn) {
        $state = $ERRORS{'WARNING'};
        $answer = "Offset $offset sec > +/- $owarn sec, jitter $jitter msec\n";
    } elsif (abs($jitter) > $jwarn ) {
        $state = $ERRORS{'WARNING'};
        $answer = "Jitter $jitter msec> +/- $jwarn msec, offset $offset sec \n";
    } else {
        $state = $ERRORS{'OK'};
        $answer = "Offset $offset secs, jitter $jitter msec, peer is stratum $stratum\n";
    }
}
# Look up the status name whose numeric code equals $state, print the result
# line under that name and exit with the numeric code.
my ($status_name) = grep { $ERRORS{$_} == $state } keys %ERRORS;
print "NTP $status_name: $answer" if (defined $status_name);
exit $state;
408 ####
409 #### subs
# Print the one-line command synopsis to standard output.
sub print_usage () {
    my $synopsis = "Usage: $PROGNAME -H <host> [-w <warn>] [-c <crit>] [-j <warn>] [-k <crit>] [-v verbose]\n";
    print $synopsis;
}
# Print the full help text: revision line, copyright, usage synopsis, a
# description of the offset and jitter thresholds with their current default
# values, and finally the support notice from utils.
sub print_help () {
    print_revision($PROGNAME,'$Revision$');
    print "Copyright (c) 2003 Bo Kersey/Karl DeBisschop\n";
    print "\n";
    print_usage();
    # Note: -k/--jcrit is the *critical* jitter threshold.
    print "
Checks the local timestamp offset versus <host> with ntpdate
Checks the jitter/dispersion of clock signal between <host> and its sys.peer with ntpq\n
-w ( --warning)
Clock offset in seconds at which a warning message will be generated.\n Defaults to $DEFAULT_OFFSET_WARN.
-c (--critical)
Clock offset in seconds at which a critical message will be generated.\n Defaults to $DEFAULT_OFFSET_CRIT.
-j (--jwarn)
Clock jitter in milliseconds at which a warning message will be generated.\n Defaults to $DEFAULT_JITTER_WARN.
-k (--jcrit)
Clock jitter in milliseconds at which a critical message will be generated.\n Defaults to $DEFAULT_JITTER_CRIT.\n
If jitter/dispersion is specified with -j or -k and ntpq times out, then a
warning is returned.
";
    support();
}