1 #! /usr/bin/perl -wT
2 #
3 # Check_apc_ups - Check APC UPS status via SNMP
4 # Shamelessly copied from check_breeze.pl
5 #
6 # To do:
7 # - Send SNMP queries directly, instead of forking `snmpget`.
8 # - Make the status less verbose. Maybe we can send an "onLine, time
9 # remaining: hh:mm:ss" if all is well, and a list of specific problems
10 # if something is broken.
# Split the invocation path ($0) into the directory the plugin lives in and
# the plugin's base name.  This runs at compile time so that the directory
# is already known when the later "use lib" line is processed.  Both
# variables are left untouched when $0 contains no directory separator.
BEGIN {
    my $has_dir = ($0 =~ m/^(.*?)[\/\\]([^\/\\]+)$/);
    ($runtimedir, $PROGNAME) = ($1, $2) if $has_dir;
}
19 use strict;
20 use Getopt::Long;
21 use vars qw($opt_V $opt_h $opt_H $opt_T $opt_t $opt_R $opt_r
22 $opt_L $opt_l $PROGNAME);
23 use lib $main::runtimedir;
24 use utils qw(%ERRORS &print_revision &support &usage);
# Forward declarations: the subs are defined at the bottom of the file but
# are called (with prototypes) from the main program above them.
sub print_help ();
sub print_usage ();
sub get_snmp_int_val ($);
sub escalate_exitval ($);

# Taint mode refuses to run external commands with an inherited
# environment, so clear the variables a shell would honour.
$ENV{'PATH'}='';
$ENV{'BASH_ENV'}='';
$ENV{'ENV'}='';

# Every threshold option takes a value in both its short and its long form.
Getopt::Long::Configure('bundling');
GetOptions
    ("V"   => \$opt_V, "version"            => \$opt_V,
     "h"   => \$opt_h, "help"               => \$opt_h,
     "T=s" => \$opt_T, "temp-critical=s"    => \$opt_T,
     "t=s" => \$opt_t, "temp-warning=s"     => \$opt_t,
     "R=s" => \$opt_R, "runtime-critical=s" => \$opt_R,
     "r=s" => \$opt_r, "runtime-warning=s"  => \$opt_r,
     "L=s" => \$opt_L, "load-critical=s"    => \$opt_L,
     "l=s" => \$opt_l, "load-warning=s"     => \$opt_l,
     "H=s" => \$opt_H, "hostname=s"         => \$opt_H);

if ($opt_V) {
    print_revision($PROGNAME,'$Revision$');
    exit $ERRORS{'OK'};
}

if ($opt_h) {print_help(); exit $ERRORS{'OK'};}

# The host may be given with -H or as the first non-option argument.
($opt_H) || ($opt_H = shift) || usage("Host name/address not specified\n");

# Untaint the host.  The whole argument must look like a host name or
# address (no leading "-", no shell metacharacters) because it is later
# interpolated into an snmpget command line.
my $host;
if ($opt_H =~ /\A([A-Za-z0-9][-.A-Za-z0-9]*)\z/) {
    $host = $1;
}
(defined $host) || usage("Invalid host: $opt_H\n");

# Defaults.  Run times are kept in minutes until they have been validated.
my $tempcrit    = $opt_T || 60;
my $tempwarn    = $opt_t || 40;
my $runtimecrit = $opt_R || 30;
my $runtimewarn = $opt_r || 60;
my $loadcrit    = $opt_L || 85;
my $loadwarn    = $opt_l || 50;

# A threshold must be a plain non-negative number, nothing more.
my $number_re = qr/\A\d+(?:\.\d+)?\z/;

if ($tempcrit !~ $number_re) { usage ("Invalid critical temperature threshold.\n"); }
if ($tempwarn !~ $number_re) { usage ("Invalid warning temperature threshold.\n"); }

if ($runtimecrit !~ $number_re) {
    usage ("Invalid critical run time threshold.\n");
}
if ($runtimewarn !~ $number_re) {
    usage ("Invalid warning run time threshold.\n");
}

if ($loadcrit !~ $number_re || $loadcrit < 0 || $loadcrit > 100) {
    usage ("Invalid critical load threshold.\n");
}
if ($loadwarn !~ $number_re || $loadwarn < 0 || $loadwarn > 100) {
    usage ("Invalid warning load threshold.\n");
}

# Convert minutes to hundredths of a second, the unit the run time
# comparison in the main program works in.
$runtimecrit *= 60 * 100;
$runtimewarn *= 60 * 100;
# APC UPS OIDs
# APC MIBs are available at ftp://ftp.apcftp.com/software/pnetmib/mib
my $upsBasicOutputStatus          = ".1.3.6.1.4.1.318.1.1.1.4.1.1.0";
my $upsBasicBatteryStatus         = ".1.3.6.1.4.1.318.1.1.1.2.1.1.0";
my $upsAdvInputLineFailCause      = ".1.3.6.1.4.1.318.1.1.1.3.2.5.0";
my $upsAdvBatteryTemperature      = ".1.3.6.1.4.1.318.1.1.1.2.2.2.0";
my $upsAdvBatteryRunTimeRemaining = ".1.3.6.1.4.1.318.1.1.1.2.2.3.0";
my $upsAdvBatteryReplaceIndicator = ".1.3.6.1.4.1.318.1.1.1.2.2.4.0";
my $upsAdvOutputLoad              = ".1.3.6.1.4.1.318.1.1.1.4.2.3.0";
my $upsAdvTestDiagnosticsResults  = ".1.3.6.1.4.1.318.1.1.1.7.2.3.0";

# Each table below is indexed by the integer the UPS returns for the
# corresponding OID.  An entry is [ label, plugin status ]; slots with an
# undefined label are reported as "unknown" by the main program.
my @outputStatVals = (
    [ undef, undef ],                                       # pad 0
    [ undef, undef ],                                       # pad 1
    [ "onLine",                   $ERRORS{'OK'} ],          # 2
    [ "onBattery",                $ERRORS{'WARNING'} ],     # 3
    [ "onSmartBoost",             $ERRORS{'WARNING'} ],     # 4
    [ "timedSleeping",            $ERRORS{'WARNING'} ],     # 5
    [ "softwareBypass",           $ERRORS{'WARNING'} ],     # 6
    [ "off",                      $ERRORS{'CRITICAL'} ],    # 7
    [ "rebooting",                $ERRORS{'WARNING'} ],     # 8
    [ "switchedBypass",           $ERRORS{'WARNING'} ],     # 9
    [ "hardwareFailureBypass",    $ERRORS{'CRITICAL'} ],    # 10
    [ "sleepingUntilPowerReturn", $ERRORS{'CRITICAL'} ],    # 11
    [ "onSmartTrim",              $ERRORS{'WARNING'} ],     # 12
);

# Labels only (no status): the reason for the transfer to battery.
my @failCauseVals = (
    undef,
    "noTransfer",
    "highLineVoltage",
    "brownout",
    "blackout",
    "smallMomentarySag",
    "deepMomentarySag",
    "smallMomentarySpike",
    "largeMomentarySpike",
    "selfTest",
    "rateOfVoltageChange",
);

my @battStatVals = (
    [ undef, undef ],                           # pad 0
    [ undef, undef ],                           # pad 1
    [ "batteryNormal", $ERRORS{'OK'} ],         # 2
    [ "batteryLow",    $ERRORS{'CRITICAL'} ],   # 3
);

my @battReplVals = (
    [ undef, undef ],                                   # pad 0
    [ "noBatteryNeedsReplacing", $ERRORS{'OK'} ],       # 1
    [ "batteryNeedsReplacing",   $ERRORS{'CRITICAL'} ], # 2
);

my @diagnosticsResultsVals = (
    [ undef, undef ],                           # pad 0
    [ "OK",             $ERRORS{'OK'} ],        # 1
    [ "failed",         $ERRORS{'CRITICAL'} ],  # 2
    [ "invalidTest",    $ERRORS{'CRITICAL'} ],  # 3
    [ "testInProgress", $ERRORS{'OK'} ],        # 4
);
# Main program: query each OID in turn, print one " | "-separated status
# field per value and raise the exit status to the worst condition seen.
my $exitval = $ERRORS{'UNKNOWN'};
my $data;
my $entry;
my $outputstatus;
my $onbattery  = 3;    # upsBasicOutputStatus value for "onBattery"
my $batterylow = 3;    # upsBasicBatteryStatus value for "batteryLow"

# Fetch one OID and accept the answer only if it is a plain integer, so
# that error text from snmpget is never used as a number or array index.
my $fetch_int = sub {
    my $raw = get_snmp_int_val( $_[0] );
    return undef unless defined $raw && $raw =~ /^\s*(-?\d+)\s*$/;
    return $1;
};

# Bounds-checked table lookup: returns the table element, or undef when
# the index is missing or outside the table (a negative index would
# otherwise wrap around to the end of the array).
my $lookup = sub {
    my ($table, $index) = @_;
    return undef unless defined $index && $index >= 0 && $index <= $#{$table};
    return $table->[$index];
};

$outputstatus = $fetch_int->( $upsBasicOutputStatus );

print "Output status: ";
$entry = $lookup->( \@outputStatVals, $outputstatus );
if (defined ($entry) && defined ($entry->[0])) {
    print "$entry->[0] | ";
    escalate_exitval($entry->[1]);
} else {
    print "unknown | ";
}

$data = $fetch_int->( $upsAdvBatteryRunTimeRemaining );

print "Rem time: ";
if (defined ($data)) {
    # Data is hundredths of a second
    my $hrs  = int($data / (60 * 60 * 100));
    my $mins = int($data / (60 * 100)) % 60;
    my $secs = ($data % (60 * 100)) / 100;   # whole seconds plus hundredths
    printf "%d:%02d:%05.2f | ", $hrs, $mins, $secs;
    if ($data <= $runtimecrit) {
        escalate_exitval($ERRORS{'CRITICAL'});
    } elsif ($data <= $runtimewarn) {
        escalate_exitval($ERRORS{'WARNING'});
    } else {
        escalate_exitval($ERRORS{'OK'});
    }
} else {
    print "unknown | ";
}

$data = $fetch_int->( $upsBasicBatteryStatus );

print "Battery status: ";
$entry = $lookup->( \@battStatVals, $data );
if (defined ($entry) && defined ($entry->[0])) {
    # Show why the UPS went to battery while it is running on battery.
    # The battery-low case is kept as well because the original code
    # (which compared the battery status against $onbattery) showed the
    # cause in exactly that situation.
    my $show_cause = (defined ($outputstatus) && $outputstatus == $onbattery)
                  || $data == $batterylow;
    if ($show_cause) {
        my $fc = $fetch_int->( $upsAdvInputLineFailCause );
        my $failcause = $lookup->( \@failCauseVals, $fc );
        $failcause = "unknown" unless defined ($failcause);
        print "$entry->[0] ($failcause) | ";
    } else {
        print "$entry->[0] | ";
    }
    escalate_exitval($entry->[1]);
} else {
    print "unknown | ";
}

$data = $fetch_int->( $upsAdvBatteryTemperature );

print "Battery temp(C): ";
if (defined ($data)) {
    print "$data | ";
    if ($data >= $tempcrit) {
        escalate_exitval($ERRORS{'CRITICAL'});
    } elsif ($data >= $tempwarn) {
        escalate_exitval($ERRORS{'WARNING'});
    } else {
        escalate_exitval($ERRORS{'OK'});
    }
} else {
    print "unknown | ";
}

$data = $fetch_int->( $upsAdvBatteryReplaceIndicator );

print "Battery repl: ";
$entry = $lookup->( \@battReplVals, $data );
if (defined ($entry) && defined ($entry->[0])) {
    print "$entry->[0] | ";
    escalate_exitval($entry->[1]);
} else {
    print "unknown | ";
}

$data = $fetch_int->( $upsAdvOutputLoad );

print "Output load (%): ";
if (defined ($data)) {
    print "$data | ";
    if ($data >= $loadcrit) {
        escalate_exitval($ERRORS{'CRITICAL'});
    } elsif ($data >= $loadwarn) {
        escalate_exitval($ERRORS{'WARNING'});
    } else {
        escalate_exitval($ERRORS{'OK'});
    }
} else {
    print "unknown | ";
}

$data = $fetch_int->( $upsAdvTestDiagnosticsResults );

print "Diag result: ";
$entry = $lookup->( \@diagnosticsResultsVals, $data );
if (defined ($entry) && defined ($entry->[0])) {
    print "$entry->[0]\n";
    escalate_exitval($entry->[1]);
} else {
    print "unknown\n";
}

exit $exitval;
# Print the two-line command synopsis to standard output.
sub print_usage () {
    my @synopsis = (
        "Usage: $PROGNAME -H <host> -T temp -t temp -R minutes -r minutes\n",
        " -L percent -l percent\n",
    );
    print @synopsis;
}
# Print the full help text: revision, copyright and description, the
# usage synopsis, a description of every option, and the support notice.
sub print_help () {
    print_revision($PROGNAME,'$Revision$');
    print "Copyright (c) 2001 Gerald Combs/Jeffrey Blank/Karl DeBisschop
This plugin reports the status of an APC UPS equipped with an SNMP management
module.
";
    print_usage();
    # The two load-threshold lines previously lacked their closing ")".
    print "
-H, --hostname=HOST
Name or IP address of host to check
-T --temp-critical
Battery degrees C above which a CRITICAL status will result (default: 60)
-t --temp-warning
Battery degrees C above which a WARNING status will result (default: 40)
-R --runtime-critical
Minutes remaining below which a CRITICAL status will result (default: 30)
-r --runtime-warning
Minutes remaining below which a WARNING status will result (default: 60)
-L --load-critical
Output load pct above which a CRITICAL status will result (default: 85)
-l --load-warning
Output load pct above which a WARNING status will result (default: 50)
";
    support();
}
# Query one OID on $host with snmpget (community "public") and return its
# integer value.
#
#   Argument: the OID to query.
#   Returns:  the integer found in the reply, or undef when the command
#             produced no usable output or the reply holds no integer
#             (for example an error message from the agent).
#
# $host is interpolated into a shell command line; it has been untainted
# by the option-handling code before this sub is called.
sub get_snmp_int_val ($) {
    my $oid = shift(@_);

    my $reply = `/usr/bin/snmpget $host public $oid 2> /dev/null`;
    return undef unless (defined ($reply));    # command could not be run

    # The reply is "<oid> = <value>"; the value is everything after the
    # second space.
    my @fields = split(/ /, $reply, 3);
    return undef unless (defined ($fields[2]));

    my $value = $fields[2];
    if ($value =~ /\((-?\d+)\)/) {             # Later versions of UCD SNMP
        return $1;
    } elsif ($value =~ /: (-?\d+)/) {          # "TYPE: number"
        return $1;
    } elsif ($value =~ /^\s*(-?\d+)\s*$/) {    # bare number
        return $1;
    }

    # Anything else is not an integer; callers print "unknown" for undef.
    return undef;
}
# Raise the plugin's exit status ($exitval) to $newval when $newval is
# the more severe of the two.
#
# $exitval starts out as UNKNOWN, meaning "nothing measured yet", so the
# first real status always replaces it.  A plain numeric comparison is
# not enough for that: when UNKNOWN is numerically the highest code (3 in
# current plugin conventions) no status would ever exceed it and the
# plugin would always exit UNKNOWN.  An undefined or UNKNOWN $newval
# carries no information and is ignored.
sub escalate_exitval ($) {
    my $newval = shift(@_);
    my $unknown = $ERRORS{'UNKNOWN'};

    return unless defined $newval;
    return if $newval == $unknown;

    if ($exitval == $unknown || $newval > $exitval) {
        $exitval = $newval;
    }
}