1 /*
2 * check_ide-smart v.1 - hacked version of ide-smart for Nagios
3 * Copyright (C) 2000 Robert Dale <rdale@digital-mission.com>
4 *
5 * Nagios - http://www.nagios.org
6 *
7 * Notes:
8 * ide-smart has the same functionality as before. Some return
9 * values were changed, otherwise the --nagios option was added.
10 *
11 * Run with: check_ide-smart --nagios [-d] <DRIVE>
 * Where DRIVE is an IDE drive, e.g. /dev/hda, /dev/hdb, /dev/hdc
13 *
14 * - Returns 0 on no errors
15 * - Returns 1 on advisories
16 * - Returns 2 on prefailure
17 * - Returns -1 not too often
18 *
19 * ide-smart 1.3 - IDE S.M.A.R.T. checking tool
20 * Copyright (C) 1998-1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>
21 * 1998 Gadi Oxman <gadio@netvision.net.il>
22 *
23 * This program is free software; you can redistribute it and/or modify
24 * it under the terms of the GNU General Public License as published by
25 * the Free Software Foundation; either version 2 of the License, or
26 * (at your option) any later version.
27 *
28 * This program is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU General Public License for more details.
32 *
33 * You should have received a copy of the GNU General Public License
34 * along with this program; if not, write to the Free Software
35 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
36 *
37 * $Id$
38 */
/*
 * Program identity strings. progname and revision are handed to
 * print_revision(); copyright and email fill the COPYRIGHT format
 * printed by print_help(); progname also appears in the usage text.
 */
const char *progname = "check_ide_smart";
const char *revision = "$Revision$";
const char *copyright = "2000-2004";
const char *email = "nagiosplug-devel@lists.sourceforge.net";
45 #include "common.h"
46 #include "utils.h"
/* Help and usage printers; both are defined later in this file. */
void print_help (void);
void print_usage (void);
51 #include <sys/stat.h>
52 #include <sys/ioctl.h>
53 #include <fcntl.h>
54 #include <linux/hdreg.h>
55 #include <linux/types.h>
56 #include <errno.h>
/* Number of attribute slots in the SMART value and threshold tables. */
#define NR_ATTRIBUTES 30

#ifndef TRUE
#define TRUE 1
#endif /* TRUE */

/*
 * Drive health states. nagios() returns PREFAILURE, ADVISORY or
 * OPERATIONAL directly as the plugin status.
 */
#define PREFAILURE 2
#define ADVISORY 1
#define OPERATIONAL 0
/* Parenthesized so the negative value stays a single operand wherever it expands. */
#define UNKNOWN (-1)
/*
 * One slot of the SMART threshold table: an attribute id, its
 * threshold, and ten reserved bytes (12 bytes in total, packed).
 * An id of 0 is treated as an unused slot by the code in this file.
 */
typedef struct threshold_s
{
/* attribute identifier; compared against value_t.id */
__u8 id;
/* an attribute whose value is <= this number is counted as failed */
__u8 threshold;
__u8 reserved[10];
}
__attribute__ ((packed)) threshold_t;
/*
 * Complete threshold block as filled by smart_read_thresholds(), which
 * copies 512 raw bytes into it. The packed members add up to exactly
 * 512 bytes (2 + 30 * 12 + 18 + 131 + 1), so the field order and the
 * packing must not change.
 */
typedef struct thresholds_s
{
__u16 revision;
/* one slot per attribute, walked in parallel with values_t.values */
threshold_t thresholds[NR_ATTRIBUTES];
__u8 reserved[18];
__u8 vendor[131];
__u8 checksum;
}
__attribute__ ((packed)) thresholds_t;
/*
 * One slot of the SMART attribute value table (12 bytes, packed).
 * An id of 0 is treated as an unused slot by the code in this file.
 */
typedef struct value_s
{
/* attribute identifier; compared against threshold_t.id */
__u8 id;
/*
 * Flag word. This file reads bit 0 (set = pre-failure attribute,
 * clear = advisory attribute) and bit 1 (set = "OnLine").
 */
__u16 status;
/* current value; compared against threshold_t.threshold */
__u8 value;
__u8 vendor[8];
}
__attribute__ ((packed)) value_t;
/*
 * Complete attribute value block as filled by smart_read_values(),
 * which copies 512 raw bytes into it. The packed members add up to
 * exactly 512 bytes, so the field order and the packing must not change.
 */
typedef struct values_s
{
__u16 revision;
/* one slot per attribute, walked in parallel with thresholds_t.thresholds */
value_t values[NR_ATTRIBUTES];
/*
 * print_values() reports bit 0x80 as "AutoOffLine" and looks the low
 * seven bits up in offline_status_text.
 */
__u8 offline_status;
__u8 vendor1;
/* printed divided by 60 and labelled "minutes" -- presumably seconds */
__u16 offline_timeout;
__u8 vendor2;
/* bits printed by print_values(): 1 Immediate, 2 Auto, 4 AbortOnCmd */
__u8 offline_capability;
/* bits printed by print_values(): 1 SaveOnStandBy, 2 AutoSave */
__u16 smart_capability;
__u8 reserved[16];
__u8 vendor[125];
__u8 checksum;
}
__attribute__ ((packed)) values_t;
/*
 * Maps an off-line data collection status code to a short label.
 * The table ends with an entry whose text pointer is null; lookups
 * stop there.
 */
struct
{
  __u8 value;
  char *text;
}
offline_status_text[] = {
  { .value = 0x00, .text = "NeverStarted" },
  { .value = 0x02, .text = "Completed" },
  { .value = 0x04, .text = "Suspended" },
  { .value = 0x05, .text = "Aborted" },
  { .value = 0x06, .text = "Failed" },
  { .value = 0, .text = 0 }     /* terminator */
};
/*
 * SMART sub-command codes together with the names used in error
 * messages. Indexed by enum SmartCommand, so the row order here must
 * stay in step with that enum. There is no terminator entry.
 */
struct
{
  __u8 value;
  char *text;
}
smart_command[] = {
  { .value = SMART_ENABLE, .text = "SMART_ENABLE" },
  { .value = SMART_DISABLE, .text = "SMART_DISABLE" },
  { .value = SMART_IMMEDIATE_OFFLINE, .text = "SMART_IMMEDIATE_OFFLINE" },
  { .value = SMART_AUTO_OFFLINE, .text = "SMART_AUTO_OFFLINE" }
};
/*
 * Row numbers of the smart_command table. The values are spelled out
 * because they are used directly as array indices; keep them in the
 * same order as the table rows.
 */
enum SmartCommand
{
  SMART_CMD_ENABLE = 0,
  SMART_CMD_DISABLE = 1,
  SMART_CMD_IMMEDIATE_OFFLINE = 2,
  SMART_CMD_AUTO_OFFLINE = 3
};
151 void print_values (values_t * p, thresholds_t * t);
152 int smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error);
154 int
155 main (int argc, char *argv[])
156 {
157 char *device = NULL;
158 int command = -1;
159 int o, longindex;
160 int retval = 0;
162 thresholds_t thresholds;
163 values_t values;
164 int fd;
166 static struct option longopts[] = {
167 {"device", required_argument, 0, 'd'},
168 {"immediate", no_argument, 0, 'i'},
169 {"quiet-check", no_argument, 0, 'q'},
170 {"auto-on", no_argument, 0, '1'},
171 {"auto-off", no_argument, 0, '0'},
172 {"nagios", no_argument, 0, 'n'},
173 {"help", no_argument, 0, 'h'},
174 {"version", no_argument, 0, 'V'}, {0, 0, 0, 0}
175 };
177 setlocale (LC_ALL, "");
178 bindtextdomain (PACKAGE, LOCALEDIR);
179 textdomain (PACKAGE);
181 while (1) {
183 o = getopt_long (argc, argv, "+d:iq10nhV", longopts, &longindex);
185 switch (o) {
186 case -1:
187 /*
188 * bail out of the switch but not the loop, so
189 * that device can be extracted from argv.
190 */
191 break;
192 case 'd':
193 device = optarg;
194 break;
195 case 'q':
196 command = 3;
197 break;
198 case 'i':
199 command = 2;
200 break;
201 case '1':
202 command = 1;
203 break;
204 case '0':
205 command = 0;
206 break;
207 case 'n':
208 command = 4;
209 break;
210 case 'h':
211 print_help ();
212 return STATE_OK;
213 case 'V':
214 print_revision (progname, revision);
215 return STATE_OK;
216 default:
217 usage2 (_("Unknown argument"), optarg);
218 }
220 if (optind < argc) {
221 device = argv[optind];
222 }
224 if (!device) {
225 print_help ();
226 return -1;
227 }
229 fd = open (device, O_RDONLY);
231 if (fd < 0) {
232 printf (_("CRITICAL - Couldn't open device %s: %s\n"), device, strerror (errno));
233 return 2;
234 }
236 if (smart_cmd_simple (fd, SMART_CMD_ENABLE, 0, TRUE)) {
237 printf (_("CRITICAL - SMART_CMD_ENABLE\n"));
238 return 2;
239 }
241 switch (command) {
242 case 0:
243 retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0, TRUE);
244 break;
245 case 1:
246 retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0xF8, TRUE);
247 break;
248 case 2:
249 retval = smart_cmd_simple (fd, SMART_CMD_IMMEDIATE_OFFLINE, 0, TRUE);
250 break;
251 case 3:
252 smart_read_values (fd, &values);
253 smart_read_thresholds (fd, &thresholds);
254 retval = values_not_passed (&values, &thresholds);
255 break;
256 case 4:
257 smart_read_values (fd, &values);
258 smart_read_thresholds (fd, &thresholds);
259 retval = nagios (&values, &thresholds);
260 break;
261 default:
262 smart_read_values (fd, &values);
263 smart_read_thresholds (fd, &thresholds);
264 print_values (&values, &thresholds);
265 break;
266 }
267 close (fd);
268 }
269 return retval;
270 }
274 char *
275 get_offline_text (int status)
276 {
277 int i;
278 for (i = 0; offline_status_text[i].text; i++) {
279 if (offline_status_text[i].value == status) {
280 return offline_status_text[i].text;
281 }
282 }
283 return "UNKNOW";
284 }
288 int
289 smart_read_values (int fd, values_t * values)
290 {
291 int e;
292 __u8 args[4 + 512];
293 args[0] = WIN_SMART;
294 args[1] = 0;
295 args[2] = SMART_READ_VALUES;
296 args[3] = 1;
297 if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
298 e = errno;
299 printf (_("CRITICAL - SMART_READ_VALUES: %s\n"), strerror (errno));
300 return e;
301 }
302 memcpy (values, args + 4, 512);
303 return 0;
304 }
308 int
309 values_not_passed (values_t * p, thresholds_t * t)
310 {
311 value_t * value = p->values;
312 threshold_t * threshold = t->thresholds;
313 int failed = 0;
314 int passed = 0;
315 int i;
316 for (i = 0; i < NR_ATTRIBUTES; i++) {
317 if (value->id && threshold->id && value->id == threshold->id) {
318 if (value->value <= threshold->threshold) {
319 ++failed;
320 }
321 else {
322 ++passed;
323 }
324 }
325 ++value;
326 ++threshold;
327 }
328 return (passed ? -failed : 2);
329 }
333 int
334 nagios (values_t * p, thresholds_t * t)
335 {
336 value_t * value = p->values;
337 threshold_t * threshold = t->thresholds;
338 int status = OPERATIONAL;
339 int prefailure = 0;
340 int advisory = 0;
341 int failed = 0;
342 int passed = 0;
343 int total = 0;
344 int i;
345 for (i = 0; i < NR_ATTRIBUTES; i++) {
346 if (value->id && threshold->id && value->id == threshold->id) {
347 if (value->value <= threshold->threshold) {
348 ++failed;
349 if (value->status & 1) {
350 status = PREFAILURE;
351 ++prefailure;
352 }
353 else {
354 status = ADVISORY;
355 ++advisory;
356 }
357 }
358 else {
359 ++passed;
360 }
361 ++total;
362 }
363 ++value;
364 ++threshold;
365 }
366 switch (status) {
367 case PREFAILURE:
368 printf (_("CRITICAL - %d Harddrive PreFailure%cDetected! %d/%d tests failed.\n"),
369 prefailure,
370 prefailure > 1 ? 's' : ' ',
371 failed,
372 total);
373 break;
374 case ADVISORY:
375 printf (_("WARNING - %d Harddrive Advisor%s Detected. %d/%d tests failed.\n"),
376 advisory,
377 advisory > 1 ? "ies" : "y",
378 failed,
379 total);
380 break;
381 case OPERATIONAL:
382 printf (_("OK - Operational (%d/%d tests passed)\n"), passed, total);
383 break;
384 default:
385 printf (_("ERROR - Status '%d' uknown. %d/%d tests passed\n"), status,
386 passed, total);
387 status = -1;
388 break;
389 }
390 return status;
391 }
395 void
396 print_value (value_t * p, threshold_t * t)
397 {
398 printf ("Id=%3d, Status=%2d {%s , %s}, Value=%3d, Threshold=%3d, %s\n",
399 p->id, p->status, p->status & 1 ? "PreFailure" : "Advisory ",
400 p->status & 2 ? "OnLine " : "OffLine", p->value, t->threshold,
401 p->value > t->threshold ? "Passed" : "Failed");
402 }
406 void
407 print_values (values_t * p, thresholds_t * t)
408 {
409 value_t * value = p->values;
410 threshold_t * threshold = t->thresholds;
411 int i;
412 for (i = 0; i < NR_ATTRIBUTES; i++) {
413 if (value->id && threshold->id && value->id == threshold->id) {
414 print_value (value++, threshold++);
415 }
416 }
417 printf
418 (_("OffLineStatus=%d {%s}, AutoOffLine=%s, OffLineTimeout=%d minutes\n"),
419 p->offline_status,
420 get_offline_text (p->offline_status & 0x7f),
421 (p->offline_status & 0x80 ? "Yes" : "No"),
422 p->offline_timeout / 60);
423 printf
424 (_("OffLineCapability=%d {%s %s %s}\n"),
425 p->offline_capability,
426 p->offline_capability & 1 ? "Immediate" : "",
427 p->offline_capability & 2 ? "Auto" : "",
428 p->offline_capability & 4 ? "AbortOnCmd" : "SuspendOnCmd");
429 printf
430 (_("SmartRevision=%d, CheckSum=%d, SmartCapability=%d {%s %s}\n"),
431 p->revision,
432 p->checksum,
433 p->smart_capability,
434 p->smart_capability & 1 ? "SaveOnStandBy" : "",
435 p->smart_capability & 2 ? "AutoSave" : "");
436 }
440 void
441 print_thresholds (thresholds_t * p)
442 {
443 threshold_t * threshold = p->thresholds;
444 int i;
445 printf ("\n");
446 printf ("SmartRevision=%d\n", p->revision);
447 for (i = 0; i < NR_ATTRIBUTES; i++) {
448 if (threshold->id) {
449 printf ("Id=%3d, Threshold=%3d\n", threshold->id,
450 threshold->threshold); }
451 ++threshold;
452 }
453 printf ("CheckSum=%d\n", p->checksum);
454 }
456 int
457 smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error)
458 {
459 int e = 0;
460 __u8 args[4];
461 args[0] = WIN_SMART;
462 args[1] = val0;
463 args[2] = smart_command[command].value;
464 args[3] = 0;
465 if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
466 e = errno;
467 if (show_error) {
468 printf (_("CRITICAL - %s: %s\n"), smart_command[command].text, strerror (errno));
469 }
470 }
471 return e;
472 }
476 int
477 smart_read_thresholds (int fd, thresholds_t * thresholds)
478 {
479 int e;
480 __u8 args[4 + 512];
481 args[0] = WIN_SMART;
482 args[1] = 0;
483 args[2] = SMART_READ_THRESHOLDS;
484 args[3] = 1;
485 if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
486 e = errno;
487 printf (_("CRITICAL - SMART_READ_THRESHOLDS: %s\n"), strerror (errno));
488 return e;
489 }
490 memcpy (thresholds, args + 4, 512);
491 return 0;
492 }
/*
 * Print the full help text: the revision line, the author credits,
 * the COPYRIGHT line, a one-sentence description of the plugin and
 * the list of supported options.
 */
void
print_help (void)
{
print_revision (progname, revision);
/* author credits are printed verbatim and are not passed through gettext */
printf ("Nagios feature - 1999 Robert Dale <rdale@digital-mission.com>\n");
printf ("(C) 1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>\n");
printf (COPYRIGHT, copyright, email);
printf(_("This plugin checks a local hard drive with the (Linux specific) SMART interface [http://smartlinux.sourceforge.net/smart/index.php].\n\n"));
/*
 * One string literal continued with backslash-newline; everything on
 * the continuation lines, leading whitespace included, is part of the
 * printed text.
 */
printf ("\
Usage: %s [OPTION] [DEVICE]\n\
-d, --device=DEVICE\n\
Select device DEVICE\n\
Note: if the device is selected with this option, _no_ other options are accepted\n\
-i, --immediate\n\
Perform immediately offline tests\n\
-q, --quiet-check\n\
Returns the number of failed tests\n\
-1, --auto-on\n\
Turn on automatic offline tests\n\
-0, --auto-off\n\
Turn off automatic offline tests\n\
-n, --nagios\n\
Output suitable for Nagios\n", progname);
}
524 void
525 print_usage (void)
526 {
527 printf ("\
528 Usage: %s [-d <device>] [-i <immediate>] [-q quiet] [-1 <auto-on>]\n\
529 [-O <auto-off>] [-n <nagios>]\n", progname);
530 }