1 /*
2 * check_ide-smart v.1 - hacked version of ide-smart for Nagios
3 * Copyright (C) 2000 Robert Dale <rdale@digital-mission.com>
4 *
5 * Nagios - http://www.nagios.org
6 *
7 * Notes:
8 * ide-smart has the same functionality as before. Some return
9 * values were changed, otherwise the --net-saint option was added.
10 *
11 * Run with: check_ide-smart --net-saint [-d] <DRIVE>
 * Where DRIVE is an IDE drive, e.g. /dev/hda, /dev/hdb, /dev/hdc
13 *
14 * - Returns 0 on no errors
15 * - Returns 1 on advisories
16 * - Returns 2 on prefailure
 * - Returns -1 when no device is given or the computed status is not recognised
18 *
19 * ide-smart 1.3 - IDE S.M.A.R.T. checking tool
20 * Copyright (C) 1998-1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>
21 * 1998 Gadi Oxman <gadio@netvision.net.il>
22 *
23 * This program is free software; you can redistribute it and/or modify
24 * it under the terms of the GNU General Public License as published by
25 * the Free Software Foundation; either version 2 of the License, or
26 * (at your option) any later version.
27 *
28 * This program is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU General Public License for more details.
32 *
33 * You should have received a copy of the GNU General Public License
34 * along with this program; if not, write to the Free Software
35 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
36 *
37 * $Id$
38 */
/* Identification strings used by the help and version output. */
const char *progname  = "check_ide_smart";
const char *revision  = "$Revision$";
const char *copyright = "2000-2004";
const char *email     = "nagiosplug-devel@lists.sourceforge.net";
45 #include "common.h"
46 #include "utils.h"
48 #include <sys/stat.h>
49 #include <sys/ioctl.h>
50 #include <fcntl.h>
51 #include <linux/hdreg.h>
52 #include <linux/types.h>
53 #include <errno.h>
/* Number of attribute slots in the value and threshold tables. */
#define NR_ATTRIBUTES 30

#ifndef TRUE
#define TRUE 1
#endif /* TRUE */

/* Drive health states computed by net_saint(). */
#define PREFAILURE 2
#define ADVISORY 1
#define OPERATIONAL 0
/* Parenthesized so the negative literal cannot bind to neighbouring
   operators when the macro is expanded inside a larger expression. */
#define UNKNOWN (-1)
/* One slot of the threshold table: 12 bytes, no padding. */
typedef struct threshold_s {
  __u8 id;            /* attribute id; slots with id 0 are skipped by the checks */
  __u8 threshold;     /* an attribute value at or below this counts as failed */
  __u8 reserved[10];
} __attribute__ ((packed)) threshold_t;
74 typedef struct thresholds_s
75 {
76 __u16 revision;
77 threshold_t thresholds[NR_ATTRIBUTES];
78 __u8 reserved[18];
79 __u8 vendor[131];
80 __u8 checksum;
81 }
82 __attribute__ ((packed)) thresholds_t;
/* One slot of the attribute value table: 12 bytes, no padding. */
typedef struct value_s {
  __u8 id;          /* attribute id; slots with id 0 are skipped by the checks */
  __u16 status;     /* bit 0 is reported as PreFailure/Advisory, bit 1 as OnLine/OffLine */
  __u8 value;       /* compared against the threshold of the matching slot */
  __u8 vendor[8];
} __attribute__ ((packed)) value_t;
93 typedef struct values_s
94 {
95 __u16 revision;
96 value_t values[NR_ATTRIBUTES];
97 __u8 offline_status;
98 __u8 vendor1;
99 __u16 offline_timeout;
100 __u8 vendor2;
101 __u8 offline_capability;
102 __u16 smart_capability;
103 __u8 reserved[16];
104 __u8 vendor[125];
105 __u8 checksum;
106 }
107 __attribute__ ((packed)) values_t;
/* Labels for the off-line test status code.  get_offline_text() scans the
   table until it reaches the entry whose text pointer is null. */
struct {
  __u8 value;
  char *text;
} offline_status_text[] = {
  { 0x00, "NeverStarted" },
  { 0x02, "Completed" },
  { 0x04, "Suspended" },
  { 0x05, "Aborted" },
  { 0x06, "Failed" },
  { 0, 0 }                      /* end-of-table marker */
};
/* SMART sub-command byte and printable name for each command that
   smart_cmd_simple() can issue.  The table is indexed by enum
   SmartCommand, so the entry order must match that enum. */
struct {
  __u8 value;
  char *text;
} smart_command[] = {
  { SMART_ENABLE,            "SMART_ENABLE" },
  { SMART_DISABLE,           "SMART_DISABLE" },
  { SMART_IMMEDIATE_OFFLINE, "SMART_IMMEDIATE_OFFLINE" },
  { SMART_AUTO_OFFLINE,      "SMART_AUTO_OFFLINE" }
};
/* Row numbers of the smart_command table; the values are spelled out so
   the correspondence with the table order is explicit.  Keep in order. */
enum SmartCommand {
  SMART_CMD_ENABLE = 0,
  SMART_CMD_DISABLE = 1,
  SMART_CMD_IMMEDIATE_OFFLINE = 2,
  SMART_CMD_AUTO_OFFLINE = 3
};
149 int
150 main (int argc, char *argv[])
151 {
152 char *device = NULL;
153 int command = -1;
154 int o, longindex;
155 int retval = 0;
157 thresholds_t thresholds;
158 values_t values;
159 int fd;
161 static struct option longopts[] = {
162 {"device", required_argument, 0, 'd'},
163 {"immediate", no_argument, 0, 'i'},
164 {"quiet-check", no_argument, 0, 'q'},
165 {"auto-on", no_argument, 0, '1'},
166 {"auto-off", no_argument, 0, '0'},
167 {"net-saint", no_argument, 0, 'n'},
168 {"help", no_argument, 0, 'h'},
169 {"version", no_argument, 0, 'V'}, {0, 0, 0, 0}
170 };
172 setlocale (LC_ALL, "");
173 bindtextdomain (PACKAGE, LOCALEDIR);
174 textdomain (PACKAGE);
176 while (1) {
178 o = getopt_long (argc, argv, "+d:iq10nhV", longopts, &longindex);
180 if (o == -1 || o == EOF)
181 break;
183 switch (o) {
184 case 'd':
185 device = optarg;
186 break;
187 case 'q':
188 command = 3;
189 break;
190 case 'i':
191 command = 2;
192 break;
193 case '1':
194 command = 1;
195 break;
196 case '0':
197 command = 0;
198 break;
199 case 'n':
200 command = 4;
201 break;
202 case 'h':
203 print_help ();
204 return STATE_OK;
205 case 'V':
206 print_revision (progname, revision);
207 return STATE_OK;
208 default:
209 usage2 (_("Unknown argument"), optarg);
210 }
212 if (optind < argc) {
213 device = argv[optind];
214 }
216 if (!device) {
217 show_help ();
218 show_version ();
219 return -1;
220 }
222 fd = open (device, O_RDONLY);
224 if (fd < 0) {
225 printf (_("CRITICAL - Couldn't open device: %s\n"), strerror (errno));
226 return 2;
227 }
229 if (smart_cmd_simple (fd, SMART_CMD_ENABLE, 0, TRUE)) {
230 printf (_("CRITICAL - SMART_CMD_ENABLE\n"));
231 return 2;
232 }
234 switch (command) {
235 case 0:
236 retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0, TRUE);
237 break;
238 case 1:
239 retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0xF8, TRUE);
240 break;
241 case 2:
242 retval = smart_cmd_simple (fd, SMART_CMD_IMMEDIATE_OFFLINE, 0, TRUE);
243 break;
244 case 3:
245 smart_read_values (fd, &values);
246 smart_read_thresholds (fd, &thresholds);
247 retval = values_not_passed (&values, &thresholds);
248 break;
249 case 4:
250 smart_read_values (fd, &values);
251 smart_read_thresholds (fd, &thresholds);
252 retval = net_saint (&values, &thresholds);
253 break;
254 default:
255 smart_read_values (fd, &values);
256 smart_read_thresholds (fd, &thresholds);
257 print_values (&values, &thresholds);
258 break;
259 }
260 close (fd);
261 }
262 return retval;
263 }
267 char *
268 get_offline_text (int status)
269 {
270 int i;
271 for (i = 0; offline_status_text[i].text; i++) {
272 if (offline_status_text[i].value == status) {
273 return offline_status_text[i].text;
274 }
275 }
276 return "UNKNOW";
277 }
281 int
282 smart_read_values (int fd, values_t * values)
283 {
284 int e;
285 __u8 args[4 + 512];
286 args[0] = WIN_SMART;
287 args[1] = 0;
288 args[2] = SMART_READ_VALUES;
289 args[3] = 1;
290 if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
291 e = errno;
292 printf (_("CRITICAL - SMART_READ_VALUES: %s\n"), strerror (errno));
293 return e;
294 }
295 memcpy (values, args + 4, 512);
296 return 0;
297 }
301 int
302 values_not_passed (values_t * p, thresholds_t * t)
303 {
304 value_t * value = p->values;
305 threshold_t * threshold = t->thresholds;
306 int failed = 0;
307 int passed = 0;
308 int i;
309 for (i = 0; i < NR_ATTRIBUTES; i++) {
310 if (value->id && threshold->id && value->id == threshold->id) {
311 if (value->value <= threshold->threshold) {
312 ++failed;
313 }
314 else {
315 ++passed;
316 }
317 }
318 ++value;
319 ++threshold;
320 }
321 return (passed ? -failed : 2);
322 }
326 int
327 net_saint (values_t * p, thresholds_t * t)
328 {
329 value_t * value = p->values;
330 threshold_t * threshold = t->thresholds;
331 int status = OPERATIONAL;
332 int prefailure = 0;
333 int advisory = 0;
334 int failed = 0;
335 int passed = 0;
336 int total = 0;
337 int i;
338 for (i = 0; i < NR_ATTRIBUTES; i++) {
339 if (value->id && threshold->id && value->id == threshold->id) {
340 if (value->value <= threshold->threshold) {
341 ++failed;
342 if (value->status & 1) {
343 status = PREFAILURE;
344 ++prefailure;
345 }
346 else {
347 status = ADVISORY;
348 ++advisory;
349 }
350 }
351 else {
352 ++passed;
353 }
354 ++total;
355 }
356 ++value;
357 ++threshold;
358 }
359 switch (status) {
360 case PREFAILURE:
361 printf (_("CRITICAL - %d Harddrive PreFailure%cDetected! %d/%d tests failed.\n"),
362 prefailure,
363 prefailure > 1 ? 's' : ' ',
364 failed,
365 total);
366 break;
367 case ADVISORY:
368 printf (_("WARNING - %d Harddrive Advisor%s Detected. %d/%d tests failed.\n"),
369 advisory,
370 advisory > 1 ? "ies" : "y",
371 failed,
372 total);
373 break;
374 case OPERATIONAL:
375 printf (_("OK - Operational (%d/%d tests passed)\n"), passed, total);
376 break;
377 default:
378 printf (_("ERROR - Status '%d' uknown. %d/%d tests passed\n"), status,
379 passed, total);
380 status = -1;
381 break;
382 }
383 return status;
384 }
388 void
389 print_value (value_t * p, threshold_t * t)
390 {
391 printf ("Id=%3d, Status=%2d {%s , %s}, Value=%3d, Threshold=%3d, %s\n",
392 p->id, p->status, p->status & 1 ? "PreFailure" : "Advisory ",
393 p->status & 2 ? "OnLine " : "OffLine", p->value, t->threshold,
394 p->value > t->threshold ? "Passed" : "Failed");
395 }
399 void
400 print_values (values_t * p, thresholds_t * t)
401 {
402 value_t * value = p->values;
403 threshold_t * threshold = t->thresholds;
404 int i;
405 for (i = 0; i < NR_ATTRIBUTES; i++) {
406 if (value->id && threshold->id && value->id == threshold->id) {
407 print_value (value++, threshold++);
408 }
409 }
410 printf
411 (_("OffLineStatus=%d {%s}, AutoOffLine=%s, OffLineTimeout=%d minutes\n"),
412 p->offline_status,
413 get_offline_text (p->offline_status & 0x7f),
414 (p->offline_status & 0x80 ? "Yes" : "No"),
415 p->offline_timeout / 60);
416 printf
417 (_("OffLineCapability=%d {%s %s %s}\n"),
418 p->offline_capability,
419 p->offline_capability & 1 ? "Immediate" : "",
420 p->offline_capability & 2 ? "Auto" : "",
421 p->offline_capability & 4 ? "AbortOnCmd" : "SuspendOnCmd");
422 printf
423 (_("SmartRevision=%d, CheckSum=%d, SmartCapability=%d {%s %s}\n"),
424 p->revision,
425 p->checksum,
426 p->smart_capability,
427 p->smart_capability & 1 ? "SaveOnStandBy" : "",
428 p->smart_capability & 2 ? "AutoSave" : "");
429 }
433 void
434 print_thresholds (thresholds_t * p)
435 {
436 threshold_t * threshold = p->thresholds;
437 int i;
438 printf ("\n");
439 printf ("SmartRevision=%d\n", p->revision);
440 for (i = 0; i < NR_ATTRIBUTES; i++) {
441 if (threshold->id) {
442 printf ("Id=%3d, Threshold=%3d\n", threshold->id,
443 threshold->threshold); }
444 ++threshold;
445 }
446 printf ("CheckSum=%d\n", p->checksum);
447 }
449 int
450 smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0,
451 char show_error)
452 {
453 int e = 0;
454 __u8 args[4];
455 args[0] = WIN_SMART;
456 args[1] = val0;
457 args[2] = smart_command[command].value;
458 args[3] = 0;
459 if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
460 e = errno;
461 if (show_error) {
462 printf (_("CRITICAL - %s: %s\n"), smart_command[command].text, strerror (errno));
463 }
464 }
465 return e;
466 }
470 int
471 smart_read_thresholds (int fd, thresholds_t * thresholds)
472 {
473 int e;
474 __u8 args[4 + 512];
475 args[0] = WIN_SMART;
476 args[1] = 0;
477 args[2] = SMART_READ_THRESHOLDS;
478 args[3] = 1;
479 if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
480 e = errno;
481 printf (_("CRITICAL - SMART_READ_THRESHOLDS: %s\n"), strerror (errno));
482 return e;
483 }
484 memcpy (thresholds, args + 4, 512);
485 return 0;
486 }
489 void
490 print_help ()
491 {
492 print_revision (progname, revision);
494 printf ("Nagios feature - 1999 Robert Dale <rdale@digital-mission.com>\n");
495 printf ("(C) 1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>\n");
496 printf (COPYRIGHT, copyright, email);
498 printf ("\
499 Usage: %s [DEVICE] [OPTION]\n\
500 -d, --device=DEVICE\n\
501 Select device DEVICE\n\
502 -i, --immediate\n\
503 Perform immediately offline tests\n\
504 -q, --quiet-check\n\
505 Returns the number of failed tests\n\
506 -1, --auto-on\n\
507 Turn on automatic offline tests\n\
508 -0, --auto-off\n\
509 Turn off automatic offline tests\n\
510 -n, --net-saint\n\
511 Output suitable for Net Saint\n", progname);
512 }
515 void
516 print_usage (void)
517 {
518 printf ("Usage: %s \n"), progname);
519 }