Code

Display usage if no argv (Reuben Farrelly)
[nagiosplug.git] / plugins / check_ide_smart.c
/*
 *  check_ide-smart v.1 - hacked version of ide-smart for Nagios
 *  Copyright (C) 2000 Robert Dale <rdale@digital-mission.com>
 *
 *  Nagios - http://www.nagios.org
 *
 *  Notes:
 *         ide-smart has the same functionality as before. Some return
 *         values were changed, otherwise the --nagios option was added.
 *
 *         Run with:  check_ide-smart --nagios [-d] <DRIVE>
 *         Where DRIVE is an IDE drive, i.e. /dev/hda, /dev/hdb, /dev/hdc
 *
 *           - Returns 0 on no errors
 *           - Returns 1 on advisories
 *           - Returns 2 on prefailure
 *           - Returns -1 not too often
 *
 *  ide-smart 1.3 - IDE S.M.A.R.T. checking tool
 *  Copyright (C) 1998-1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>
 *                1998      Gadi Oxman <gadio@netvision.net.il>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id$
 */

/*
 * Plugin identity strings.  progname and revision are handed to
 * print_revision(); copyright and email are substituted into the
 * COPYRIGHT banner printed by print_help().
 */
const char *progname = "check_ide_smart";
const char *revision = "$Revision$";
const char *copyright = "2000-2004";
const char *email = "nagiosplug-devel@lists.sourceforge.net";

45 #include "common.h"
46 #include "utils.h"
48 void print_help (void);
49 void print_usage (void);
51 #include <sys/stat.h>
52 #include <sys/ioctl.h>
53 #include <fcntl.h>
54 #include <linux/hdreg.h>
55 #include <linux/types.h>
56 #include <errno.h>
57         
/* Number of attribute slots in the values and thresholds tables. */
#define NR_ATTRIBUTES   30

#ifndef TRUE
#define TRUE 1
#endif

/* Drive health states; nagios() returns one of these as its status. */
#define PREFAILURE 2
#define ADVISORY 1
#define OPERATIONAL 0
/* Parenthesised so the negative literal is safe inside any expression. */
#define UNKNOWN (-1)

69 typedef struct threshold_s
70 {
71         __u8 id;
72         __u8 threshold;
73         __u8 reserved[10];
74 }
75 __attribute__ ((packed)) threshold_t;
77 typedef struct thresholds_s
78 {
79         __u16 revision;
80         threshold_t thresholds[NR_ATTRIBUTES];
81         __u8 reserved[18];
82         __u8 vendor[131];
83         __u8 checksum;
84 }
85 __attribute__ ((packed)) thresholds_t;
87 typedef struct value_s
88 {
89         __u8 id;
90         __u16 status;
91         __u8 value;
92         __u8 vendor[8];
93 }
94 __attribute__ ((packed)) value_t;
96 typedef struct values_s
97 {
98         __u16 revision;
99         value_t values[NR_ATTRIBUTES];
100         __u8 offline_status;
101         __u8 vendor1;
102         __u16 offline_timeout;
103         __u8 vendor2;
104         __u8 offline_capability;
105         __u16 smart_capability;
106         __u8 reserved[16];
107         __u8 vendor[125];
108         __u8 checksum;
110 __attribute__ ((packed)) values_t;
112 struct
114         __u8 value;
115         char *text;
118 offline_status_text[] =
119         {
120                 {0x00, "NeverStarted"},
121                 {0x02, "Completed"},
122                 {0x04, "Suspended"},
123                 {0x05, "Aborted"},
124                 {0x06, "Failed"},
125                 {0, 0}
126         };
128 struct
130         __u8 value;
131         char *text;
134 smart_command[] =
135         {
136                 {SMART_ENABLE, "SMART_ENABLE"},
137                 {SMART_DISABLE, "SMART_DISABLE"},
138                 {SMART_IMMEDIATE_OFFLINE, "SMART_IMMEDIATE_OFFLINE"},
139                 {SMART_AUTO_OFFLINE, "SMART_AUTO_OFFLINE"}
140         };
/* Index to smart_command table, keep in order */
enum SmartCommand
{
	SMART_CMD_ENABLE,
	SMART_CMD_DISABLE,
	SMART_CMD_IMMEDIATE_OFFLINE,
	SMART_CMD_AUTO_OFFLINE
};

151 void print_values (values_t * p, thresholds_t * t);
152 int smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error); 
154 int
155 main (int argc, char *argv[]) 
157         char *device = NULL;
158         int command = -1;
159         int o, longindex;
160         int retval = 0;
162         thresholds_t thresholds;
163         values_t values;
164         int fd;
166         static struct option longopts[] = { 
167                 {"device", required_argument, 0, 'd'}, 
168                 {"immediate", no_argument, 0, 'i'}, 
169                 {"quiet-check", no_argument, 0, 'q'}, 
170                 {"auto-on", no_argument, 0, '1'}, 
171                 {"auto-off", no_argument, 0, '0'}, 
172                 {"nagios", no_argument, 0, 'n'}, 
173                 {"help", no_argument, 0, 'h'}, 
174                 {"version", no_argument, 0, 'V'}, {0, 0, 0, 0} 
175         };
177         setlocale (LC_ALL, "");
178         bindtextdomain (PACKAGE, LOCALEDIR);
179         textdomain (PACKAGE);
181         while (1) {
182                 
183                 o = getopt_long (argc, argv, "+d:iq10nhV", longopts, &longindex);
185                 switch (o) {
186                 case -1: 
187                                                                 /* 
188                                                                  * bail out of the switch but not the loop, so
189                                                                  * that device can be extracted from argv.
190                                                                  */
191                         break;
192                 case 'd':
193                         device = optarg;
194                         break;
195                 case 'q':
196                         command = 3;
197                         break;
198                 case 'i':
199                         command = 2;
200                         break;
201                 case '1':
202                         command = 1;
203                         break;
204                 case '0':
205                         command = 0;
206                         break;
207                 case 'n':
208                         command = 4;
209                         break;
210                 case 'h':
211                         print_help ();
212                         return STATE_OK;
213                 case 'V':
214                         print_revision (progname, revision);
215                         return STATE_OK;
216                 default:
217                         usage2 (_("Unknown argument"), optarg);
218                 }
220                 if (optind < argc) {
221                         device = argv[optind];
222                 }
224                 if (!device) {
225                         print_help ();
226                         return -1;
227                 }
229                 fd = open (device, O_RDONLY);
231                 if (fd < 0) {
232                         printf (_("CRITICAL - Couldn't open device %s: %s\n"), device, strerror (errno));
233                         return 2;
234                 }
236                 if (smart_cmd_simple (fd, SMART_CMD_ENABLE, 0, TRUE)) {
237                         printf (_("CRITICAL - SMART_CMD_ENABLE\n"));
238                         return 2;
239                 }
241                 switch (command) {
242                 case 0:
243                         retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0, TRUE);
244                         break;
245                 case 1:
246                         retval = smart_cmd_simple (fd, SMART_CMD_AUTO_OFFLINE, 0xF8, TRUE);
247                         break;
248                 case 2:
249                         retval = smart_cmd_simple (fd, SMART_CMD_IMMEDIATE_OFFLINE, 0, TRUE);
250                         break;
251                 case 3:
252                         smart_read_values (fd, &values);
253                         smart_read_thresholds (fd, &thresholds);
254                         retval = values_not_passed (&values, &thresholds);
255                         break;
256                 case 4:
257                         smart_read_values (fd, &values);
258                         smart_read_thresholds (fd, &thresholds);
259                         retval = nagios (&values, &thresholds);
260                         break;
261                 default:
262                         smart_read_values (fd, &values);
263                         smart_read_thresholds (fd, &thresholds);
264                         print_values (&values, &thresholds);
265                         break;
266                 }
267                 close (fd);
268         }
269         return retval;
274 char *
275 get_offline_text (int status) 
277         int i;
278         for (i = 0; offline_status_text[i].text; i++) {
279                 if (offline_status_text[i].value == status) {
280                         return offline_status_text[i].text;
281                 }
282         }
283         return "UNKNOW";
288 int
289 smart_read_values (int fd, values_t * values) 
291         int e;
292         __u8 args[4 + 512];
293         args[0] = WIN_SMART;
294         args[1] = 0;
295         args[2] = SMART_READ_VALUES;
296         args[3] = 1;
297         if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
298                 e = errno;
299                 printf (_("CRITICAL - SMART_READ_VALUES: %s\n"), strerror (errno));
300                 return e;
301         }
302         memcpy (values, args + 4, 512);
303         return 0;
308 int
309 values_not_passed (values_t * p, thresholds_t * t) 
311         value_t * value = p->values;
312         threshold_t * threshold = t->thresholds;
313         int failed = 0;
314         int passed = 0;
315         int i;
316         for (i = 0; i < NR_ATTRIBUTES; i++) {
317                 if (value->id && threshold->id && value->id == threshold->id) {
318                         if (value->value <= threshold->threshold) {
319                                 ++failed;
320                         }
321                         else {
322                                 ++passed;
323                         }
324                 }
325                 ++value;
326                 ++threshold;
327         }
328         return (passed ? -failed : 2);
333 int
334 nagios (values_t * p, thresholds_t * t) 
336         value_t * value = p->values;
337         threshold_t * threshold = t->thresholds;
338         int status = OPERATIONAL;
339         int prefailure = 0;
340         int advisory = 0;
341         int failed = 0;
342         int passed = 0;
343         int total = 0;
344         int i;
345         for (i = 0; i < NR_ATTRIBUTES; i++) {
346                 if (value->id && threshold->id && value->id == threshold->id) {
347                         if (value->value <= threshold->threshold) {
348                                 ++failed;
349                                 if (value->status & 1) {
350                                         status = PREFAILURE;
351                                         ++prefailure;
352                                 }
353                                 else {
354                                         status = ADVISORY;
355                                         ++advisory;
356                                 }
357                         }
358                         else {
359                                 ++passed;
360                         }
361                         ++total;
362                 }
363                 ++value;
364                 ++threshold;
365         }
366         switch (status) {
367         case PREFAILURE:
368                 printf (_("CRITICAL - %d Harddrive PreFailure%cDetected! %d/%d tests failed.\n"),
369                         prefailure,
370                         prefailure > 1 ? 's' : ' ',
371                         failed,
372                   total);
373                 break;
374         case ADVISORY:
375                 printf (_("WARNING - %d Harddrive Advisor%s Detected. %d/%d tests failed.\n"),
376                         advisory,
377                         advisory > 1 ? "ies" : "y",
378                         failed,
379                         total);
380                 break;
381         case OPERATIONAL:
382                 printf (_("OK - Operational (%d/%d tests passed)\n"), passed, total);
383                 break;
384         default:
385                 printf (_("ERROR - Status '%d' uknown. %d/%d tests passed\n"), status,
386                                                 passed, total);
387                 status = -1;
388                 break;
389         }
390         return status;
395 void
396 print_value (value_t * p, threshold_t * t) 
398         printf ("Id=%3d, Status=%2d {%s , %s}, Value=%3d, Threshold=%3d, %s\n",
399                                         p->id, p->status, p->status & 1 ? "PreFailure" : "Advisory   ",
400                                         p->status & 2 ? "OnLine " : "OffLine", p->value, t->threshold,
401                                         p->value > t->threshold ? "Passed" : "Failed");
406 void
407 print_values (values_t * p, thresholds_t * t)
409         value_t * value = p->values;
410         threshold_t * threshold = t->thresholds;
411         int i;
412         for (i = 0; i < NR_ATTRIBUTES; i++) {
413                 if (value->id && threshold->id && value->id == threshold->id) {
414                         print_value (value++, threshold++);
415                 }
416         }
417         printf
418                 (_("OffLineStatus=%d {%s}, AutoOffLine=%s, OffLineTimeout=%d minutes\n"),
419                  p->offline_status,
420                  get_offline_text (p->offline_status & 0x7f),
421                  (p->offline_status & 0x80 ? "Yes" : "No"),
422                  p->offline_timeout / 60);
423         printf
424                 (_("OffLineCapability=%d {%s %s %s}\n"),
425                  p->offline_capability,
426                  p->offline_capability & 1 ? "Immediate" : "",
427                  p->offline_capability & 2 ? "Auto" : "",
428                  p->offline_capability & 4 ? "AbortOnCmd" : "SuspendOnCmd");
429         printf
430                 (_("SmartRevision=%d, CheckSum=%d, SmartCapability=%d {%s %s}\n"),
431                  p->revision,
432                  p->checksum,
433                  p->smart_capability,
434                  p->smart_capability & 1 ? "SaveOnStandBy" : "",
435                  p->smart_capability & 2 ? "AutoSave" : "");
440 void
441 print_thresholds (thresholds_t * p) 
443         threshold_t * threshold = p->thresholds;
444         int i;
445         printf ("\n");
446         printf ("SmartRevision=%d\n", p->revision);
447         for (i = 0; i < NR_ATTRIBUTES; i++) {
448                 if (threshold->id) {
449                         printf ("Id=%3d, Threshold=%3d\n", threshold->id,
450                                                         threshold->threshold); }
451                 ++threshold;
452         }
453         printf ("CheckSum=%d\n", p->checksum);
456 int
457 smart_cmd_simple (int fd, enum SmartCommand command, __u8 val0, char show_error) 
459         int e = 0;
460         __u8 args[4];
461         args[0] = WIN_SMART;
462         args[1] = val0;
463         args[2] = smart_command[command].value;
464         args[3] = 0;
465         if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
466                 e = errno;
467                 if (show_error) {
468                         printf (_("CRITICAL - %s: %s\n"), smart_command[command].text, strerror (errno));
469                 }
470         }
471         return e;
476 int
477 smart_read_thresholds (int fd, thresholds_t * thresholds) 
479         int e;
480         __u8 args[4 + 512];
481         args[0] = WIN_SMART;
482   args[1] = 0;
483   args[2] = SMART_READ_THRESHOLDS;
484   args[3] = 1;
485         if (ioctl (fd, HDIO_DRIVE_CMD, &args)) {
486                 e = errno;
487                 printf (_("CRITICAL - SMART_READ_THRESHOLDS: %s\n"), strerror (errno));
488                 return e;
489         }
490         memcpy (thresholds, args + 4, 512);
491         return 0;
495 void
496 print_help (void)
498         print_revision (progname, revision);
500         printf ("Nagios feature - 1999 Robert Dale <rdale@digital-mission.com>\n");
501         printf ("(C) 1999 Ragnar Hojland Espinosa <ragnar@lightside.dhis.org>\n");
502         printf (COPYRIGHT, copyright, email);
504         printf(_("This plugin checks a local hard drive with the (Linux specific) SMART interface [http://smartlinux.sourceforge.net/smart/index.php].\n\n"));
505         
506         printf ("\
507 Usage: %s [OPTION] [DEVICE]\n\
508  -d, --device=DEVICE\n\
509     Select device DEVICE\n\
510     Note: if the device is selected with this option, _no_ other options are accepted\n\
511  -i, --immediate\n\
512     Perform immediately offline tests\n\
513  -q, --quiet-check\n\
514     Returns the number of failed tests\n\
515  -1, --auto-on\n\
516     Turn on automatic offline tests\n\
517  -0, --auto-off\n\
518     Turn off automatic offline tests\n\
519  -n, --nagios\n\
520     Output suitable for Nagios\n", progname);
524 void
525 print_usage (void)
527         printf ("\
528 Usage: %s [-d <device>] [-i <immediate>] [-q quiet] [-1 <auto-on>]\n\
529                         [-O <auto-off>] [-n <nagios>]\n", progname);