From 5e77bd677c549c4c1f81b8b8f7d78b8fd193749a Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Fri, 7 Nov 2014 15:13:27 +0100 Subject: [PATCH] smart: add a SMART plugin MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This plugin uses libatasmart: http://0pointer.de/blog/projects/being-smart.html As libatasmart is Linux-only, the plugin is therefore Linux-only too. The disks are discovered through libudev. Each SMART attribute is extracted. The current value, worst value, threshold value (if any) are recorded. Those are normalized values (between 0 and 255, higher is better). For some values, it makes more sense to record the raw value. libatasmart is converting this raw value to something sensible. We record that form. Sometimes, this is just the raw value but sometimes this is converted to another scale (for example, the temperature). People should know what each attribute means before using those values. Otherwise, the normalized values are better. Four values (power-on time, power cycle count, bad sectors and temperature) are also recorded on their own. Those are usually the values that the user cares about the most. Here is an excerpt of the plugin output with the CSV plugin (the SSD disk on my laptop doesn't provide a temperature sensor): . 
└── zoro.exoscale.ch └── smart-sda ├── smart_attribute-attribute-173-2014-11-10 ├── smart_attribute-attribute-174-2014-11-10 ├── smart_attribute-available-reserved-space-2014-11-10 ├── smart_attribute-end-to-end-error-2014-11-10 ├── smart_attribute-erase-fail-count-2014-11-10 ├── smart_attribute-power-cycle-count-2014-11-10 ├── smart_attribute-power-on-hours-2014-11-10 ├── smart_attribute-power-on-seconds-2-2014-11-10 ├── smart_attribute-program-fail-count-2014-11-10 ├── smart_attribute-reallocated-sector-count-2014-11-10 ├── smart_attribute-reported-uncorrect-2014-11-10 ├── smart_attribute-total-lbas-read-2014-11-10 ├── smart_attribute-total-lbas-written-2014-11-10 ├── smart_attribute-udma-crc-error-count-2014-11-10 ├── smart_attribute-unused-reserved-blocks-2014-11-10 ├── smart_attribute-used-reserved-blocks-chip-2014-11-10 ├── smart_badsectors-2014-11-10 ├── smart_powercycles-2014-11-10 └── smart_poweron-2014-11-10 $ cat zoro.exoscale.ch/smart-sda/smart_attribute-total-lbas-read-2014-11-10 epoch,current,worst,threshold,pretty 1415613266.376,100.000000,100.000000,0.000000,281018.000000 1415613276.395,100.000000,100.000000,0.000000,281018.000000 1415613286.384,100.000000,100.000000,0.000000,281051.000000 1415613296.383,100.000000,100.000000,0.000000,281051.000000 --- README | 4 + configure.ac | 59 ++++++++++ src/Makefile.am | 11 ++ src/collectd.conf.in | 6 + src/collectd.conf.pod | 34 ++++++ src/smart.c | 250 ++++++++++++++++++++++++++++++++++++++++++ src/types.db | 5 + 7 files changed, 369 insertions(+) create mode 100644 src/smart.c diff --git a/README b/README index 3e3a030b..7aa83b07 100644 --- a/README +++ b/README @@ -293,6 +293,10 @@ Features to have its measurements fed to collectd. This includes multimeters, sound level meters, thermometers, and much more. + - smart + Collect SMART statistics, notably load cycle count, temperature + and bad sectors. 
+ - snmp Read values from SNMP (Simple Network Management Protocol) enabled network devices such as switches, routers, thermometers, rack monitoring diff --git a/configure.ac b/configure.ac index a00eebb1..b6f35c8c 100644 --- a/configure.ac +++ b/configure.ac @@ -4823,6 +4823,62 @@ then fi # }}} +# --with-libatasmart {{{ +with_libatasmart_cppflags="" +with_libatasmart_ldflags="" +AC_ARG_WITH(libatasmart, [AS_HELP_STRING([--with-libatasmart@<:@=PREFIX@:>@], [Path to libatasmart.])], +[ + if test "x$withval" != "xno" && test "x$withval" != "xyes" + then + with_libatasmart_cppflags="-I$withval/include" + with_libatasmart_ldflags="-L$withval/lib" + with_libatasmart="yes" + else + with_libatasmart="$withval" + fi +], +[ + if test "x$ac_system" = "xLinux" + then + with_libatasmart="yes" + else + with_libatasmart="no (Linux only library)" + fi +]) +if test "x$with_libatasmart" = "xyes" +then + SAVE_CPPFLAGS="$CPPFLAGS" + CPPFLAGS="$CPPFLAGS $with_libatasmart_cppflags" + + AC_CHECK_HEADERS(atasmart.h, [with_libatasmart="yes"], [with_libatasmart="no (atasmart.h not found)"]) + + CPPFLAGS="$SAVE_CPPFLAGS" +fi +if test "x$with_libatasmart" = "xyes" +then + SAVE_CPPFLAGS="$CPPFLAGS" + SAVE_LDFLAGS="$LDFLAGS" + CPPFLAGS="$CPPFLAGS $with_libatasmart_cppflags" + LDFLAGS="$LDFLAGS $with_libatasmart_ldflags" + + AC_CHECK_LIB(atasmart, sk_disk_open, [with_libatasmart="yes"], [with_libatasmart="no (Symbol 'sk_disk_open' not found)"]) + + CPPFLAGS="$SAVE_CPPFLAGS" + LDFLAGS="$SAVE_LDFLAGS" +fi +if test "x$with_libatasmart" = "xyes" +then + BUILD_WITH_LIBATASMART_CPPFLAGS="$with_libatasmart_cppflags" + BUILD_WITH_LIBATASMART_LDFLAGS="$with_libatasmart_ldflags" + BUILD_WITH_LIBATASMART_LIBS="-latasmart" + AC_SUBST(BUILD_WITH_LIBATASMART_CPPFLAGS) + AC_SUBST(BUILD_WITH_LIBATASMART_LDFLAGS) + AC_SUBST(BUILD_WITH_LIBATASMART_LIBS) + AC_DEFINE(HAVE_LIBATASMART, 1, [Define if libatasmart is present and usable.]) +fi +AM_CONDITIONAL(BUILD_WITH_LIBATASMART, test "x$with_libatasmart" = "xyes") 
+# }}} + PKG_CHECK_MODULES([LIBNOTIFY], [libnotify], [with_libnotify="yes"], [if test "x$LIBNOTIFY_PKG_ERRORS" = "x"; then @@ -5404,6 +5460,7 @@ AC_PLUGIN([rrdtool], [$with_librrd], [RRDTool output plugin]) AC_PLUGIN([sensors], [$with_libsensors], [lm_sensors statistics]) AC_PLUGIN([serial], [$plugin_serial], [serial port traffic]) AC_PLUGIN([sigrok], [$with_libsigrok], [sigrok acquisition sources]) +AC_PLUGIN([smart], [$with_libatasmart], [SMART statistics]) AC_PLUGIN([snmp], [$with_libnetsnmp], [SNMP querying plugin]) AC_PLUGIN([statsd], [yes], [StatsD plugin]) AC_PLUGIN([swap], [$plugin_swap], [Swap usage statistics]) @@ -5624,6 +5681,7 @@ Configuration: Libraries: intel mic . . . . . . $with_mic libaquaero5 . . . . . $with_libaquaero5 + libatasmart . . . . . $with_libatasmart libcurl . . . . . . . $with_libcurl libdbi . . . . . . . $with_libdbi libcredis . . . . . . $with_libcredis @@ -5768,6 +5826,7 @@ Configuration: sensors . . . . . . . $enable_sensors serial . . . . . . . $enable_serial sigrok . . . . . . . $enable_sigrok + smart . . . . . . . . $enable_smart snmp . . . . . . . . $enable_snmp statsd . . . . . . . $enable_statsd swap . . . . . . . . 
$enable_swap diff --git a/src/Makefile.am b/src/Makefile.am index 4e62675f..9ffcdbc1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -885,6 +885,17 @@ sigrok_la_LDFLAGS = $(PLUGIN_LDFLAGS) $(BUILD_WITH_LIBSIGROK_LDFLAGS) sigrok_la_LIBADD = -lsigrok endif +if BUILD_PLUGIN_SMART +if BUILD_WITH_LIBUDEV +pkglib_LTLIBRARIES += smart.la +smart_la_SOURCES = smart.c \ + utils_ignorelist.c utils_ignorelist.h +smart_la_CFLAGS = $(AM_CFLAGS) $(BUILD_WITH_LIBATASMART_CPPFLAGS) +smart_la_LDFLAGS = $(PLUGIN_LDFLAGS) $(BUILD_WITH_LIBATASMART_LDFLAGS) +smart_la_LIBADD = $(BUILD_WITH_LIBATASMART_LIBS) -ludev +endif +endif + if BUILD_PLUGIN_SNMP pkglib_LTLIBRARIES += snmp.la snmp_la_SOURCES = snmp.c diff --git a/src/collectd.conf.in b/src/collectd.conf.in index fabf6344..8e7f3fcc 100644 --- a/src/collectd.conf.in +++ b/src/collectd.conf.in @@ -171,6 +171,7 @@ #@BUILD_PLUGIN_SENSORS_TRUE@LoadPlugin sensors #@BUILD_PLUGIN_SERIAL_TRUE@LoadPlugin serial #@BUILD_PLUGIN_SIGROK_TRUE@LoadPlugin sigrok +#@BUILD_PLUGIN_SMART_TRUE@LoadPlugin smart #@BUILD_PLUGIN_SNMP_TRUE@LoadPlugin snmp #@BUILD_PLUGIN_STATSD_TRUE@LoadPlugin statsd #@BUILD_PLUGIN_SWAP_TRUE@LoadPlugin swap @@ -971,6 +972,11 @@ # # +#<Plugin smart> +# Disk "/^[hs]d[a-f][0-9]?$/" +# IgnoreSelected false +#</Plugin> + # # # Type "voltage" diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod index da2030dd..81c1ac7a 100644 --- a/src/collectd.conf.pod +++ b/src/collectd.conf.pod @@ -5639,6 +5639,40 @@ measurements are discarded. =back +=head2 Plugin C<smart> + +The C<smart> plugin collects SMART information from physical +disks. Values collected include temperature, power cycle count, power-on +time and bad sectors. Also, all SMART attributes are collected along +with the normalized current value, the worst value, the threshold and +a human-readable value. + +Using the following two options you can ignore some disks or configure the +collection only of specific disks. + +=over 4 + +=item B<Disk> I<Name> + +Select the disk I<Name>. 
Whether it is collected or ignored depends on the +B<IgnoreSelected> setting, see below. As with other plugins that use the +daemon's ignorelist functionality, a string that starts and ends with a slash +is interpreted as a regular expression. Examples: + + Disk "sdd" + Disk "/hda[34]/" + +=item B<IgnoreSelected> B<true>|B<false> + +Sets whether selected disks, i.E<nbsp>e. the ones matched by any of the B<Disk> +statements, are ignored or if all other disks are ignored. The behavior +(hopefully) is intuitive: If no B<Disk> option is configured, all disks are +collected. If at least one B<Disk> option is given and no B<IgnoreSelected> or +set to B<false>, B<only> matching disks will be collected. If B<IgnoreSelected> +is set to B<true>, all disks are collected B<except> the ones matched. + +=back + =head2 Plugin C<snmp> Since the configuration of the C<snmp plugin> is a little more complicated than diff --git a/src/smart.c b/src/smart.c new file mode 100644 index 00000000..6c01e677 --- /dev/null +++ b/src/smart.c @@ -0,0 +1,250 @@ +/** + * collectd - src/smart.c + * Copyright (C) 2014 Vincent Bernat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *   Vincent Bernat
+ **/
+
+#include "collectd.h"
+#include "common.h"
+#include "plugin.h"
+#include "utils_ignorelist.h"
+
+#include <atasmart.h>
+#include <libudev.h>
+
+/* Configuration keys accepted inside the plugin's configuration block. */
+static const char *config_keys[] =
+{
+  "Disk",
+  "IgnoreSelected"
+};
+
+static int config_keys_num = STATIC_ARRAY_SIZE (config_keys);
+
+/* Disk selection list; created on the first configuration callback. */
+static ignorelist_t *ignorelist = NULL;
+
+/*
+ * Configuration callback.
+ *
+ * "Disk" adds an entry to the ignorelist, "IgnoreSelected" sets whether the
+ * list is inverted. Returns 0 on success, 1 if the ignorelist could not be
+ * created and -1 for an unknown key.
+ */
+static int smart_config (const char *key, const char *value)
+{
+  if (ignorelist == NULL)
+    ignorelist = ignorelist_create (/* invert = */ 1);
+  if (ignorelist == NULL)
+    return (1);
+
+  if (strcasecmp ("Disk", key) == 0)
+  {
+    ignorelist_add (ignorelist, value);
+  }
+  else if (strcasecmp ("IgnoreSelected", key) == 0)
+  {
+    int invert = 1;
+    if (IS_TRUE (value))
+      invert = 0;
+    ignorelist_set_invert (ignorelist, invert);
+  }
+  else
+  {
+    return (-1);
+  }
+
+  return (0);
+} /* int smart_config */
+
+/*
+ * Dispatch a single gauge value for disk `dev` using the given type and
+ * type instance. The strings are only read, hence the const qualifiers
+ * (callers pass string literals).
+ */
+static void smart_submit (const char *dev, const char *type,
+    const char *type_inst, double value)
+{
+  value_t values[1];
+  value_list_t vl = VALUE_LIST_INIT;
+
+  values[0].gauge = value;
+
+  vl.values = values;
+  vl.values_len = 1;
+  sstrncpy (vl.host, hostname_g, sizeof (vl.host));
+  sstrncpy (vl.plugin, "smart", sizeof (vl.plugin));
+  sstrncpy (vl.plugin_instance, dev, sizeof (vl.plugin_instance));
+  sstrncpy (vl.type, type, sizeof (vl.type));
+  sstrncpy (vl.type_instance, type_inst, sizeof (vl.type_instance));
+
+  plugin_dispatch_values (&vl);
+}
+
+/*
+ * Per-attribute callback handed to sk_disk_smart_parse_attributes().
+ *
+ * `userdata` is the short device name. Attributes without a valid current
+ * or worst value are skipped; a missing threshold is reported as 0. The
+ * four values are dispatched as one "smart_attribute" value list whose type
+ * instance is the attribute name.
+ */
+static void smart_handle_disk_attribute(SkDisk *d, const SkSmartAttributeParsedData *a,
+    void* userdata)
+{
+  const char *dev = userdata;
+  value_t values[4];
+  value_list_t vl = VALUE_LIST_INIT;
+
+  if (!a->current_value_valid || !a->worst_value_valid) return;
+  values[0].gauge = a->current_value;
+  values[1].gauge = a->worst_value;
+  values[2].gauge = a->threshold_valid?a->threshold:0;
+  values[3].gauge = a->pretty_value;
+
+  vl.values = values;
+  vl.values_len = 4;
+  sstrncpy (vl.host, hostname_g, sizeof (vl.host));
+  sstrncpy (vl.plugin, "smart", sizeof (vl.plugin));
+  sstrncpy (vl.plugin_instance, dev, sizeof (vl.plugin_instance));
+  sstrncpy (vl.type, "smart_attribute", sizeof (vl.type));
+  sstrncpy (vl.type_instance, a->name, sizeof (vl.type_instance));
+
+  plugin_dispatch_values (&vl);
+}
+
+/*
+ * Read and dispatch the SMART data of one disk.
+ *
+ * `dev` is the device node path; the part after the last '/' is matched
+ * against the ignorelist and used as plugin instance. Disks that cannot be
+ * identified, have no SMART support or are sleeping are skipped. A failure
+ * to read one of the four summary values only logs a warning; the remaining
+ * values and the attribute list are still processed.
+ */
+static void smart_handle_disk (const char *dev)
+{
+  SkDisk *d = NULL;
+  SkBool awake = FALSE;
+  SkBool available = FALSE;
+  const char *shortname;
+  const SkSmartParsedData *spd;
+  uint64_t poweron, powercycles, badsectors, temperature;
+
+  /* A device without a device node has nothing we could open. */
+  if (dev == NULL)
+    return;
+
+  shortname = strrchr(dev, '/');
+  if (!shortname) return;
+  shortname++;
+  if (ignorelist_match (ignorelist, shortname) != 0) {
+    DEBUG ("smart plugin: ignoring %s.", dev);
+    return;
+  }
+
+  DEBUG ("smart plugin: checking SMART status of %s.",
+         dev);
+
+  if (sk_disk_open (dev, &d) < 0)
+  {
+    ERROR ("smart plugin: unable to open %s.", dev);
+    return;
+  }
+  if (sk_disk_identify_is_available (d, &available) < 0 || !available)
+  {
+    DEBUG ("smart plugin: disk %s cannot be identified.", dev);
+    goto end;
+  }
+  if (sk_disk_smart_is_available (d, &available) < 0 || !available)
+  {
+    DEBUG ("smart plugin: disk %s has no SMART support.", dev);
+    goto end;
+  }
+  if (sk_disk_check_sleep_mode (d, &awake) < 0 || !awake)
+  {
+    DEBUG ("smart plugin: disk %s is sleeping.", dev);
+    goto end;
+  }
+  if (sk_disk_smart_read_data (d) < 0)
+  {
+    ERROR ("smart plugin: unable to get SMART data for disk %s.", dev);
+    goto end;
+  }
+  if (sk_disk_smart_parse (d, &spd) < 0)
+  {
+    ERROR ("smart plugin: unable to parse SMART data for disk %s.", dev);
+    goto end;
+  }
+
+  /* Get some specific values */
+  if (sk_disk_smart_get_power_on (d, &poweron) < 0)
+  {
+    WARNING ("smart plugin: unable to get milliseconds since power on for %s.",
+             dev);
+  }
+  else
+    smart_submit (shortname, "smart_poweron", "", poweron / 1000.);
+
+  if (sk_disk_smart_get_power_cycle (d, &powercycles) < 0)
+  {
+    WARNING ("smart plugin: unable to get number of power cycles for %s.",
+             dev);
+  }
+  else
+    smart_submit (shortname, "smart_powercycles", "", powercycles);
+
+  if (sk_disk_smart_get_bad (d, &badsectors) < 0)
+  {
+    WARNING ("smart plugin: unable to get number of bad sectors for %s.",
+             dev);
+  }
+  else
+    smart_submit (shortname, "smart_badsectors", "", badsectors);
+
+  if (sk_disk_smart_get_temperature (d, &temperature) < 0)
+  {
+    WARNING ("smart plugin: unable to get temperature for %s.",
+             dev);
+  }
+  else
+    /* NOTE(review): presumably millikelvin converted to degrees Celsius;
+     * confirm against the libatasmart documentation. */
+    smart_submit (shortname, "smart_temperature", "", temperature / 1000. - 273.15);
+
+  /* Grab all attributes */
+  if (sk_disk_smart_parse_attributes(d, smart_handle_disk_attribute,
+                                     (char *)shortname) < 0)
+  {
+    ERROR ("smart plugin: unable to handle SMART attributes for %s.",
+           dev);
+  }
+
+end:
+  sk_disk_free(d);
+}
+
+/*
+ * Read callback: enumerate all block devices of type "disk" through udev
+ * and hand each device node to smart_handle_disk().
+ *
+ * Returns 0 on success and -1 if udev could not be initialized or the
+ * enumeration object could not be created.
+ */
+static int smart_read (void)
+{
+  struct udev *handle_udev;
+  struct udev_enumerate *enumerate;
+  struct udev_list_entry *devices, *dev_list_entry;
+
+  /* Use udev to get a list of disks */
+  handle_udev = udev_new();
+  if (!handle_udev)
+  {
+    ERROR ("smart plugin: unable to initialize udev.");
+    return (-1);
+  }
+  enumerate = udev_enumerate_new (handle_udev);
+  if (!enumerate)
+  {
+    ERROR ("smart plugin: unable to enumerate devices with udev.");
+    udev_unref (handle_udev);
+    return (-1);
+  }
+  udev_enumerate_add_match_subsystem (enumerate, "block");
+  udev_enumerate_add_match_property (enumerate, "DEVTYPE", "disk");
+  udev_enumerate_scan_devices (enumerate);
+  devices = udev_enumerate_get_list_entry (enumerate);
+  udev_list_entry_foreach (dev_list_entry, devices)
+  {
+    const char *path, *devpath;
+    struct udev_device *dev;
+
+    path = udev_list_entry_get_name (dev_list_entry);
+    dev = udev_device_new_from_syspath (handle_udev, path);
+    if (dev == NULL)
+      continue;
+
+    /* The device node may be missing; skip such devices instead of
+     * passing NULL on. */
+    devpath = udev_device_get_devnode (dev);
+    if (devpath != NULL)
+    {
+      /* Query status with libatasmart */
+      smart_handle_disk (devpath);
+    }
+
+    /* devpath belongs to dev, so release the device only after use. */
+    udev_device_unref (dev);
+  }
+
+  udev_enumerate_unref (enumerate);
+  udev_unref (handle_udev);
+
+  return (0);
+} /* int smart_read */
+
+/* Register the configuration and read callbacks with the daemon. */
+void module_register (void)
+{
+  plugin_register_config ("smart", smart_config,
+                          config_keys, config_keys_num);
+  plugin_register_read ("smart", smart_read);
+} /* void module_register */
diff --git a/src/types.db b/src/types.db
index 64137b07..ec34bd43 100644
--- a/src/types.db
+++ b/src/types.db
@@ -168,6 +168,11 @@ serial_octets rx:DERIVE:0:U, tx:DERIVE:0:U
 signal_noise value:GAUGE:U:0
 signal_power value:GAUGE:U:0
 signal_quality value:GAUGE:0:U
+smart_poweron value:GAUGE:0:U
+smart_powercycles value:GAUGE:0:U
+smart_badsectors value:GAUGE:0:U
+smart_temperature value:GAUGE:-300:300
+smart_attribute current:GAUGE:0:255, worst:GAUGE:0:255, threshold:GAUGE:0:255, pretty:GAUGE:0:U
 snr value:GAUGE:0:U
 spam_check value:GAUGE:0:U
 spam_score value:GAUGE:U:U
-- 
2.30.2