summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: d53b119)
raw | patch | inline | side by side (parent: d53b119)
author | Florian Forster <octo@collectd.org> | |
Sat, 22 Jun 2013 11:21:27 +0000 (13:21 +0200) | ||
committer | Florian Forster <octo@collectd.org> | |
Sat, 22 Jun 2013 11:21:27 +0000 (13:21 +0200) |
src/Makefile.am | patch | blob | history | |
src/collectd.conf.in | patch | blob | history | |
src/collectd.conf.pod | patch | blob | history | |
src/statsd.c | patch | blob | history | |
src/utils_latency.c | [new file with mode: 0644] | patch | blob |
src/utils_latency.h | [new file with mode: 0644] | patch | blob |
diff --git a/src/Makefile.am b/src/Makefile.am
index b443ae321e59cc89ff5b419301d45f7ab2179961..9f16fd7cb7edb7486f7d4e06965a6b98bf389c31 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
if BUILD_PLUGIN_STATSD
pkglib_LTLIBRARIES += statsd.la
-statsd_la_SOURCES = statsd.c
+statsd_la_SOURCES = statsd.c \
+ utils_latency.h utils_latency.c
statsd_la_LDFLAGS = -module -avoid-version
statsd_la_LIBADD = -lpthread
collectd_LDADD += "-dlopen" statsd.la
diff --git a/src/collectd.conf.in b/src/collectd.conf.in
index 98a7596706eb1d1107260d92544b225f0ef3e6ef..c5ab3ce83a31ab4e063b3ff63fcaf6ccc1dea409 100644 (file)
--- a/src/collectd.conf.in
+++ b/src/collectd.conf.in
# DeleteTimers false
# DeleteGauges false
# DeleteSets false
+# TimerPercentile 90.0
#</Plugin>
#<Plugin "swap">
diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod
index 39b3792d351abf7aa7d823211384e9f0ca63a694..4a34fe5df3780ad76bb945be0a0ed1c926a883e7 100644 (file)
--- a/src/collectd.conf.pod
+++ b/src/collectd.conf.pod
@@ -5141,6 +5141,15 @@ rate of counters and size of sets will be zero, timers report C<NaN> and gauges
are unchanged. If set to B<True>, the such metrics are not dispatched and
removed from the internal cache.
+=item B<TimerPercentile> I<Percent>
+
+Calculate and dispatch the configured percentile, i.e. compute the latency, so
+that I<Percent> of all reported timers are smaller than or equal to the
+computed latency. This is useful for cutting off the long tail latency, as it's
+often done in I<Service Level Agreements> (SLAs).
+
+If not specified, no percentile is calculated / dispatched.
+
=back
=head2 Plugin C<swap>
diff --git a/src/statsd.c b/src/statsd.c
index 9443fed40f55485f797bb54a0b538ad1c6a50f09..2def1914d69614dda6fb0f13c031abc6345d740d 100644 (file)
--- a/src/statsd.c
+++ b/src/statsd.c
#include "configfile.h"
#include "utils_avltree.h"
#include "utils_complain.h"
+#include "utils_latency.h"
#include <pthread.h>
{
metric_type_t type;
int64_t value;
+ latency_counter_t *latency;
c_avl_tree_t *set;
unsigned long updates_num;
};
static _Bool conf_delete_gauges = 0;
static _Bool conf_delete_sets = 0;
+static double *conf_timer_percentile = NULL;
+static size_t conf_timer_percentile_num = 0;
+
/* Must hold metrics_lock when calling this function. */
-static int statsd_metric_set_unsafe (char const *name, int64_t value, /* {{{ */
+static statsd_metric_t *statsd_metric_lookup_unsafe (char const *name,
metric_type_t type)
{
+ char const *prefix;
+ char key[DATA_MAX_NAME_LEN + 2];
+ char *key_copy;
statsd_metric_t *metric;
- char *key;
int status;
- status = c_avl_get (metrics_tree, name, (void *) &metric);
- if (status == 0)
+ switch (type)
{
- metric->value = value;
- metric->updates_num++;
+ case STATSD_COUNTER: prefix = "c"; break;
+ case STATSD_TIMER: prefix = "t"; break;
+ case STATSD_GAUGE: prefix = "g"; break;
+ case STATSD_SET: prefix = "s"; break;
+ default: return (NULL);
+ }
- return (0);
+ ssnprintf (key, sizeof (key), "%s:%s", prefix, name);
+
+ status = c_avl_get (metrics_tree, key, (void *) &metric);
+ if (status == 0)
+ return (metric);
+
+ key_copy = strdup (key);
+ if (key_copy == NULL)
+ {
+ ERROR ("statsd plugin: strdup failed.");
+ return (NULL);
}
- DEBUG ("stats plugin: Adding new metric \"%s\".", name);
- key = strdup (name);
- metric = calloc (1, sizeof (*metric));
- if ((key == NULL) || (metric == NULL))
+ metric = malloc (sizeof (*metric));
+ if (metric == NULL)
{
- sfree (key);
- sfree (metric);
- return (-1);
+ ERROR ("statsd plugin: malloc failed.");
+ sfree (key_copy);
+ return (NULL);
}
+ memset (metric, 0, sizeof (*metric));
metric->type = type;
- metric->value = value;
- metric->updates_num = 1;
+ metric->latency = NULL;
+ metric->set = NULL;
- status = c_avl_insert (metrics_tree, key, metric);
+ status = c_avl_insert (metrics_tree, key_copy, metric);
if (status != 0)
{
- sfree (key);
+ ERROR ("statsd plugin: c_avl_insert failed.");
+ sfree (key_copy);
sfree (metric);
-
- return (-1);
+ return (NULL);
}
- return (0);
-} /* }}} int statsd_metric_set_unsafe */
+ return (metric);
+} /* }}} statsd_metric_lookup_unsafe */
static int statsd_metric_set (char const *name, int64_t value, /* {{{ */
metric_type_t type)
{
- int status;
+ statsd_metric_t *metric;
pthread_mutex_lock (&metrics_lock);
- status = statsd_metric_set_unsafe (name, value, type);
+
+ metric = statsd_metric_lookup_unsafe (name, type);
+ if (metric == NULL)
+ {
+ pthread_mutex_unlock (&metrics_lock);
+ return (-1);
+ }
+
+ metric->value = value;
+ metric->updates_num++;
+
pthread_mutex_unlock (&metrics_lock);
- return (status);
+ return (0);
} /* }}} int statsd_metric_set */
static int statsd_metric_add (char const *name, int64_t delta, /* {{{ */
metric_type_t type)
{
statsd_metric_t *metric;
- int status;
pthread_mutex_lock (&metrics_lock);
- status = c_avl_get (metrics_tree, name, (void *) &metric);
- if (status == 0)
+ metric = statsd_metric_lookup_unsafe (name, type);
+ if (metric == NULL)
{
- metric->value += delta;
- metric->updates_num++;
-
pthread_mutex_unlock (&metrics_lock);
- return (0);
+ return (-1);
}
- else /* no such value yet */
- {
- status = statsd_metric_set_unsafe (name, delta, type);
- pthread_mutex_unlock (&metrics_lock);
- return (status);
- }
+ metric->value += delta;
+ metric->updates_num++;
+
+ pthread_mutex_unlock (&metrics_lock);
+
+ return (0);
} /* }}} int statsd_metric_add */
static int statsd_handle_counter (char const *name, /* {{{ */
static int statsd_handle_timer (char const *name, /* {{{ */
char const *value_str)
{
- char key[DATA_MAX_NAME_LEN + 2];
- value_t value;
+ statsd_metric_t *metric;
+ value_t value_ms;
+ cdtime_t value;
int status;
- value.derive = 0;
- status = parse_value (value_str, &value, DS_TYPE_DERIVE);
+ value_ms.derive = 0;
+ status = parse_value (value_str, &value_ms, DS_TYPE_DERIVE);
if (status != 0)
return (status);
- ssnprintf (key, sizeof (key), "t:%s", name);
+ value = MS_TO_CDTIME_T (value_ms.derive);
+
+ pthread_mutex_lock (&metrics_lock);
+
+ metric = statsd_metric_lookup_unsafe (name, STATSD_TIMER);
+ if (metric == NULL)
+ {
+ pthread_mutex_unlock (&metrics_lock);
+ return (-1);
+ }
+
+ if (metric->latency == NULL)
+ metric->latency = latency_counter_create ();
+ if (metric->latency == NULL)
+ {
+ pthread_mutex_unlock (&metrics_lock);
+ return (-1);
+ }
+
+ latency_counter_add (metric->latency, value);
+ metric->updates_num++;
- return (statsd_metric_add (key, (int64_t) value.derive, STATSD_TIMER));
+ pthread_mutex_unlock (&metrics_lock);
+ return (0);
} /* }}} int statsd_handle_timer */
-static int statsd_handle_set (char const *key_orig, /* {{{ */
- char const *name_orig)
+static int statsd_handle_set (char const *name, /* {{{ */
+ char const *set_key_orig)
{
- char key[DATA_MAX_NAME_LEN + 2];
- char *name;
statsd_metric_t *metric = NULL;
+ char *set_key;
int status;
- ssnprintf (key, sizeof (key), "s:%s", key_orig);
-
pthread_mutex_lock (&metrics_lock);
- status = c_avl_get (metrics_tree, key, (void *) &metric);
- if (status != 0) /* Create a new metric */
+ metric = statsd_metric_lookup_unsafe (name, STATSD_SET);
+ if (metric == NULL)
{
- char *key_copy;
-
- DEBUG ("stats plugin: Adding new metric \"%s\".", key);
- key_copy = strdup (key);
- if (key_copy == NULL)
- {
- pthread_mutex_unlock (&metrics_lock);
- ERROR ("statsd plugin: strdup failed.");
- return (-1);
- }
-
- metric = calloc (1, sizeof (*metric));
- if (metric == NULL)
- {
- pthread_mutex_unlock (&metrics_lock);
- ERROR ("statsd plugin: calloc failed.");
- sfree (key_copy);
- return (-1);
- }
- metric->type = STATSD_SET;
- metric->set = NULL;
-
- status = c_avl_insert (metrics_tree, key_copy, metric);
- if (status != 0)
- {
- pthread_mutex_unlock (&metrics_lock);
- ERROR ("statsd plugin: c_avl_insert (\"%s\") failed with status %i.",
- key_copy, status);
- sfree (key_copy);
- sfree (metric);
- return (-1);
- }
+ pthread_mutex_unlock (&metrics_lock);
+ return (-1);
}
- assert (metric != NULL);
/* Make sure metric->set exists. */
if (metric->set == NULL)
return (-1);
}
- name = strdup (name_orig);
- if (name == NULL)
+ set_key = strdup (set_key_orig);
+ if (set_key == NULL)
{
pthread_mutex_unlock (&metrics_lock);
ERROR ("statsd plugin: strdup failed.");
return (-1);
}
- status = c_avl_insert (metric->set, name, /* value = */ NULL);
+ status = c_avl_insert (metric->set, set_key, /* value = */ NULL);
if (status < 0)
{
pthread_mutex_unlock (&metrics_lock);
if (status < 0)
ERROR ("statsd plugin: c_avl_insert (\"%s\") failed with status %i.",
- name, status);
- sfree (name);
+ set_key, status);
+ sfree (set_key);
return (-1);
}
else if (status > 0) /* key already exists */
{
- sfree (name);
+ sfree (set_key);
}
metric->updates_num++;
return ((void *) 0);
} /* }}} void *statsd_network_thread */
+static int statsd_config_timer_percentile (oconfig_item_t *ci) /* {{{ */
+{
+ double percent = NAN;
+ double *tmp;
+ int status;
+
+ status = cf_util_get_double (ci, &percent);
+ if (status != 0)
+ return (status);
+
+ if ((percent <= 0.0) || (percent >= 100))
+ {
+ ERROR ("statsd plugin: The value for \"%s\" must be between 0 and 100, "
+ "exclusively.", ci->key);
+ return (ERANGE);
+ }
+
+ tmp = realloc (conf_timer_percentile,
+ sizeof (*conf_timer_percentile) * (conf_timer_percentile_num + 1));
+ if (tmp == NULL)
+ {
+ ERROR ("statsd plugin: realloc failed.");
+ return (ENOMEM);
+ }
+ conf_timer_percentile = tmp;
+ conf_timer_percentile[conf_timer_percentile_num] = percent;
+ conf_timer_percentile_num++;
+
+ return (0);
+} /* }}} int statsd_config_timer_percentile */
+
static int statsd_config (oconfig_item_t *ci) /* {{{ */
{
int i;
cf_util_get_boolean (child, &conf_delete_gauges);
else if (strcasecmp ("DeleteSets", child->key) == 0)
cf_util_get_boolean (child, &conf_delete_sets);
+ else if (strcasecmp ("TimerPercentile", child->key) == 0)
+ statsd_config_timer_percentile (child);
else
ERROR ("statsd plugin: The \"%s\" config option is not valid.",
child->key);
value_t values[1];
value_list_t vl = VALUE_LIST_INIT;
- if (metric->type == STATSD_GAUGE)
- values[0].gauge = (gauge_t) metric->value;
- else if (metric->type == STATSD_TIMER)
- {
- if (metric->updates_num == 0)
- values[0].gauge = NAN;
- else
- values[0].gauge =
- ((gauge_t) metric->value) / ((gauge_t) metric->updates_num);
- }
- else if (metric->type == STATSD_SET)
- {
- if (metric->set == NULL)
- values[0].gauge = 0.0;
- else
- values[0].gauge = (gauge_t) c_avl_size (metric->set);
- }
- else
- values[0].derive = (derive_t) metric->value;
-
vl.values = values;
vl.values_len = 1;
sstrncpy (vl.host, hostname_g, sizeof (vl.host));
sstrncpy (vl.type_instance, name, sizeof (vl.type_instance));
+ if (metric->type == STATSD_GAUGE)
+ values[0].gauge = (gauge_t) metric->value;
+ else if (metric->type == STATSD_TIMER)
+ {
+ size_t i;
+
+ if (metric->updates_num == 0)
+ return (0);
+
+ vl.time = cdtime ();
+
+ ssnprintf (vl.type_instance, sizeof (vl.type_instance),
+ "%s-average", name);
+ values[0].gauge = CDTIME_T_TO_DOUBLE (
+ latency_counter_get_average (metric->latency));
+ plugin_dispatch_values (&vl);
+
+ for (i = 0; i < conf_timer_percentile_num; i++)
+ {
+ ssnprintf (vl.type_instance, sizeof (vl.type_instance),
+ "%s-percentile-%.0f", name, conf_timer_percentile[i]);
+ values[0].gauge = CDTIME_T_TO_DOUBLE (
+ latency_counter_get_percentile (
+ metric->latency, conf_timer_percentile[i]));
+ plugin_dispatch_values (&vl);
+ }
+
+ latency_counter_reset (metrics->latency);
+ return (0);
+ }
+ else if (metric->type == STATSD_SET)
+ {
+ if (metric->set == NULL)
+ values[0].gauge = 0.0;
+ else
+ values[0].gauge = (gauge_t) c_avl_size (metric->set);
+ }
+ else
+ values[0].derive = (derive_t) metric->value;
+
return (plugin_dispatch_values (&vl));
} /* }}} int statsd_metric_submit_unsafe */
diff --git a/src/utils_latency.c b/src/utils_latency.c
--- /dev/null
+++ b/src/utils_latency.c
@@ -0,0 +1,173 @@
+/**
+ * collectd - src/utils_latency.c
+ * Copyright (C) 2013 Florian Forster
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Florian Forster <ff at octo.it>
+ **/
+
+#include "collectd.h"
+#include "utils_latency.h"
+#include "common.h"
+
+#ifndef LATENCY_HISTOGRAM_SIZE
+# define LATENCY_HISTOGRAM_SIZE 1000
+#endif
+
+struct latency_counter_s
+{
+ cdtime_t start_time;
+
+ cdtime_t sum;
+ size_t num;
+
+ cdtime_t min;
+ cdtime_t max;
+
+ int histogram[LATENCY_HISTOGRAM_SIZE];
+};
+
+latency_counter_t *latency_counter_create () /* {{{ */
+{
+ latency_counter_t *lc;
+
+ lc = malloc (sizeof (*lc));
+ if (lc == NULL)
+ return (NULL);
+
+ latency_counter_reset (lc);
+ return (lc);
+} /* }}} latency_counter_t *latency_counter_create */
+
+void latency_counter_destroy (latency_counter_t *lc) /* {{{ */
+{
+ sfree (lc);
+} /* }}} void latency_counter_destroy */
+
+void latency_counter_add (latency_counter_t *lc, cdtime_t latency) /* {{{ */
+{
+ size_t latency_ms;
+
+ if ((lc == NULL) || (latency == 0))
+ return;
+
+ lc->sum += latency;
+ lc->num++;
+
+ if ((lc->min == 0) && (lc->max == 0))
+ lc->min = lc->max = latency;
+ if (lc->min > latency)
+ lc->min = latency;
+ if (lc->max < latency)
+ lc->max = latency;
+
+ /* A latency of _exactly_ 1.0 ms should be stored in the buffer 0, so
+ * subtract one from the cdtime_t value so that exactly 1.0 ms get sorted
+ * accordingly. */
+ latency_ms = (size_t) CDTIME_T_TO_MS (latency - 1);
+ if (latency_ms < STATIC_ARRAY_SIZE (lc->histogram))
+ lc->histogram[latency_ms]++;
+} /* }}} void latency_counter_add */
+
+void latency_counter_reset (latency_counter_t *lc) /* {{{ */
+{
+ if (lc == NULL)
+ return;
+
+ memset (lc, 0, sizeof (*lc));
+ lc->start_time = cdtime ();
+} /* }}} void latency_counter_reset */
+
+cdtime_t latency_counter_get_min (latency_counter_t *lc) /* {{{ */
+{
+ if (lc == NULL)
+ return (0);
+ return (lc->min);
+} /* }}} cdtime_t latency_counter_get_min */
+
+cdtime_t latency_counter_get_max (latency_counter_t *lc) /* {{{ */
+{
+ if (lc == NULL)
+ return (0);
+ return (lc->max);
+} /* }}} cdtime_t latency_counter_get_max */
+
+cdtime_t latency_counter_get_average (latency_counter_t *lc) /* {{{ */
+{
+ double average;
+
+ if (lc == NULL)
+ return (0);
+
+ average = CDTIME_T_TO_DOUBLE (lc->sum) / ((double) lc->num);
+ return (DOUBLE_TO_CDTIME_T (average));
+} /* }}} cdtime_t latency_counter_get_average */
+
+cdtime_t latency_counter_get_percentile (latency_counter_t *lc,
+ double percent)
+{
+ double percent_upper;
+ double percent_lower;
+ double ms_upper;
+ double ms_lower;
+ double ms_interpolated;
+ int sum;
+ size_t i;
+
+ if ((lc == NULL) || !((percent > 0.0) && (percent < 100.0)))
+ return (0);
+
+ /* Find index i so that at least "percent" events are within i+1 ms. */
+ percent_upper = 0.0;
+ percent_lower = 0.0;
+ sum = 0;
+ for (i = 0; i < LATENCY_HISTOGRAM_SIZE; i++)
+ {
+ percent_lower = percent_upper;
+ sum += lc->histogram[i];
+ if (sum == 0)
+ percent_upper = 0.0;
+ else
+ percent_upper = 100.0 * ((double) sum) / ((double) lc->num);
+
+ if (percent_upper >= percent)
+ break;
+ }
+
+ if (i >= LATENCY_HISTOGRAM_SIZE)
+ return (0);
+
+ assert (percent_upper >= percent);
+ assert (percent_lower < percent);
+
+ ms_upper = (double) (i + 1);
+ ms_lower = (double) i;
+ if (i == 0)
+ return (MS_TO_CDTIME_T (ms_upper));
+
+ ms_interpolated = (((percent_upper - percent) * ms_lower)
+ + ((percent - percent_lower) * ms_upper))
+ / (percent_upper - percent_lower);
+
+ return (MS_TO_CDTIME_T (ms_interpolated));
+} /* }}} cdtime_t latency_counter_get_percentile */
+
+/* vim: set sw=2 sts=2 et fdm=marker : */
diff --git a/src/utils_latency.h b/src/utils_latency.h
--- /dev/null
+++ b/src/utils_latency.h
@@ -0,0 +1,45 @@
+/**
+ * collectd - src/utils_latency.h
+ * Copyright (C) 2013 Florian Forster
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Florian Forster <ff at octo.it>
+ **/
+
+#include "collectd.h"
+#include "utils_time.h"
+
+struct latency_counter_s;
+typedef struct latency_counter_s latency_counter_t;
+
+latency_counter_t *latency_counter_create ();
+void latency_counter_destroy (latency_counter_t *lc);
+
+void latency_counter_add (latency_counter_t *lc, cdtime_t latency);
+void latency_counter_reset (latency_counter_t *lc);
+
+cdtime_t latency_counter_get_min (latency_counter_t *lc);
+cdtime_t latency_counter_get_max (latency_counter_t *lc);
+cdtime_t latency_counter_get_average (latency_counter_t *lc);
+cdtime_t latency_counter_get_percentile (latency_counter_t *lc,
+ double percent);
+
+/* vim: set sw=2 sts=2 et : */