ff96843b7b62083c39d10143577bba671c4c460e
1 /**
2 * collectd - src/threshold.c
3 * Copyright (C) 2007-2010 Florian Forster
4 * Copyright (C) 2008-2009 Sebastian Harl
5 * Copyright (C) 2009 Andrés J. Díaz
6 * Copyright (C) 2014 Pierre-Yves Ritschard
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; only version 2 of the License is applicable.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 * Author:
22 * Pierre-Yves Ritschard <pyr at spootnik.org>
23 * Florian octo Forster <octo at collectd.org>
24 * Sebastian Harl <sh at tokkee.org>
25 * Andrés J. Díaz <ajdiaz at connectical.com>
26 **/
28 #include "collectd.h"
29 #include "common.h"
30 #include "plugin.h"
31 #include "utils_avltree.h"
32 #include "utils_cache.h"
33 #include "utils_threshold.h"
35 #include <assert.h>
36 #include <ltdl.h>
37 #include <pthread.h>
39 /*
40 * Threshold management
41 * ====================
42 * The following functions add, delete, search, etc. configured thresholds to
43 * the underlying AVL trees.
44 */
45 /*
46 * threshold_t *threshold_get
47 *
48 * Retrieve one specific threshold configuration. For looking up a threshold
49 * matching a value_list_t, see "threshold_search" below. Returns NULL if the
50 * specified threshold doesn't exist.
51 */
52 static threshold_t *threshold_get (const char *hostname,
53 const char *plugin, const char *plugin_instance,
54 const char *type, const char *type_instance)
55 { /* {{{ */
56 char name[6 * DATA_MAX_NAME_LEN];
57 threshold_t *th = NULL;
59 format_name (name, sizeof (name),
60 (hostname == NULL) ? "" : hostname,
61 (plugin == NULL) ? "" : plugin, plugin_instance,
62 (type == NULL) ? "" : type, type_instance);
63 name[sizeof (name) - 1] = '\0';
65 if (c_avl_get (threshold_tree, name, (void *) &th) == 0)
66 return (th);
67 else
68 return (NULL);
69 } /* }}} threshold_t *threshold_get */
71 /*
72 * threshold_t *threshold_search
73 *
74 * Searches for a threshold configuration using all the possible variations of
75 * "Host", "Plugin" and "Type" blocks. Returns NULL if no threshold could be
76 * found.
77 * XXX: This is likely the least efficient function in collectd.
78 */
79 static threshold_t *threshold_search (const value_list_t *vl)
80 { /* {{{ */
81 threshold_t *th;
83 if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance,
84 vl->type, vl->type_instance)) != NULL)
85 return (th);
86 else if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance,
87 vl->type, NULL)) != NULL)
88 return (th);
89 else if ((th = threshold_get (vl->host, vl->plugin, NULL,
90 vl->type, vl->type_instance)) != NULL)
91 return (th);
92 else if ((th = threshold_get (vl->host, vl->plugin, NULL,
93 vl->type, NULL)) != NULL)
94 return (th);
95 else if ((th = threshold_get (vl->host, "", NULL,
96 vl->type, vl->type_instance)) != NULL)
97 return (th);
98 else if ((th = threshold_get (vl->host, "", NULL,
99 vl->type, NULL)) != NULL)
100 return (th);
101 else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance,
102 vl->type, vl->type_instance)) != NULL)
103 return (th);
104 else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance,
105 vl->type, NULL)) != NULL)
106 return (th);
107 else if ((th = threshold_get ("", vl->plugin, NULL,
108 vl->type, vl->type_instance)) != NULL)
109 return (th);
110 else if ((th = threshold_get ("", vl->plugin, NULL,
111 vl->type, NULL)) != NULL)
112 return (th);
113 else if ((th = threshold_get ("", "", NULL,
114 vl->type, vl->type_instance)) != NULL)
115 return (th);
116 else if ((th = threshold_get ("", "", NULL,
117 vl->type, NULL)) != NULL)
118 return (th);
120 return (NULL);
121 } /* }}} threshold_t *threshold_search */
123 /*
124 * int ut_check_one_data_source
125 *
126 * Checks one data source against the given threshold configuration. If the
127 * `DataSource' option is set in the threshold, and the name does NOT match,
128 * `okay' is returned. If the threshold does match, its failure and warning
129 * min and max values are checked and `failure' or `warning' is returned if
130 * appropriate.
131 * Does not fail.
132 */
133 static int ut_check_one_data_source (const data_set_t *ds,
134 const value_list_t __attribute__((unused)) *vl,
135 const threshold_t *th,
136 const gauge_t *values,
137 int ds_index)
138 { /* {{{ */
139 const char *ds_name;
140 int is_warning = 0;
141 int is_failure = 0;
142 int prev_state = STATE_OKAY;
144 /* check if this threshold applies to this data source */
145 if (ds != NULL)
146 {
147 ds_name = ds->ds[ds_index].name;
148 if ((th->data_source[0] != 0)
149 && (strcmp (ds_name, th->data_source) != 0))
150 return (STATE_OKAY);
151 }
153 if ((th->flags & UT_FLAG_INVERT) != 0)
154 {
155 is_warning--;
156 is_failure--;
157 }
159 /* XXX: This is an experimental code, not optimized, not fast, not reliable,
160 * and probably, do not work as you expect. Enjoy! :D */
161 if ( (th->hysteresis > 0) && ((prev_state = uc_get_state(ds,vl)) != STATE_OKAY) )
162 {
163 switch(prev_state)
164 {
165 case STATE_ERROR:
166 if ( (!isnan (th->failure_min) && ((th->failure_min + th->hysteresis) < values[ds_index])) ||
167 (!isnan (th->failure_max) && ((th->failure_max - th->hysteresis) > values[ds_index])) )
168 return (STATE_OKAY);
169 else
170 is_failure++;
171 case STATE_WARNING:
172 if ( (!isnan (th->warning_min) && ((th->warning_min + th->hysteresis) < values[ds_index])) ||
173 (!isnan (th->warning_max) && ((th->warning_max - th->hysteresis) > values[ds_index])) )
174 return (STATE_OKAY);
175 else
176 is_warning++;
177 }
178 }
179 else { /* no hysteresis */
180 if ((!isnan (th->failure_min) && (th->failure_min > values[ds_index]))
181 || (!isnan (th->failure_max) && (th->failure_max < values[ds_index])))
182 is_failure++;
184 if ((!isnan (th->warning_min) && (th->warning_min > values[ds_index]))
185 || (!isnan (th->warning_max) && (th->warning_max < values[ds_index])))
186 is_warning++;
187 }
189 if (is_failure != 0)
190 return (STATE_ERROR);
192 if (is_warning != 0)
193 return (STATE_WARNING);
195 return (STATE_OKAY);
196 } /* }}} int ut_check_one_data_source */
198 /*
199 * int ut_check_one_threshold
200 *
201 * Checks all data sources of a value list against the given threshold, using
202 * the ut_check_one_data_source function above. Returns the worst status,
203 * which is `okay' if nothing has failed.
204 * Returns less than zero if the data set doesn't have any data sources.
205 */
206 static int ut_check_one_threshold (const data_set_t *ds,
207 const value_list_t *vl,
208 const threshold_t *th,
209 const gauge_t *values,
210 int *statuses)
211 { /* {{{ */
212 int ret = -1;
213 int i;
214 int status;
215 gauge_t values_copy[ds->ds_num];
217 memcpy (values_copy, values, sizeof (values_copy));
219 if ((th->flags & UT_FLAG_PERCENTAGE) != 0)
220 {
221 int num = 0;
222 gauge_t sum=0.0;
224 if (ds->ds_num == 1)
225 {
226 WARNING ("ut_check_one_threshold: The %s type has only one data "
227 "source, but you have configured to check this as a percentage. "
228 "That doesn't make much sense, because the percentage will always "
229 "be 100%%!", ds->type);
230 }
232 /* Prepare `sum' and `num'. */
233 for (i = 0; i < ds->ds_num; i++)
234 if (!isnan (values[i]))
235 {
236 num++;
237 sum += values[i];
238 }
240 if ((num == 0) /* All data sources are undefined. */
241 || (sum == 0.0)) /* Sum is zero, cannot calculate percentage. */
242 {
243 for (i = 0; i < ds->ds_num; i++)
244 values_copy[i] = NAN;
245 }
246 else /* We can actually calculate the percentage. */
247 {
248 for (i = 0; i < ds->ds_num; i++)
249 values_copy[i] = 100.0 * values[i] / sum;
250 }
251 } /* if (UT_FLAG_PERCENTAGE) */
253 for (i = 0; i < ds->ds_num; i++)
254 {
255 status = ut_check_one_data_source (ds, vl, th, values_copy, i);
256 if (status != -1) {
257 ret = 0;
258 if (statuses[i] < status)
259 statuses[i] = status;
260 }
261 } /* for (ds->ds_num) */
263 return (ret);
264 } /* }}} int ut_check_one_threshold */
266 /*
267 * int ut_check_threshold
268 *
269 * Gets a list of matching thresholds and searches for the worst status by one
270 * of the thresholds. Then reports that status using the ut_report_state
271 * function above.
272 * Returns zero on success and if no threshold has been configured. Returns
273 * less than zero on failure.
274 */
275 int write_riemann_threshold_check (const data_set_t *ds, const value_list_t *vl,
276 int *statuses)
277 { /* {{{ */
278 threshold_t *th;
279 gauge_t *values;
280 int status;
282 memset(statuses, 0, vl->values_len * sizeof(*statuses));
283 if (threshold_tree == NULL)
284 return 0;
286 /* Is this lock really necessary? So far, thresholds are only inserted at
287 * startup. -octo */
288 pthread_mutex_lock (&threshold_lock);
289 th = threshold_search (vl);
290 pthread_mutex_unlock (&threshold_lock);
291 if (th == NULL)
292 return (0);
294 DEBUG ("ut_check_threshold: Found matching threshold(s)");
296 values = uc_get_rate (ds, vl);
297 if (values == NULL)
298 return (0);
300 while (th != NULL)
301 {
302 status = ut_check_one_threshold (ds, vl, th, values, statuses);
303 if (status < 0)
304 {
305 ERROR ("ut_check_threshold: ut_check_one_threshold failed.");
306 sfree (values);
307 return (-1);
308 }
310 th = th->next;
311 } /* while (th) */
313 sfree (values);
315 return (0);
316 } /* }}} int ut_check_threshold */
319 /* vim: set sw=2 ts=8 sts=2 tw=78 et fdm=marker : */