From 1115921a0d8f73c08fd505344af4266105e1155d Mon Sep 17 00:00:00 2001 From: Yves Mettier Date: Thu, 14 Mar 2013 16:45:02 +0100 Subject: [PATCH] Added new WriteQueueLengthLimit (drop values when bigger) Signed-off-by: Florian Forster --- src/collectd.conf.pod | 17 ++++++ src/configfile.c | 2 + src/plugin.c | 128 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 139 insertions(+), 8 deletions(-) diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod index ddf3ac60..f2a971f6 100644 --- a/src/collectd.conf.pod +++ b/src/collectd.conf.pod @@ -235,6 +235,23 @@ Number of threads to start for dispatching value lists to write plugins. The default value is B<5>, but you may want to increase this if you have more than five plugins that may take relatively long to write to. +=item B I +=item B I + +Default value for high limit is 0 (no limit). +Default value for low limit is 50% of high limit. + +When the write queue size becomes bigger than the high limit, values I be dropped. +When the write queue size is between low and high, values I be dropped (depending +on the queue size) + +If high limit is set to 0, there is no limit. This is the default. +If high limit is set, but not low limit, low will be computed as 50% of high. + +If you do not want to randomly drop values when the queue size is between low +and high value, set the same value for low and high. When low=high and when the +queue size is bigger, values are just dropped until the queue size becomes smaller. + =item B I Sets the hostname that identifies a host. If you omit this setting, the diff --git a/src/configfile.c b/src/configfile.c index d6c224fd..88bed1c4 100644 --- a/src/configfile.c +++ b/src/configfile.c @@ -109,6 +109,8 @@ static cf_global_option_t cf_global_options[] = {"Interval", NULL, NULL}, {"ReadThreads", NULL, "5"}, {"WriteThreads", NULL, "5"}, + {"WriteQueueLengthLimitHigh", NULL, NULL}, + {"WriteQueueLengthLimitLow", NULL, NULL}, {"Timeout", NULL, "2"}, {"AutoLoadPlugin", NULL, "false"}, {"PreCacheChain", NULL, "PreCache"}, diff --git a/src/plugin.c b/src/plugin.c index 894b0e51..4e106a5b 100644 --- a/src/plugin.c +++ b/src/plugin.c @@ -108,6 +108,7 @@ static int read_threads_num = 0; static write_queue_t *write_queue_head; static write_queue_t *write_queue_tail; +static long write_queue_size = 0; static _Bool write_loop = 1; static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t write_cond = PTHREAD_COND_INITIALIZER; @@ -117,6 +118,11 @@ static size_t write_threads_num = 0; static pthread_key_t plugin_ctx_key; static _Bool plugin_ctx_key_initialized = 0; +static long writequeuelengthlimit_high = 0; +static long writequeuelengthlimit_low = 0; + +static unsigned int random_seed; + /* * Static functions */ @@ -670,11 +676,13 @@ static int plugin_write_enqueue (value_list_t const *vl) /* {{{ */ { write_queue_head = q; write_queue_tail = q; + write_queue_size = 1; } else { write_queue_tail->next = q; write_queue_tail = q; + write_queue_size += 1; } pthread_cond_signal (&write_cond); @@ -701,8 +709,11 @@ static value_list_t *plugin_write_dequeue (void) /* {{{ */ q = write_queue_head; write_queue_head = q->next; - if (write_queue_head == NULL) + write_queue_size -= 1; + if (write_queue_head == NULL) { write_queue_tail = NULL; + write_queue_size = 0; /* Maybe instead of setting write_queue_size to 0, we should assert(write_queue_size == 0) ? */ + } pthread_mutex_unlock (&write_lock); @@ -805,6 +816,7 @@ static void stop_write_threads (void) /* {{{ */ } write_queue_head = NULL; write_queue_tail = NULL; + write_queue_size = 0; pthread_mutex_unlock (&write_lock); if (i > 0) @@ -1436,6 +1448,61 @@ void plugin_init_all (void) chain_name = global_option_get ("PostCacheChain"); post_cache_chain = fc_chain_get_by_name (chain_name); + { + const char *str_writequeuelengthlimithigh = global_option_get ("WriteQueueLengthLimitHigh"); + const char *str_writequeuelengthlimitlow = global_option_get ("WriteQueueLengthLimitLow"); + + writequeuelengthlimit_high = 0; + writequeuelengthlimit_low = 0; + + if(NULL != str_writequeuelengthlimithigh) { + errno = 0; + /* get high limit */ + writequeuelengthlimit_high = strtol(str_writequeuelengthlimithigh, NULL, 10); + if ((errno == ERANGE && (writequeuelengthlimit_high == LONG_MAX || writequeuelengthlimit_high == LONG_MIN)) + || (errno != 0 && writequeuelengthlimit_high == 0) + ) { + writequeuelengthlimit_high = 0; + ERROR("Config 'WriteQueueLengthLimitHigh' accepts one integer value only. Running with no limit !"); + } + if(writequeuelengthlimit_high < 0) { + ERROR("Config 'WriteQueueLengthLimitHigh' accepts positive values only. Running with no limit !"); + writequeuelengthlimit_high = 0; + } + } + + if((writequeuelengthlimit_high > 0) && (NULL != str_writequeuelengthlimitlow)) { + errno = 0; + /* get low limit */ + writequeuelengthlimit_low = strtol(str_writequeuelengthlimitlow, NULL, 10); + if ((errno == ERANGE && (writequeuelengthlimit_low == LONG_MAX || writequeuelengthlimit_low == LONG_MIN)) + || (errno != 0 && writequeuelengthlimit_low == 0) + ) { + writequeuelengthlimit_low = 0; + ERROR("Config 'WriteQueueLengthLimitLow' accepts one integer value only. Using default low limit instead"); + } + + if(writequeuelengthlimit_low < 0) { + ERROR("Config 'WriteQueueLengthLimitLow' accepts positive values only. Using default low limit instead"); + writequeuelengthlimit_low = 0; + } else if(writequeuelengthlimit_low > writequeuelengthlimit_high) { + ERROR("Config 'WriteQueueLengthLimitLow' (%ld) cannot be bigger than high limit (%ld). Using default low limit instead", + writequeuelengthlimit_low, writequeuelengthlimit_high); + writequeuelengthlimit_low = 0; + } + } + /* Check/compute low limit if not/badly defined */ + if(writequeuelengthlimit_high > 0) { + if(0 == writequeuelengthlimit_low) { + writequeuelengthlimit_low = .5 * writequeuelengthlimit_high; + } + INFO("Config 'WriteQueueLengthLimit*' : Running with limits high=%ld low=%ld", writequeuelengthlimit_high, writequeuelengthlimit_low); + random_seed = time(0); + } else { + writequeuelengthlimit_low = 0; /* This should be useless, but in case... */ + } + } + { char const *tmp = global_option_get ("WriteThreads"); int num = atoi (tmp); @@ -1953,15 +2020,60 @@ static int plugin_dispatch_values_internal (value_list_t *vl) int plugin_dispatch_values (value_list_t const *vl) { int status; + int wq_size = write_queue_size; + /* We store write_queue_size in a local variable because other threads may update write_queue_size. + * Having this in a local variable (like a cache) is better : we do not need a lock */ + short metric_will_be_dropped = 0; + + if((writequeuelengthlimit_high > 0) && (wq_size > writequeuelengthlimit_low)) { + if(wq_size >= writequeuelengthlimit_high) { + /* if high == low, we come here too */ + metric_will_be_dropped = 1; + } else { + /* here, high != low */ + long probability_to_drop; + long n; + + probability_to_drop = (wq_size - writequeuelengthlimit_low); + + /* We use rand_r with its bad RNG because it's enough for playing dices. + * There is no security consideration here so rand_r() should be enough here. + */ + n = rand_r(&random_seed) % (writequeuelengthlimit_high - writequeuelengthlimit_low) ; + + /* Let's have X = high - low. + * n is in range [0..X] + * probability_to_drop is in range [1..X[ + * probability_to_drop gets bigger when wq_size gets bigger. + */ + if(n <= probability_to_drop) { + metric_will_be_dropped = 1; + } + } + } - status = plugin_write_enqueue (vl); - if (status != 0) + if( ! metric_will_be_dropped) { + status = plugin_write_enqueue (vl); + if (status != 0) + { + char errbuf[1024]; + ERROR ("plugin_dispatch_values: plugin_write_enqueue failed " + "with status %i (%s).", status, + sstrerror (status, errbuf, sizeof (errbuf))); + return (status); + } + } + else { - char errbuf[1024]; - ERROR ("plugin_dispatch_values: plugin_write_enqueue failed " - "with status %i (%s).", status, - sstrerror (status, errbuf, sizeof (errbuf))); - return (status); + /* If you want to count dropped metrics, don't forget to add a lock here */ + /* dropped_metrics++; */ + ERROR ("plugin_dispatch_values: value dropped (write queue %ld > %ld) : time = %.3f; interval = %.3f ; %s/%s%s%s/%s%s%s", + write_queue_size, writequeuelengthlimit_low, + CDTIME_T_TO_DOUBLE (vl->time), + CDTIME_T_TO_DOUBLE (vl->interval), + vl->host, + vl->plugin, vl->plugin_instance[0]?"-":"", vl->plugin_instance, + vl->type, vl->type_instance[0]?"-":"", vl->type_instance); } return (0); -- 2.30.2