From ea9db2c32ef7f8a3135dc1413fbf8ee46a25f680 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Wed, 15 Jun 2016 09:53:07 +0200 Subject: [PATCH] write_kafka: ensure key is properly hashed to select partition Otherwise, partition is selected solely on the first letter of the key. If all your hosts are starting with the same letter, everything ends up in a single partition. --- src/write_kafka.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/write_kafka.c b/src/write_kafka.c index e881593b..614ce0f0 100644 --- a/src/write_kafka.c +++ b/src/write_kafka.c @@ -73,11 +73,19 @@ static void kafka_log(const rd_kafka_t *rkt, int level, } #endif +static uint32_t kafka_hash(const char *keydata, size_t keylen) +{ + uint32_t hash = 5381; + for (; keylen > 0; keylen--) + hash = ((hash << 5) + hash) + keydata[keylen - 1]; + return hash; +} + static int32_t kafka_partition(const rd_kafka_topic_t *rkt, const void *keydata, size_t keylen, int32_t partition_cnt, void *p, void *m) { - uint32_t key = *((uint32_t *)keydata ); + uint32_t key = kafka_hash(keydata, keylen); uint32_t target = key % partition_cnt; int32_t i = partition_cnt; -- 2.30.2