
Merge pull request #2157 from maryamtahhan/dpdkevents_upstream
[collectd.git] / src / dpdkevents.c
1 /*
2  * collectd - src/dpdkevents.c
3  * MIT License
4  *
5  * Copyright(c) 2017 Intel Corporation. All rights reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
24  *
25  * Authors:
26  *   Maryam Tahhan <>
27  *   Harry van Haaren <>
28  *   Serhiy Pshyk <>
29  *   Kim-Marie Jones <>
30  *   Krzysztof Matczak <>
31  */
33 #include "collectd.h"
35 #include "common.h"
36 #include "plugin.h"
38 #include "semaphore.h"
39 #include "sys/mman.h"
40 #include "utils_dpdk.h"
41 #include "utils_time.h"
43 #include <rte_config.h>
44 #include <rte_eal.h>
45 #include <rte_ethdev.h>
46 #include <rte_keepalive.h>
/* Plugin name used in log messages, dispatched data and callback
 * registration. */
#define DPDK_EVENTS_PLUGIN "dpdkevents"
/* Name passed to dpdk_helper_init() for this plugin's helper. */
#define DPDK_EVENTS_NAME "dpdk_collectd_events"
/* Placeholder stored in link_status / lcore_state until a real value has been
 * observed (see dpdk_events_preinit()). */
#define ETH_LINK_NA 0xFF

/* Number of bits in each half (low/high) of the 128-bit lcore mask. */
#define INT64_BIT_SIZE 64
/* Plugin instance under which keep-alive data is dispatched. */
#define KEEPALIVE_PLUGIN_INSTANCE "keepalive"
/* Shared memory name used when KeepAliveShmName is not configured. */
#define RTE_KEEPALIVE_SHM_NAME "/dpdk_keepalive_shm_name"
56 typedef struct dpdk_keepalive_shm_s {
57   sem_t core_died;
58   enum rte_keepalive_state core_state[RTE_KEEPALIVE_MAXCORES];
59   uint64_t core_last_seen_times[RTE_KEEPALIVE_MAXCORES];
60 } dpdk_keepalive_shm_t;
62 typedef struct dpdk_ka_monitor_s {
63   cdtime_t read_time;
64   int lcore_state;
65 } dpdk_ka_monitor_t;
67 typedef struct dpdk_link_status_config_s {
68   int enabled;
69   int send_updated;
70   uint32_t enabled_port_mask;
71   char port_name[RTE_MAX_ETHPORTS][DATA_MAX_NAME_LEN];
72   int notify;
73 } dpdk_link_status_config_t;
75 typedef struct dpdk_keep_alive_config_s {
76   int enabled;
77   int send_updated;
78   uint128_t lcore_mask;
79   dpdk_keepalive_shm_t *shm;
80   char shm_name[DATA_MAX_NAME_LEN];
81   int notify;
82 } dpdk_keep_alive_config_t;
84 typedef struct dpdk_events_config_s {
85   cdtime_t interval;
86   dpdk_link_status_config_t link_status;
87   dpdk_keep_alive_config_t keep_alive;
88 } dpdk_events_config_t;
90 typedef struct dpdk_link_info_s {
91   cdtime_t read_time;
92   int status_updated;
93   int link_status;
94 } dpdk_link_info_t;
96 typedef struct dpdk_events_ctx_s {
97   dpdk_events_config_t config;
98   uint32_t nb_ports;
99   dpdk_link_info_t link_info[RTE_MAX_ETHPORTS];
100   dpdk_ka_monitor_t core_info[RTE_KEEPALIVE_MAXCORES];
101 } dpdk_events_ctx_t;
103 #define DPDK_EVENTS_CTX_GET(a) ((dpdk_events_ctx_t *)dpdk_helper_priv_get(a))
105 #define DPDK_EVENTS_TRACE()                                                    \
106   DEBUG("%s:%s:%d pid=%u", DPDK_EVENTS_PLUGIN, __FUNCTION__, __LINE__, getpid())
108 static dpdk_helper_ctx_t *g_hc;
110 static int dpdk_event_keep_alive_shm_create(void) {
111   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(g_hc);
112   char *shm_name;
114   if (strlen(ec->config.keep_alive.shm_name)) {
115     shm_name = ec->config.keep_alive.shm_name;
116   } else {
117     shm_name = RTE_KEEPALIVE_SHM_NAME;
118     WARNING(DPDK_EVENTS_PLUGIN ": Keep alive shared memory identifier is not "
119                                "specified, using default one: %s",
120             shm_name);
121   }
123   char errbuf[ERR_BUF_SIZE];
124   int fd = shm_open(shm_name, O_RDWR, 0);
125   if (fd < 0) {
126     ERROR(DPDK_EVENTS_PLUGIN ": Failed to open %s as SHM:%s. Is DPDK KA "
127                              "primary application running?",
128           shm_name, sstrerror(errno, errbuf, sizeof(errbuf)));
129     return errno;
130   } else {
131     ec->config.keep_alive.shm =
132         (dpdk_keepalive_shm_t *)mmap(0, sizeof(*(ec->config.keep_alive.shm)),
133                                      PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
134     close(fd);
135     if (ec->config.keep_alive.shm == MAP_FAILED) {
136       ERROR(DPDK_EVENTS_PLUGIN ": Failed to mmap KA SHM:%s",
137             sstrerror(errno, errbuf, sizeof(errbuf)));
138       return errno;
139     }
140   }
142   return 0;
145 static void dpdk_events_default_config(void) {
146   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(g_hc);
148   ec->config.interval = plugin_get_interval();
150   /* Link Status */
151   ec->config.link_status.enabled = 0;
152   ec->config.link_status.enabled_port_mask = ~0;
153   ec->config.link_status.send_updated = 1;
154   ec->config.link_status.notify = 0;
156   for (int i = 0; i < RTE_MAX_ETHPORTS; i++) {
157     ec->config.link_status.port_name[i][0] = 0;
158   }
160   /* Keep Alive */
161   ec->config.keep_alive.enabled = 0;
162   ec->config.keep_alive.send_updated = 1;
163   ec->config.keep_alive.notify = 0;
164   memset(&ec->config.keep_alive.lcore_mask, 0,
165          sizeof(ec->config.keep_alive.lcore_mask));
166   memset(&ec->config.keep_alive.shm_name, 0,
167          sizeof(ec->config.keep_alive.shm_name));
170 static int dpdk_events_preinit(void) {
173   if (g_hc != NULL) {
174     /* already initialized if config callback was called before init callback */
175     DEBUG("dpdk_events_preinit: helper already initialized.");
176     return 0;
177   }
179   int ret =
180       dpdk_helper_init(DPDK_EVENTS_NAME, sizeof(dpdk_events_ctx_t), &g_hc);
181   if (ret != 0) {
182     ERROR(DPDK_EVENTS_PLUGIN ": failed to initialize %s helper(error: %s)",
183           DPDK_EVENTS_NAME, strerror(ret));
184     return ret;
185   }
187   dpdk_events_default_config();
189   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(g_hc);
190   for (int i = 0; i < RTE_MAX_ETHPORTS; i++) {
191     ec->link_info[i].link_status = ETH_LINK_NA;
192   }
194   for (int i = 0; i < RTE_KEEPALIVE_MAXCORES; i++) {
195     ec->core_info[i].lcore_state = ETH_LINK_NA;
196   }
198   return ret;
201 static int dpdk_events_link_status_config(dpdk_events_ctx_t *ec,
202                                           oconfig_item_t *ci) {
203   ec->config.link_status.enabled = 1;
205   DEBUG(DPDK_EVENTS_PLUGIN ": Subscribed for Link Status Events.");
207   for (int i = 0; i < ci->children_num; i++) {
208     oconfig_item_t *child = ci->children + i;
210     if (strcasecmp("EnabledPortMask", child->key) == 0) {
211       ec->config.link_status.enabled_port_mask =
212           (uint32_t)child->values[0].value.number;
213       DEBUG(DPDK_EVENTS_PLUGIN ": LinkStatus:Enabled Port Mask 0x%X",
214             ec->config.link_status.enabled_port_mask);
215     } else if (strcasecmp("SendEventsOnUpdate", child->key) == 0) {
216       ec->config.link_status.send_updated = child->values[0].value.boolean;
217       DEBUG(DPDK_EVENTS_PLUGIN ": LinkStatus:SendEventsOnUpdate %d",
218             (int)child->values[0].value.boolean);
219     } else if (strcasecmp("SendNotification", child->key) == 0) {
220       ec->config.link_status.notify = child->values[0].value.boolean;
221       DEBUG(DPDK_EVENTS_PLUGIN ": LinkStatus:SendNotification %d",
222             (int)child->values[0].value.boolean);
223     }
224   }
226   int port_num = 0;
228   /* parse port names after EnabledPortMask was parsed */
229   for (int i = 0; i < ci->children_num; i++) {
230     oconfig_item_t *child = ci->children + i;
231     if (strcasecmp("PortName", child->key) == 0) {
232       while (!(ec->config.link_status.enabled_port_mask & (1 << port_num)))
233         port_num++;
234       ssnprintf(ec->config.link_status.port_name[port_num], DATA_MAX_NAME_LEN,
235                 "%s", child->values[0].value.string);
236       DEBUG(DPDK_EVENTS_PLUGIN ": LinkStatus:Port %d Name: %s", port_num,
237             ec->config.link_status.port_name[port_num]);
238       port_num++;
239     }
240   }
242   return 0;
245 static int dpdk_events_keep_alive_config(dpdk_events_ctx_t *ec,
246                                          oconfig_item_t *ci) {
247   ec->config.keep_alive.enabled = 1;
248   DEBUG(DPDK_EVENTS_PLUGIN ": Subscribed for Keep Alive Events.");
250   for (int i = 0; i < ci->children_num; i++) {
251     oconfig_item_t *child = ci->children + i;
253     if (strcasecmp("SendEventsOnUpdate", child->key) == 0) {
254       ec->config.keep_alive.send_updated = child->values[0].value.boolean;
255       DEBUG(DPDK_EVENTS_PLUGIN ": KeepAlive:SendEventsOnUpdate %d",
256             (int)child->values[0].value.boolean);
257     } else if (strcasecmp("LCoreMask", child->key) == 0) {
258       char lcore_mask[DATA_MAX_NAME_LEN];
259       ssnprintf(lcore_mask, sizeof(lcore_mask), "%s",
260                 child->values[0].value.string);
261       ec->config.keep_alive.lcore_mask =
262           str_to_uint128(lcore_mask, strlen(lcore_mask));
263       DEBUG(DPDK_EVENTS_PLUGIN ": KeepAlive:LCoreMask 0x%" PRIX64 "%" PRIX64 "",
264             ec->config.keep_alive.lcore_mask.high,
265             ec->config.keep_alive.lcore_mask.low);
266     } else if (strcasecmp("KeepAliveShmName", child->key) == 0) {
267       ssnprintf(ec->config.keep_alive.shm_name,
268                 sizeof(ec->config.keep_alive.shm_name), "%s",
269                 child->values[0].value.string);
270       DEBUG(DPDK_EVENTS_PLUGIN ": KeepAlive:KeepAliveShmName %s",
271             ec->config.keep_alive.shm_name);
272     } else if (strcasecmp("SendNotification", child->key) == 0) {
273       ec->config.keep_alive.notify = child->values[0].value.boolean;
274       DEBUG(DPDK_EVENTS_PLUGIN ": KeepAlive:SendNotification %d",
275             (int)child->values[0].value.boolean);
276     }
277   }
279   return 0;
282 static int dpdk_events_config(oconfig_item_t *ci) {
285   int ret = dpdk_events_preinit();
286   if (ret)
287     return ret;
289   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(g_hc);
291   for (int i = 0; i < ci->children_num; i++) {
292     oconfig_item_t *child = ci->children + i;
293     if (strcasecmp("EAL", child->key) == 0) {
294       dpdk_helper_eal_config_parse(g_hc, child);
295     } else if (strcasecmp("Event", child->key) == 0) {
296       if (strcasecmp(child->values[0].value.string, "link_status") == 0) {
297         dpdk_events_link_status_config(ec, child);
298       } else if (strcasecmp(child->values[0].value.string, "keep_alive") == 0) {
299         dpdk_events_keep_alive_config(ec, child);
300       } else {
301         ERROR(DPDK_EVENTS_PLUGIN ": The selected event \"%s\" is unknown.",
302               child->values[0].value.string);
303       }
304     }
305   }
307   if (!ec->config.keep_alive.enabled && !ec->config.link_status.enabled) {
308     ERROR(DPDK_EVENTS_PLUGIN ": At least one type of events should be "
309                              "configured for collecting. Plugin misconfigured");
310     return -1;
311   }
313   return ret;
316 static int dpdk_helper_link_status_get(dpdk_helper_ctx_t *phc) {
317   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(phc);
319   /* get Link Status values from DPDK */
320   uint8_t nb_ports = rte_eth_dev_count();
321   if (nb_ports == 0) {
322     DPDK_CHILD_LOG("dpdkevent-helper: No DPDK ports available. "
323                    "Check bound devices to DPDK driver.\n");
324     return -ENODEV;
325   }
326   ec->nb_ports = nb_ports > RTE_MAX_ETHPORTS ? RTE_MAX_ETHPORTS : nb_ports;
328   for (int i = 0; i < ec->nb_ports; i++) {
329     if (ec->config.link_status.enabled_port_mask & (1 << i)) {
330       struct rte_eth_link link;
331       ec->link_info[i].read_time = cdtime();
332       rte_eth_link_get_nowait(i, &link);
333       if ((link.link_status == ETH_LINK_NA) ||
334           (link.link_status != ec->link_info[i].link_status)) {
335         ec->link_info[i].link_status = link.link_status;
336         ec->link_info[i].status_updated = 1;
337         DPDK_CHILD_LOG(" === PORT %d Link Status: %s\n", i,
338                        link.link_status ? "UP" : "DOWN");
339       }
340     }
341   }
343   return 0;
346 /* this function is called from helper context */
347 int dpdk_helper_command_handler(dpdk_helper_ctx_t *phc, enum DPDK_CMD cmd) {
348   if (phc == NULL) {
349     DPDK_CHILD_LOG(DPDK_EVENTS_PLUGIN ": Invalid argument(phc)\n");
350     return -EINVAL;
351   }
353   if (cmd != DPDK_CMD_GET_EVENTS) {
354     DPDK_CHILD_LOG(DPDK_EVENTS_PLUGIN ": Unknown command (cmd=%d)\n", cmd);
355     return -EINVAL;
356   }
358   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(phc);
359   int ret = 0;
360   if (ec->config.link_status.enabled)
361     ret = dpdk_helper_link_status_get(phc);
363   return ret;
366 static void dpdk_events_notification_dispatch(int severity,
367                                               const char *plugin_instance,
368                                               cdtime_t time, const char *msg) {
369   notification_t n = {
370       .severity = severity, .time = time, .plugin = DPDK_EVENTS_PLUGIN};
371   sstrncpy(, hostname_g, sizeof(;
372   sstrncpy(n.plugin_instance, plugin_instance, sizeof(n.plugin_instance));
373   sstrncpy(n.message, msg, sizeof(n.message));
374   plugin_dispatch_notification(&n);
377 static void dpdk_events_gauge_submit(const char *plugin_instance,
378                                      const char *type_instance, gauge_t value,
379                                      cdtime_t time) {
380   value_list_t vl = {.values = &(value_t){.gauge = value},
381                      .values_len = 1,
382                      .time = time,
383                      .plugin = DPDK_EVENTS_PLUGIN,
384                      .type = "gauge",
385                      .meta = NULL};
386   sstrncpy(, hostname_g, sizeof(;
387   sstrncpy(vl.plugin_instance, plugin_instance, sizeof(vl.plugin_instance));
388   sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
389   plugin_dispatch_values(&vl);
392 static int dpdk_events_link_status_dispatch(dpdk_helper_ctx_t *phc) {
393   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(phc);
394   DEBUG(DPDK_EVENTS_PLUGIN ": %s:%d ports=%u", __FUNCTION__, __LINE__,
395         ec->nb_ports);
397   /* dispatch Link Status values to collectd */
398   for (int i = 0; i < ec->nb_ports; i++) {
399     if (ec->config.link_status.enabled_port_mask & (1 << i)) {
400       if (!ec->config.link_status.send_updated ||
401           ec->link_info[i].status_updated) {
403         DEBUG(DPDK_EVENTS_PLUGIN ": Dispatch PORT %d Link Status: %s", i,
404               ec->link_info[i].link_status ? "UP" : "DOWN");
406         char dev_name[DATA_MAX_NAME_LEN];
407         if (ec->config.link_status.port_name[i][0] != 0) {
408           ssnprintf(dev_name, sizeof(dev_name), "%s",
409                     ec->config.link_status.port_name[i]);
410         } else {
411           ssnprintf(dev_name, sizeof(dev_name), "port.%d", i);
412         }
414         if (ec->config.link_status.notify) {
415           int sev = ec->link_info[i].link_status ? NOTIF_OKAY : NOTIF_WARNING;
416           char msg[DATA_MAX_NAME_LEN];
417           ssnprintf(msg, sizeof(msg), "Link Status: %s",
418                     ec->link_info[i].link_status ? "UP" : "DOWN");
419           dpdk_events_notification_dispatch(sev, dev_name,
420                                             ec->link_info[i].read_time, msg);
421         } else {
422           dpdk_events_gauge_submit(dev_name, "link_status",
423                                    (gauge_t)ec->link_info[i].link_status,
424                                    ec->link_info[i].read_time);
425         }
426         ec->link_info[i].status_updated = 0;
427       }
428     }
429   }
431   return 0;
434 static void dpdk_events_keep_alive_dispatch(dpdk_helper_ctx_t *phc) {
435   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(phc);
437   /* dispatch Keep Alive values to collectd */
438   for (int i = 0; i < RTE_KEEPALIVE_MAXCORES; i++) {
439     if (i < INT64_BIT_SIZE) {
440       if (!(ec->config.keep_alive.lcore_mask.low & ((uint64_t)1 << i)))
441         continue;
442     } else if (i >= INT64_BIT_SIZE && i < INT64_BIT_SIZE * 2) {
443       if (!(ec->config.keep_alive.lcore_mask.high &
444             ((uint64_t)1 << (i - INT64_BIT_SIZE))))
445         continue;
446     } else {
448               ": %s:%d Core id %u is out of 0 to %u range, skipping",
449               __FUNCTION__, __LINE__, i, INT64_BIT_SIZE * 2);
450       continue;
451     }
453     char core_name[DATA_MAX_NAME_LEN];
454     ssnprintf(core_name, sizeof(core_name), "lcore%u", i);
456     if (!ec->config.keep_alive.send_updated ||
457         (ec->core_info[i].lcore_state !=
458          ec->config.keep_alive.shm->core_state[i])) {
459       ec->core_info[i].lcore_state = ec->config.keep_alive.shm->core_state[i];
460       ec->core_info[i].read_time = cdtime();
462       if (ec->config.keep_alive.notify) {
463         char msg[DATA_MAX_NAME_LEN];
464         int sev;
466         switch (ec->config.keep_alive.shm->core_state[i]) {
467         case RTE_KA_STATE_ALIVE:
468           sev = NOTIF_OKAY;
469           ssnprintf(msg, sizeof(msg), "lcore %u Keep Alive Status: ALIVE", i);
470           break;
471         case RTE_KA_STATE_MISSING:
472           ssnprintf(msg, sizeof(msg), "lcore %u Keep Alive Status: MISSING", i);
473           sev = NOTIF_WARNING;
474           break;
475         case RTE_KA_STATE_DEAD:
476           ssnprintf(msg, sizeof(msg), "lcore %u Keep Alive Status: DEAD", i);
477           sev = NOTIF_FAILURE;
478           break;
479         case RTE_KA_STATE_UNUSED:
480           ssnprintf(msg, sizeof(msg), "lcore %u Keep Alive Status: UNUSED", i);
481           sev = NOTIF_OKAY;
482           break;
483         case RTE_KA_STATE_GONE:
484           ssnprintf(msg, sizeof(msg), "lcore %u Keep Alive Status: GONE", i);
485           sev = NOTIF_FAILURE;
486           break;
487         case RTE_KA_STATE_DOZING:
488           ssnprintf(msg, sizeof(msg), "lcore %u Keep Alive Status: DOZING", i);
489           sev = NOTIF_OKAY;
490           break;
491         case RTE_KA_STATE_SLEEP:
492           ssnprintf(msg, sizeof(msg), "lcore %u Keep Alive Status: SLEEP", i);
493           sev = NOTIF_OKAY;
494           break;
495         default:
496           ssnprintf(msg, sizeof(msg), "lcore %u Keep Alive Status: UNKNOWN", i);
497           sev = NOTIF_FAILURE;
498         }
500         dpdk_events_notification_dispatch(sev, KEEPALIVE_PLUGIN_INSTANCE,
501                                           ec->core_info[i].read_time, msg);
502       } else {
503         dpdk_events_gauge_submit(KEEPALIVE_PLUGIN_INSTANCE, core_name,
504                                  ec->config.keep_alive.shm->core_state[i],
505                                  ec->core_info[i].read_time);
506       }
507     }
508   }
511 static int dpdk_events_read(user_data_t *ud) {
514   if (g_hc == NULL) {
515     ERROR(DPDK_EVENTS_PLUGIN ": plugin not initialized.");
516     return -1;
517   }
519   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(g_hc);
521   if (ec->config.link_status.enabled) {
522     int cmd_res = 0;
523     int ret = dpdk_helper_command(g_hc, DPDK_CMD_GET_EVENTS, &cmd_res,
524                                   ec->config.interval);
525     if (cmd_res == 0 && ret == 0) {
526       dpdk_events_link_status_dispatch(g_hc);
527     }
528   }
530   if (ec->config.keep_alive.enabled) {
531     dpdk_events_keep_alive_dispatch(g_hc);
532   }
534   return 0;
537 static int dpdk_events_init(void) {
540   int ret = dpdk_events_preinit();
541   if (ret)
542     return ret;
544   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(g_hc);
546   if (ec->config.keep_alive.enabled) {
547     ret = dpdk_event_keep_alive_shm_create();
548     if (ret) {
549       ERROR(DPDK_EVENTS_PLUGIN ": %s : error %d in ka_shm_create()",
550             __FUNCTION__, ret);
551       return ret;
552     }
553   }
554   return 0;
557 static int dpdk_events_shutdown(void) {
559   int ret;
561   dpdk_events_ctx_t *ec = DPDK_EVENTS_CTX_GET(g_hc);
562   if (ec->config.keep_alive.enabled) {
563     ret = munmap(ec->config.keep_alive.shm, sizeof(dpdk_keepalive_shm_t));
564     if (ret) {
565       ERROR(DPDK_EVENTS_PLUGIN ": munmap KA monitor returned %d", ret);
566       return ret;
567     }
568   }
570   ret = dpdk_helper_shutdown(g_hc);
571   g_hc = NULL;
572   if (ret)
573     ERROR(DPDK_EVENTS_PLUGIN ": failed to cleanup %s helper", DPDK_EVENTS_NAME);
575   return ret;
578 void module_register(void) {
579   plugin_register_init(DPDK_EVENTS_PLUGIN, dpdk_events_init);
580   plugin_register_complex_config(DPDK_EVENTS_PLUGIN, dpdk_events_config);
581   plugin_register_complex_read(NULL, DPDK_EVENTS_PLUGIN, dpdk_events_read, 0,
582                                NULL);
583   plugin_register_shutdown(DPDK_EVENTS_PLUGIN, dpdk_events_shutdown);