1 /**
2 * collectd - src/intel_rdt.c
3 *
4 * Copyright(c) 2016 Intel Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy of
7 * this software and associated documentation files (the "Software"), to deal in
8 * the Software without restriction, including without limitation the rights to
9 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10 * of the Software, and to permit persons to whom the Software is furnished to do
11 * so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
26 **/
28 #include <pqos.h>
30 #include "common.h"
/* Plugin name; also used as the prefix of the plugin's log messages. */
#define RDT_PLUGIN "intel_rdt"

/* Upper bounds used to size the static core and core group tables. */
#define RDT_MAX_SOCKETS 8
#define RDT_MAX_SOCKET_CORES 64
#define RDT_MAX_CORES (RDT_MAX_SOCKETS * RDT_MAX_SOCKET_CORES)
38 struct rdt_core_group_s {
39 char *desc;
40 size_t num_cores;
41 unsigned *cores;
42 enum pqos_mon_event events;
43 };
44 typedef struct rdt_core_group_s rdt_core_group_t;
46 struct rdt_ctx_s {
47 rdt_core_group_t cgroups[RDT_MAX_CORES];
48 struct pqos_mon_data *pgroups[RDT_MAX_CORES];
49 size_t num_groups;
50 const struct pqos_cpuinfo *pqos_cpu;
51 const struct pqos_cap *pqos_cap;
52 const struct pqos_capability *cap_mon;
53 };
54 typedef struct rdt_ctx_s rdt_ctx_t;
56 static rdt_ctx_t *g_rdt = NULL;
/*
 * Reports whether `val' already occurs among the first `size' entries of
 * `nums'. Returns 1 when it does and 0 otherwise (including for size == 0).
 */
static int isdup(const uint64_t *nums, size_t size, uint64_t val) {
  size_t pos = 0;

  while (pos < size) {
    if (nums[pos] == val) {
      return 1;
    }
    pos++;
  }

  return 0;
}
65 static int strtouint64(const char *s, uint64_t *n) {
66 char *endptr = NULL;
68 assert(s != NULL);
69 assert(n != NULL);
71 *n = strtoull(s, &endptr, 0);
73 if (!(*s != '\0' && *endptr == '\0')) {
74 DEBUG(RDT_PLUGIN ": Error converting '%s' to unsigned number.", s);
75 return (-EINVAL);
76 }
78 return (0);
79 }
81 /*
82 * NAME
83 * strlisttonums
84 *
85 * DESCRIPTION
86 * Converts string of characters representing list of numbers into array of
87 * numbers. Allowed formats are:
88 * 0,1,2,3
89 * 0-10,20-18
90 * 1,3,5-8,10,0x10-12
91 *
92 * Numbers can be in decimal or hexadecimal format.
93 *
94 * PARAMETERS
95 * `s' String representing list of unsigned numbers.
96 * `nums' Array to put converted numeric values into.
97 * `max' Maximum number of elements that nums can accommodate.
98 *
99 * RETURN VALUE
100 * Number of elements placed into nums.
101 */
102 static size_t strlisttonums(char *s, uint64_t *nums, size_t max) {
103 int ret;
104 size_t index = 0;
105 char *saveptr = NULL;
107 if (s == NULL || nums == NULL || max == 0)
108 return index;
110 for (;;) {
111 char *p = NULL;
112 char *token = NULL;
114 token = strtok_r(s, ",", &saveptr);
115 if (token == NULL)
116 break;
118 s = NULL;
120 while (isspace(*token))
121 token++;
122 if (*token == '\0')
123 continue;
125 p = strchr(token, '-');
126 if (p != NULL) {
127 uint64_t n, start, end;
128 *p = '\0';
129 ret = strtouint64(token, &start);
130 if (ret < 0)
131 return (0);
132 ret = strtouint64(p + 1, &end);
133 if (ret < 0)
134 return (0);
135 if (start > end) {
136 return (0);
137 }
138 for (n = start; n <= end; n++) {
139 if (!(isdup(nums, index, n))) {
140 nums[index] = n;
141 index++;
142 }
143 if (index >= max)
144 return index;
145 }
146 } else {
147 uint64_t val;
149 ret = strtouint64(token, &val);
150 if (ret < 0)
151 return (0);
153 if (!(isdup(nums, index, val))) {
154 nums[index] = val;
155 index++;
156 }
157 if (index >= max)
158 return index;
159 }
160 }
162 return index;
163 }
165 /*
166 * NAME
167 * cgroup_cmp
168 *
169 * DESCRIPTION
170 * Function to compare cores in 2 core groups.
171 *
172 * PARAMETERS
173 * `cg_a' Pointer to core group a.
174 * `cg_b' Pointer to core group b.
175 *
176 * RETURN VALUE
177 * 1 if both groups contain the same cores
178 * 0 if none of their cores match
179 * -1 if some but not all cores match
180 */
181 static int cgroup_cmp(const rdt_core_group_t *cg_a,
182 const rdt_core_group_t *cg_b) {
183 int found = 0;
185 assert(cg_a != NULL);
186 assert(cg_b != NULL);
188 const int sz_a = cg_a->num_cores;
189 const int sz_b = cg_b->num_cores;
190 const unsigned *tab_a = cg_a->cores;
191 const unsigned *tab_b = cg_b->cores;
193 for (int i = 0; i < sz_a; i++) {
194 for (int j = 0; j < sz_b; j++)
195 if (tab_a[i] == tab_b[j])
196 found++;
197 }
198 /* if no cores are the same */
199 if (!found)
200 return 0;
201 /* if group contains same cores */
202 if (sz_a == sz_b && sz_b == found)
203 return 1;
204 /* if not all cores are the same */
205 return -1;
206 }
208 static int cgroup_set(rdt_core_group_t *cg, char *desc, uint64_t *cores,
209 size_t num_cores) {
210 assert(cg != NULL);
211 assert(desc != NULL);
212 assert(cores != NULL);
213 assert(num_cores > 0);
215 cg->cores = calloc(num_cores, sizeof(unsigned));
216 if (cg->cores == NULL) {
217 ERROR(RDT_PLUGIN ": Error allocating core group table");
218 return (-ENOMEM);
219 }
220 cg->num_cores = num_cores;
221 cg->desc = strdup(desc);
222 if (cg->desc == NULL) {
223 ERROR(RDT_PLUGIN ": Error allocating core group description");
224 sfree(cg->cores);
225 return (-ENOMEM);
226 }
228 for (size_t i = 0; i < num_cores; i++)
229 cg->cores[i] = (unsigned)cores[i];
231 return 0;
232 }
234 /*
235 * NAME
236 * oconfig_to_cgroups
237 *
238 * DESCRIPTION
239 * Function to set the descriptions and cores for each core group.
240 * Takes a config option containing list of strings that are used to set
241 * core group values.
242 *
243 * PARAMETERS
244 * `item' Config option containing core groups.
245 * `groups' Table of core groups to set values in.
246 * `max_groups' Maximum number of core groups allowed.
247 * `max_core' Maximum allowed core value.
248 *
249 * RETURN VALUE
250 * On success, the number of core groups set up. On error, appropriate
251 * negative error value.
252 */
253 static int oconfig_to_cgroups(oconfig_item_t *item, rdt_core_group_t *groups,
254 size_t max_groups, uint64_t max_core) {
255 int index = 0;
257 assert(groups != NULL);
258 assert(max_groups > 0);
259 assert(item != NULL);
261 for (int j = 0; j < item->values_num; j++) {
262 int ret;
263 size_t n;
264 uint64_t cores[RDT_MAX_CORES] = {0};
265 char value[DATA_MAX_NAME_LEN];
267 if ((item->values[j].value.string == NULL) || (strlen(item->values[j].value.string) == 0))
268 continue;
270 sstrncpy(value, item->values[j].value.string, sizeof(value));
272 n = strlisttonums(value, cores, STATIC_ARRAY_SIZE(cores));
273 if (n == 0) {
274 ERROR(RDT_PLUGIN ": Error parsing core group (%s)",
275 item->values[j].value.string);
276 return (-EINVAL);
277 }
279 for (int i = 0; i < n; i++) {
280 if (cores[i] > max_core) {
281 ERROR(RDT_PLUGIN ": Core group (%s) contains invalid core id (%d)",
282 item->values[j].value.string, (int)cores[i]);
283 return (-EINVAL);
284 }
285 }
287 /* set core group info */
288 ret = cgroup_set(&groups[index], item->values[j].value.string, cores, n);
289 if (ret < 0)
290 return ret;
292 index++;
294 if (index >= max_groups) {
295 WARNING(RDT_PLUGIN ": Too many core groups configured");
296 return index;
297 }
298 }
300 return index;
301 }
303 #if COLLECT_DEBUG
304 static void rdt_dump_cgroups(void) {
305 char cores[RDT_MAX_CORES * 4];
307 if (g_rdt == NULL)
308 return;
310 DEBUG(RDT_PLUGIN ": Core Groups Dump");
311 DEBUG(RDT_PLUGIN ": groups count: %zu", g_rdt->num_groups);
313 for (int i = 0; i < g_rdt->num_groups; i++) {
315 memset(cores, 0, sizeof(cores));
316 for (int j = 0; j < g_rdt->cgroups[i].num_cores; j++) {
317 snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d",
318 g_rdt->cgroups[i].cores[j]);
319 }
321 DEBUG(RDT_PLUGIN ": group[%d]:", i);
322 DEBUG(RDT_PLUGIN ": description: %s", g_rdt->cgroups[i].desc);
323 DEBUG(RDT_PLUGIN ": cores: %s", cores);
324 DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->cgroups[i].events);
325 }
327 return;
328 }
/* Converts a byte count to kilobytes (1 KB = 1024 bytes). */
static inline double bytes_to_kb(const double bytes) {
  const double bytes_per_kb = 1024.0;

  return bytes / bytes_per_kb;
}
/* Converts a byte count to megabytes (1 MB = 1024 * 1024 bytes). */
static inline double bytes_to_mb(const double bytes) {
  const double bytes_per_mb = 1024.0 * 1024.0;

  return bytes / bytes_per_mb;
}
336 static void rdt_dump_data(void) {
337 /*
338 * CORE - monitored group of cores
339 * RMID - Resource Monitoring ID associated with the monitored group
340 * LLC - last level cache occupancy
341 * MBL - local memory bandwidth
342 * MBR - remote memory bandwidth
343 */
344 DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]");
345 for (int i = 0; i < g_rdt->num_groups; i++) {
347 const struct pqos_event_values *pv = &g_rdt->pgroups[i]->values;
349 double llc = bytes_to_kb(pv->llc);
350 double mbr = bytes_to_mb(pv->mbm_remote_delta);
351 double mbl = bytes_to_mb(pv->mbm_local_delta);
353 DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdt->cgroups[i].desc,
354 g_rdt->pgroups[i]->poll_ctx[0].rmid, llc, mbl, mbr);
355 }
356 }
357 #endif /* COLLECT_DEBUG */
359 static void rdt_free_cgroups(void) {
360 for (int i = 0; i < RDT_MAX_CORES; i++) {
361 sfree(g_rdt->cgroups[i].desc);
363 sfree(g_rdt->cgroups[i].cores);
364 g_rdt->cgroups[i].num_cores = 0;
366 sfree(g_rdt->pgroups[i]);
367 }
368 }
370 static int rdt_default_cgroups(void) {
371 int ret;
373 /* configure each core in separate group */
374 for (unsigned i = 0; i < g_rdt->pqos_cpu->num_cores; i++) {
375 char desc[DATA_MAX_NAME_LEN];
376 uint64_t core = i;
378 ssnprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore);
380 /* set core group info */
381 ret = cgroup_set(&g_rdt->cgroups[i], desc, &core, 1);
382 if (ret < 0)
383 return ret;
384 }
386 return g_rdt->pqos_cpu->num_cores;
387 }
389 static int rdt_config_cgroups(oconfig_item_t *item) {
390 int n = 0;
391 enum pqos_mon_event events = 0;
393 if (item == NULL) {
394 DEBUG(RDT_PLUGIN ": cgroups_config: Invalid argument.");
395 return (-EINVAL);
396 }
398 DEBUG(RDT_PLUGIN ": Core groups [%d]:", item->values_num);
399 for (int j = 0; j < item->values_num; j++) {
400 if (item->values[j].type != OCONFIG_TYPE_STRING) {
401 ERROR(RDT_PLUGIN ": given core group value is not a string [idx=%d]",
402 j);
403 return (-EINVAL);
404 }
405 DEBUG(RDT_PLUGIN ": [%d]: %s", j, item->values[j].value.string);
406 }
408 n = oconfig_to_cgroups(item, g_rdt->cgroups, RDT_MAX_CORES,
409 g_rdt->pqos_cpu->num_cores-1);
410 if (n < 0) {
411 rdt_free_cgroups();
412 ERROR(RDT_PLUGIN ": Error parsing core groups configuration.");
413 return (-EINVAL);
414 }
416 if (n == 0) {
417 /* create default core groups if "Cores" config option is empty */
418 n = rdt_default_cgroups();
419 if (n < 0) {
420 rdt_free_cgroups();
421 ERROR(RDT_PLUGIN
422 ": Error creating default core groups configuration.");
423 return n;
424 }
425 INFO(RDT_PLUGIN
426 ": No core groups configured. Default core groups created.");
427 }
429 /* Get all available events on this platform */
430 for (int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++)
431 events |= g_rdt->cap_mon->u.mon->events[i].type;
433 events &= ~(PQOS_PERF_EVENT_LLC_MISS);
435 DEBUG(RDT_PLUGIN ": Number of cores in the system: %u",
436 g_rdt->pqos_cpu->num_cores);
437 DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
439 g_rdt->num_groups = n;
440 for (int i = 0; i < n; i++) {
441 for (int j = 0; j < i; j++) {
442 int found = 0;
443 found = cgroup_cmp(&g_rdt->cgroups[j], &g_rdt->cgroups[i]);
444 if (found != 0) {
445 rdt_free_cgroups();
446 ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups.");
447 return (-EINVAL);
448 }
449 }
451 g_rdt->cgroups[i].events = events;
452 g_rdt->pgroups[i] = calloc(1, sizeof(*g_rdt->pgroups[i]));
453 if (g_rdt->pgroups[i] == NULL) {
454 rdt_free_cgroups();
455 ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data.");
456 return (-ENOMEM);
457 }
458 }
460 return (0);
461 }
463 static int rdt_preinit(void) {
464 int ret;
466 if (g_rdt != NULL) {
467 /* already initialized if config callback was called before init callback */
468 return (0);
469 }
471 g_rdt = calloc(1, sizeof(*g_rdt));
472 if (g_rdt == NULL) {
473 ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context.");
474 return (-ENOMEM);
475 }
477 /* In case previous instance of the application was not closed properly
478 * call fini and ignore return code. */
479 pqos_fini();
481 /* TODO:
482 * stdout should not be used here. Will be reworked when support of log
483 * callback is added to PQoS library.
484 */
485 ret = pqos_init(&(struct pqos_config){.fd_log = STDOUT_FILENO});
486 if (ret != PQOS_RETVAL_OK) {
487 ERROR(RDT_PLUGIN ": Error initializing PQoS library!");
488 goto rdt_preinit_error1;
489 }
491 ret = pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu);
492 if (ret != PQOS_RETVAL_OK) {
493 ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities.");
494 goto rdt_preinit_error2;
495 }
497 ret = pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON,
498 &g_rdt->cap_mon);
499 if (ret == PQOS_RETVAL_PARAM) {
500 ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities.");
501 goto rdt_preinit_error2;
502 }
504 if (g_rdt->cap_mon == NULL) {
505 ERROR(
506 RDT_PLUGIN
507 ": Monitoring capability not detected. Nothing to do for the plugin.");
508 goto rdt_preinit_error2;
509 }
511 return (0);
513 rdt_preinit_error2:
514 pqos_fini();
516 rdt_preinit_error1:
518 sfree(g_rdt);
520 return (-1);
521 }
523 static int rdt_config(oconfig_item_t *ci) {
524 int ret = 0;
526 ret = rdt_preinit();
527 if (ret != 0)
528 return ret;
530 for (int i = 0; i < ci->children_num; i++) {
531 oconfig_item_t *child = ci->children + i;
533 if (strcasecmp("Cores", child->key) == 0) {
535 ret = rdt_config_cgroups(child);
536 if (ret != 0)
537 return ret;
539 #if COLLECT_DEBUG
540 rdt_dump_cgroups();
541 #endif /* COLLECT_DEBUG */
543 } else {
544 ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".",
545 child->key);
546 }
547 }
549 return (0);
550 }
552 static void rdt_submit_derive(char *cgroup, char *type, char *type_instance,
553 derive_t value) {
554 value_list_t vl = VALUE_LIST_INIT;
556 vl.values = &(value_t) { .derive = value };
557 vl.values_len = 1;
559 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
560 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
561 sstrncpy(vl.type, type, sizeof(vl.type));
562 if (type_instance)
563 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
565 plugin_dispatch_values(&vl);
566 }
568 static void rdt_submit_gauge(char *cgroup, char *type, char *type_instance,
569 gauge_t value) {
570 value_list_t vl = VALUE_LIST_INIT;
572 vl.values = &(value_t) { .gauge = value };
573 vl.values_len = 1;
575 sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
576 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
577 sstrncpy(vl.type, type, sizeof(vl.type));
578 if (type_instance)
579 sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
581 plugin_dispatch_values(&vl);
582 }
584 static int rdt_read(__attribute__((unused)) user_data_t *ud) {
585 int ret;
587 if (g_rdt == NULL) {
588 ERROR(RDT_PLUGIN ": rdt_read: plugin not initialized.");
589 return (-EINVAL);
590 }
592 ret = pqos_mon_poll(&g_rdt->pgroups[0], (unsigned)g_rdt->num_groups);
593 if (ret != PQOS_RETVAL_OK) {
594 ERROR(RDT_PLUGIN ": Failed to poll monitoring data.");
595 return (-1);
596 }
598 #if COLLECT_DEBUG
599 rdt_dump_data();
600 #endif /* COLLECT_DEBUG */
602 for (int i = 0; i < g_rdt->num_groups; i++) {
603 enum pqos_mon_event mbm_events =
604 (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
605 PQOS_MON_EVENT_RMEM_BW);
607 const struct pqos_event_values *pv = &g_rdt->pgroups[i]->values;
609 /* Submit only monitored events data */
611 if (g_rdt->cgroups[i].events & PQOS_MON_EVENT_L3_OCCUP)
612 rdt_submit_gauge(g_rdt->cgroups[i].desc, "bytes", "llc", pv->llc);
614 if (g_rdt->cgroups[i].events & PQOS_PERF_EVENT_IPC)
615 rdt_submit_gauge(g_rdt->cgroups[i].desc, "ipc", NULL, pv->ipc);
617 if (g_rdt->cgroups[i].events & mbm_events) {
618 rdt_submit_derive(g_rdt->cgroups[i].desc, "memory_bandwidth",
619 "local", pv->mbm_local_delta);
620 rdt_submit_derive(g_rdt->cgroups[i].desc, "memory_bandwidth",
621 "remote", pv->mbm_remote_delta);
622 }
623 }
625 return (0);
626 }
628 static int rdt_init(void) {
629 int ret;
631 ret = rdt_preinit();
632 if (ret != 0)
633 return ret;
635 /* Start monitoring */
636 for (int i = 0; i < g_rdt->num_groups; i++) {
637 rdt_core_group_t *cg = &g_rdt->cgroups[i];
639 ret = pqos_mon_start(cg->num_cores, cg->cores, cg->events, (void *)cg->desc,
640 g_rdt->pgroups[i]);
642 if (ret != PQOS_RETVAL_OK)
643 ERROR(RDT_PLUGIN ": Error starting monitoring group %s (pqos status=%d)",
644 cg->desc, ret);
645 }
647 return (0);
648 }
650 static int rdt_shutdown(void) {
651 int ret;
653 DEBUG(RDT_PLUGIN ": rdt_shutdown.");
655 if (g_rdt == NULL)
656 return (0);
658 /* Stop monitoring */
659 for (int i = 0; i < g_rdt->num_groups; i++) {
660 pqos_mon_stop(g_rdt->pgroups[i]);
661 }
663 ret = pqos_fini();
664 if (ret != PQOS_RETVAL_OK)
665 ERROR(RDT_PLUGIN ": Error shutting down PQoS library.");
667 rdt_free_cgroups();
668 sfree(g_rdt);
670 return (0);
671 }
673 void module_register(void) {
674 plugin_register_init(RDT_PLUGIN, rdt_init);
675 plugin_register_complex_config(RDT_PLUGIN, rdt_config);
676 plugin_register_complex_read(NULL, RDT_PLUGIN, rdt_read, 0, NULL);
677 plugin_register_shutdown(RDT_PLUGIN, rdt_shutdown);
678 }