1 /**
2 * collectd - src/intel_pmu.c
3 *
4 * Copyright(c) 2017 Intel Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
26 **/
28 #include "collectd.h"
29 #include "common.h"
31 #include <jevents.h>
32 #include <jsession.h>
#define PMU_PLUGIN "intel_pmu"

/* Helpers for building perf_event_attr.config values of PERF_TYPE_HW_CACHE
 * events. Per perf_event_open(2) the config is encoded as:
 *   (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
 *   (perf_hw_cache_op_result_id << 16)
 * The macros below supply the (op << 8) | (result << 16) part; the cache id
 * is OR-ed in at the use site (see g_hw_cache_events). */

#define HW_CACHE_READ_ACCESS                                                   \
  (((PERF_COUNT_HW_CACHE_OP_READ) << 8) |                                      \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_WRITE_ACCESS                                                  \
  (((PERF_COUNT_HW_CACHE_OP_WRITE) << 8) |                                     \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_PREFETCH_ACCESS                                               \
  (((PERF_COUNT_HW_CACHE_OP_PREFETCH) << 8) |                                  \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_READ_MISS                                                     \
  (((PERF_COUNT_HW_CACHE_OP_READ) << 8) |                                      \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))

#define HW_CACHE_WRITE_MISS                                                    \
  (((PERF_COUNT_HW_CACHE_OP_WRITE) << 8) |                                     \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))

#define HW_CACHE_PREFETCH_MISS                                                 \
  (((PERF_COUNT_HW_CACHE_OP_PREFETCH) << 8) |                                  \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))
/* Mapping of a human-readable perf event name to its
 * perf_event_attr.config value. */
struct event_info {
  char *name;      /* symbolic event name as used by perf(1) */
  uint64_t config; /* value for perf_event_attr.config */
};
typedef struct event_info event_info_t;
/* Plugin-global state, populated from the configuration and used by the
 * init/read/shutdown callbacks (see g_ctx below). */
struct intel_pmu_ctx_s {
  _Bool hw_cache_events;         /* ReportHardwareCacheEvents option */
  _Bool kernel_pmu_events;       /* ReportKernelPMUEvents option */
  _Bool sw_events;               /* ReportSoftwareEvents option */
  char event_list_fn[PATH_MAX];  /* EventList option: jevents JSON file */
  char **hw_events;              /* HardwareEvents option: raw event names */
  size_t hw_events_count;        /* number of entries in hw_events */
  struct eventlist *event_list;  /* jsession event list built in pmu_init */
};
typedef struct intel_pmu_ctx_s intel_pmu_ctx_t;
/* Generalized kernel PMU events (PERF_TYPE_HARDWARE); added when the
 * ReportKernelPMUEvents option is enabled. */
event_info_t g_kernel_pmu_events[] = {
    {.name = "cpu-cycles", .config = PERF_COUNT_HW_CPU_CYCLES},
    {.name = "instructions", .config = PERF_COUNT_HW_INSTRUCTIONS},
    {.name = "cache-references", .config = PERF_COUNT_HW_CACHE_REFERENCES},
    {.name = "cache-misses", .config = PERF_COUNT_HW_CACHE_MISSES},
    {.name = "branches", .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
    {.name = "branch-misses", .config = PERF_COUNT_HW_BRANCH_MISSES},
    {.name = "bus-cycles", .config = PERF_COUNT_HW_BUS_CYCLES},
};
/* Hardware cache events (PERF_TYPE_HW_CACHE); added when the
 * ReportHardwareCacheEvents option is enabled. Each config combines a
 * cache id with an (op, result) pair encoded by the HW_CACHE_* macros. */
event_info_t g_hw_cache_events[] = {

    /* L1 data cache */
    {.name = "L1-dcache-loads",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_ACCESS)},
    {.name = "L1-dcache-load-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_MISS)},
    {.name = "L1-dcache-stores",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_WRITE_ACCESS)},
    {.name = "L1-dcache-store-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_WRITE_MISS)},
    {.name = "L1-dcache-prefetches",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "L1-dcache-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_PREFETCH_MISS)},

    /* L1 instruction cache */
    {.name = "L1-icache-loads",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_READ_ACCESS)},
    {.name = "L1-icache-load-misses",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_READ_MISS)},
    {.name = "L1-icache-prefetches",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "L1-icache-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_PREFETCH_MISS)},

    /* Last-level cache */
    {.name = "LLC-loads",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_READ_ACCESS)},
    {.name = "LLC-load-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_READ_MISS)},
    {.name = "LLC-stores",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_WRITE_ACCESS)},
    {.name = "LLC-store-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_WRITE_MISS)},
    {.name = "LLC-prefetches",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "LLC-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_PREFETCH_MISS)},

    /* Data TLB */
    {.name = "dTLB-loads",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_READ_ACCESS)},
    {.name = "dTLB-load-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_READ_MISS)},
    {.name = "dTLB-stores",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_WRITE_ACCESS)},
    {.name = "dTLB-store-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_WRITE_MISS)},
    {.name = "dTLB-prefetches",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "dTLB-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_PREFETCH_MISS)},

    /* Instruction TLB */
    {.name = "iTLB-loads",
     .config = (PERF_COUNT_HW_CACHE_ITLB | HW_CACHE_READ_ACCESS)},
    {.name = "iTLB-load-misses",
     .config = (PERF_COUNT_HW_CACHE_ITLB | HW_CACHE_READ_MISS)},

    /* Branch prediction unit */
    {.name = "branch-loads",
     .config = (PERF_COUNT_HW_CACHE_BPU | HW_CACHE_READ_ACCESS)},
    {.name = "branch-load-misses",
     .config = (PERF_COUNT_HW_CACHE_BPU | HW_CACHE_READ_MISS)},
};
/* Kernel software events (PERF_TYPE_SOFTWARE); added when the
 * ReportSoftwareEvents option is enabled. */
event_info_t g_sw_events[] = {
    {.name = "cpu-clock", .config = PERF_COUNT_SW_CPU_CLOCK},
    {.name = "task-clock", .config = PERF_COUNT_SW_TASK_CLOCK},
    {.name = "context-switches", .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
    {.name = "cpu-migrations", .config = PERF_COUNT_SW_CPU_MIGRATIONS},
    {.name = "page-faults", .config = PERF_COUNT_SW_PAGE_FAULTS},
    {.name = "minor-faults", .config = PERF_COUNT_SW_PAGE_FAULTS_MIN},
    {.name = "major-faults", .config = PERF_COUNT_SW_PAGE_FAULTS_MAJ},
    {.name = "alignment-faults", .config = PERF_COUNT_SW_ALIGNMENT_FAULTS},
    {.name = "emulation-faults", .config = PERF_COUNT_SW_EMULATION_FAULTS},
};
/* Single plugin-wide context; zero-initialized, filled in by pmu_config()
 * and pmu_init(), released by pmu_shutdown(). */
static intel_pmu_ctx_t g_ctx;
170 #if COLLECT_DEBUG
171 static void pmu_dump_events() {
173 DEBUG(PMU_PLUGIN ": Events:");
175 struct event *e;
177 for (e = g_ctx.event_list->eventlist; e; e = e->next) {
178 DEBUG(PMU_PLUGIN ": event : %s", e->event);
179 DEBUG(PMU_PLUGIN ": group_lead: %d", e->group_leader);
180 DEBUG(PMU_PLUGIN ": end_group : %d", e->end_group);
181 DEBUG(PMU_PLUGIN ": type : %#x", e->attr.type);
182 DEBUG(PMU_PLUGIN ": config : %#x", (unsigned)e->attr.config);
183 DEBUG(PMU_PLUGIN ": size : %d", e->attr.size);
184 }
185 }
/* Log the parsed plugin configuration at DEBUG level.
 * Only compiled in when COLLECT_DEBUG is set. */
static void pmu_dump_config(void) {

  DEBUG(PMU_PLUGIN ": Config:");
  DEBUG(PMU_PLUGIN ":   hw_cache_events   : %d", g_ctx.hw_cache_events);
  DEBUG(PMU_PLUGIN ":   kernel_pmu_events : %d", g_ctx.kernel_pmu_events);
  DEBUG(PMU_PLUGIN ":   software_events   : %d", g_ctx.sw_events);

  for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
    DEBUG(PMU_PLUGIN ":   hardware_events[%zu]: %s", i, g_ctx.hw_events[i]);
  }
}
199 #endif /* COLLECT_DEBUG */
201 static int pmu_config_hw_events(oconfig_item_t *ci) {
203 if (strcasecmp("HardwareEvents", ci->key) != 0) {
204 return -EINVAL;
205 }
207 g_ctx.hw_events = calloc(ci->values_num, sizeof(char *));
208 if (g_ctx.hw_events == NULL) {
209 ERROR(PMU_PLUGIN ": Failed to allocate hw events.");
210 return -ENOMEM;
211 }
213 for (int i = 0; i < ci->values_num; i++) {
214 if (ci->values[i].type != OCONFIG_TYPE_STRING) {
215 WARNING(PMU_PLUGIN ": The %s option requires string arguments.", ci->key);
216 continue;
217 }
219 g_ctx.hw_events[g_ctx.hw_events_count] = strdup(ci->values[i].value.string);
220 if (g_ctx.hw_events[g_ctx.hw_events_count] == NULL) {
221 ERROR(PMU_PLUGIN ": Failed to allocate hw events entry.");
222 return -ENOMEM;
223 }
225 g_ctx.hw_events_count++;
226 }
228 return 0;
229 }
231 static int pmu_config(oconfig_item_t *ci) {
233 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
235 for (int i = 0; i < ci->children_num; i++) {
236 int ret = 0;
237 oconfig_item_t *child = ci->children + i;
239 if (strcasecmp("ReportHardwareCacheEvents", child->key) == 0) {
240 ret = cf_util_get_boolean(child, &g_ctx.hw_cache_events);
241 } else if (strcasecmp("ReportKernelPMUEvents", child->key) == 0) {
242 ret = cf_util_get_boolean(child, &g_ctx.kernel_pmu_events);
243 } else if (strcasecmp("EventList", child->key) == 0) {
244 ret = cf_util_get_string_buffer(child, g_ctx.event_list_fn,
245 sizeof(g_ctx.event_list_fn));
246 } else if (strcasecmp("HardwareEvents", child->key) == 0) {
247 ret = pmu_config_hw_events(child);
248 } else if (strcasecmp("ReportSoftwareEvents", child->key) == 0) {
249 ret = cf_util_get_boolean(child, &g_ctx.sw_events);
250 } else {
251 ERROR(PMU_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
252 ret = -1;
253 }
255 if (ret != 0) {
256 DEBUG(PMU_PLUGIN ": %s:%d ret=%d", __FUNCTION__, __LINE__, ret);
257 return ret;
258 }
259 }
261 #if COLLECT_DEBUG
262 pmu_dump_config();
263 #endif
265 return 0;
266 }
268 static void pmu_submit_counter(int cpu, char *event, counter_t value,
269 meta_data_t *meta) {
270 value_list_t vl = VALUE_LIST_INIT;
272 vl.values = &(value_t){.counter = value};
273 vl.values_len = 1;
275 sstrncpy(vl.plugin, PMU_PLUGIN, sizeof(vl.plugin));
276 if (cpu == -1) {
277 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "all");
278 } else {
279 vl.meta = meta;
280 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%d", cpu);
281 }
282 sstrncpy(vl.type, "counter", sizeof(vl.type));
283 sstrncpy(vl.type_instance, event, sizeof(vl.type_instance));
285 plugin_dispatch_values(&vl);
286 }
288 meta_data_t *pmu_meta_data_create(const struct efd *efd) {
289 meta_data_t *meta = NULL;
291 /* create meta data only if value was scaled */
292 if (efd->val[1] == efd->val[2] || !efd->val[2]) {
293 return NULL;
294 }
296 meta = meta_data_create();
297 if (meta == NULL) {
298 ERROR(PMU_PLUGIN ": meta_data_create failed.");
299 return NULL;
300 }
302 meta_data_add_unsigned_int(meta, "intel_pmu:raw_count", efd->val[0]);
303 meta_data_add_unsigned_int(meta, "intel_pmu:time_enabled", efd->val[1]);
304 meta_data_add_unsigned_int(meta, "intel_pmu:time_running", efd->val[2]);
306 return meta;
307 }
/* Dispatch the most recently read counter values: one value per active CPU
 * (with scaling meta data when multiplexed) plus an aggregate "all" value
 * per event. Assumes read_all_events() was called beforehand (see
 * pmu_read). */
static void pmu_dispatch_data(void) {

  struct event *e;

  for (e = g_ctx.event_list->eventlist; e; e = e->next) {
    uint64_t all_value = 0;
    int event_enabled = 0;
    for (int i = 0; i < g_ctx.event_list->num_cpus; i++) {

      /* fd < 0 means the event could not be set up on this CPU
       * (see pmu_setup_events) — skip it */
      if (e->efd[i].fd < 0)
        continue;

      event_enabled++;

      /* If there are more events than counters, the kernel uses time
       * multiplexing. With multiplexing, at the end of the run,
       * the counter is scaled basing on total time enabled vs time running.
       * final_count = raw_count * time_enabled/time_running
       */
      uint64_t value = event_scaled_value(e, i);
      all_value += value;

      /* get meta data with information about scaling */
      meta_data_t *meta = pmu_meta_data_create(&e->efd[i]);

      /* dispatch per CPU value */
      pmu_submit_counter(i, e->event, value, meta);

      /* meta is only borrowed by pmu_submit_counter; release it here */
      meta_data_destroy(meta);
    }

    /* only publish the aggregate if the event ran on at least one CPU */
    if (event_enabled > 0) {
      DEBUG(PMU_PLUGIN ": %-20s %'10lu", e->event, all_value);
      /* dispatch all CPU value */
      pmu_submit_counter(-1, e->event, all_value, NULL);
    }
  }
}
348 static int pmu_read(__attribute__((unused)) user_data_t *ud) {
349 int ret;
351 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
353 ret = read_all_events(g_ctx.event_list);
354 if (ret != 0) {
355 ERROR(PMU_PLUGIN ": Failed to read values of all events.");
356 return ret;
357 }
359 pmu_dispatch_data();
361 return 0;
362 }
364 static int pmu_add_events(struct eventlist *el, uint32_t type,
365 event_info_t *events, size_t count) {
367 for (size_t i = 0; i < count; i++) {
368 /* Allocate memory for event struct that contains array of efd structs
369 for all cores */
370 struct event *e =
371 calloc(sizeof(struct event) + sizeof(struct efd) * el->num_cpus, 1);
372 if (e == NULL) {
373 ERROR(PMU_PLUGIN ": Failed to allocate event structure");
374 return -ENOMEM;
375 }
377 e->attr.type = type;
378 e->attr.config = events[i].config;
379 e->attr.size = PERF_ATTR_SIZE_VER0;
380 if (!el->eventlist)
381 el->eventlist = e;
382 if (el->eventlist_last)
383 el->eventlist_last->next = e;
384 el->eventlist_last = e;
385 e->event = strdup(events[i].name);
386 }
388 return 0;
389 }
391 static int pmu_add_hw_events(struct eventlist *el, char **e, size_t count) {
393 for (size_t i = 0; i < count; i++) {
395 size_t group_events_count = 0;
397 char *events = strdup(e[i]);
398 if (!events)
399 return -1;
401 char *s, *tmp;
402 for (s = strtok_r(events, ",", &tmp); s; s = strtok_r(NULL, ",", &tmp)) {
404 /* Multiple events parsed in one entry */
405 if (group_events_count == 1) {
406 /* Mark previously added event as group leader */
407 el->eventlist_last->group_leader = 1;
408 }
410 /* Allocate memory for event struct that contains array of efd structs
411 for all cores */
412 struct event *e =
413 calloc(sizeof(struct event) + sizeof(struct efd) * el->num_cpus, 1);
414 if (e == NULL) {
415 free(events);
416 return -ENOMEM;
417 }
419 if (resolve_event(s, &e->attr) == 0) {
420 e->next = NULL;
421 if (!el->eventlist)
422 el->eventlist = e;
423 if (el->eventlist_last)
424 el->eventlist_last->next = e;
425 el->eventlist_last = e;
426 e->event = strdup(s);
427 } else {
428 DEBUG(PMU_PLUGIN ": Cannot resolve %s", s);
429 sfree(e);
430 }
432 group_events_count++;
433 }
435 /* Multiple events parsed in one entry */
436 if (group_events_count > 1) {
437 /* Mark last added event as group end */
438 el->eventlist_last->end_group = 1;
439 }
441 free(events);
442 }
444 return 0;
445 }
447 static void pmu_free_events(struct eventlist *el) {
449 if (el == NULL)
450 return;
452 struct event *e = el->eventlist;
454 while (e) {
455 struct event *next = e->next;
456 sfree(e);
457 e = next;
458 }
460 el->eventlist = NULL;
461 }
463 static int pmu_setup_events(struct eventlist *el, bool measure_all,
464 int measure_pid) {
465 struct event *e, *leader = NULL;
466 int ret = -1;
468 for (e = el->eventlist; e; e = e->next) {
470 for (int i = 0; i < el->num_cpus; i++) {
471 if (setup_event(e, i, leader, measure_all, measure_pid) < 0) {
472 WARNING(PMU_PLUGIN ": perf event '%s' is not available (cpu=%d).",
473 e->event, i);
474 } else {
475 /* success if at least one event was set */
476 ret = 0;
477 }
478 }
480 if (e->group_leader)
481 leader = e;
482 if (e->end_group)
483 leader = NULL;
484 }
486 return ret;
487 }
489 static int pmu_init(void) {
490 int ret;
492 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
494 g_ctx.event_list = alloc_eventlist();
495 if (g_ctx.event_list == NULL) {
496 ERROR(PMU_PLUGIN ": Failed to allocate event list.");
497 return -ENOMEM;
498 }
500 if (g_ctx.hw_cache_events) {
501 ret =
502 pmu_add_events(g_ctx.event_list, PERF_TYPE_HW_CACHE, g_hw_cache_events,
503 STATIC_ARRAY_SIZE(g_hw_cache_events));
504 if (ret != 0) {
505 ERROR(PMU_PLUGIN ": Failed to add hw cache events.");
506 goto init_error;
507 }
508 }
510 if (g_ctx.kernel_pmu_events) {
511 ret = pmu_add_events(g_ctx.event_list, PERF_TYPE_HARDWARE,
512 g_kernel_pmu_events,
513 STATIC_ARRAY_SIZE(g_kernel_pmu_events));
514 if (ret != 0) {
515 ERROR(PMU_PLUGIN ": Failed to add kernel PMU events.");
516 goto init_error;
517 }
518 }
520 /* parse events names if config option is present and is not empty */
521 if (g_ctx.hw_events_count) {
523 ret = read_events(g_ctx.event_list_fn);
524 if (ret != 0) {
525 ERROR(PMU_PLUGIN ": Failed to read event list file '%s'.",
526 g_ctx.event_list_fn);
527 return ret;
528 }
530 ret = pmu_add_hw_events(g_ctx.event_list, g_ctx.hw_events,
531 g_ctx.hw_events_count);
532 if (ret != 0) {
533 ERROR(PMU_PLUGIN ": Failed to add hardware events.");
534 goto init_error;
535 }
536 }
538 if (g_ctx.sw_events) {
539 ret = pmu_add_events(g_ctx.event_list, PERF_TYPE_SOFTWARE, g_sw_events,
540 STATIC_ARRAY_SIZE(g_sw_events));
541 if (ret != 0) {
542 ERROR(PMU_PLUGIN ": Failed to add software events.");
543 goto init_error;
544 }
545 }
547 #if COLLECT_DEBUG
548 pmu_dump_events();
549 #endif
551 if (g_ctx.event_list->eventlist != NULL) {
552 /* measure all processes */
553 ret = pmu_setup_events(g_ctx.event_list, true, -1);
554 if (ret != 0) {
555 ERROR(PMU_PLUGIN ": Failed to setup perf events for the event list.");
556 goto init_error;
557 }
558 } else {
559 WARNING(PMU_PLUGIN
560 ": Events list is empty. No events were setup for monitoring.");
561 }
563 return 0;
565 init_error:
567 pmu_free_events(g_ctx.event_list);
568 sfree(g_ctx.event_list);
569 for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
570 sfree(g_ctx.hw_events[i]);
571 }
572 sfree(g_ctx.hw_events);
573 g_ctx.hw_events_count = 0;
575 return ret;
576 }
578 static int pmu_shutdown(void) {
580 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
582 pmu_free_events(g_ctx.event_list);
583 sfree(g_ctx.event_list);
584 for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
585 sfree(g_ctx.hw_events[i]);
586 }
587 sfree(g_ctx.hw_events);
588 g_ctx.hw_events_count = 0;
590 return 0;
591 }
/* Entry point called by the collectd daemon when the plugin is loaded:
 * registers the init, config, read and shutdown callbacks. */
void module_register(void) {
  plugin_register_init(PMU_PLUGIN, pmu_init);
  plugin_register_complex_config(PMU_PLUGIN, pmu_config);
  plugin_register_complex_read(NULL, PMU_PLUGIN, pmu_read, 0, NULL);
  plugin_register_shutdown(PMU_PLUGIN, pmu_shutdown);
}