summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: d2ffa2d)
raw | patch | inline | side by side (parent: d2ffa2d)
author | Pshyk, SerhiyX <serhiyx.pshyk@intel.com> | |
Thu, 6 Oct 2016 11:19:56 +0000 (12:19 +0100) | ||
committer | Kim Jones <kim-marie.jones@intel.com> | |
Thu, 6 Oct 2016 06:02:44 +0000 (07:02 +0100) |
Change-Id: Id23eb96fd37e6d4fc5fdf7e7ed58d9e74a33cca0
Signed-off-by: Serhiy Pshyk <serhiyx.pshyk@intel.com>
Signed-off-by: Serhiy Pshyk <serhiyx.pshyk@intel.com>
README | patch | blob | history | |
configure.ac | patch | blob | history | |
src/Makefile.am | patch | blob | history | |
src/collectd.conf.in | patch | blob | history | |
src/collectd.conf.pod | patch | blob | history | |
src/intel_rdt.c | [new file with mode: 0644] | patch | blob |
src/rdtmon.c | [deleted file] | patch | blob | history |
index 12c6574fcffbd764f6e3b21f5c03f75e28785a40..9b8e5d4c79c33a9fae9272a4109ce899ce38763c 100644 (file)
--- a/README
+++ b/README
hugepages can be found here:
https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt.
+ - intel_rdt
+ The intel_rdt plugin collects information provided by the monitoring
+ features of Intel Resource Director Technology (Intel(R) RDT), such as
+ Cache Monitoring Technology (CMT) and Memory Bandwidth Monitoring (MBM).
+ These features provide information about the utilization of shared
+ resources: last level cache occupancy, local memory bandwidth usage,
+ remote memory bandwidth usage and instructions per clock.
+ <https://01.org/packet-processing/cache-monitoring-technology-memory-bandwidth-monitoring-cache-allocation-technology-code-and-data>
+
- interface
Interface traffic: Number of octets, packets and errors for each
interface.
collectd without the need to start a heavy interpreter every interval.
See collectd-python(5) for details.
- - rdtmon
- The rdtmon plugin collects information provided by monitoring features of
- Intel Resource Director Technology (Intel(R) RDT) like Cache Monitoring
- Technology (CMT), Memory Bandwidth Monitoring (MBM). These features
- provide information about utilization of shared resources like last level
- cache occupancy, local memory bandwidth usage, remote memory bandwidth
- usage, instructions per clock.
- <https://01.org/packet-processing/cache-monitoring-technology-memory-bandwidth-monitoring-cache-allocation-technology-code-and-data>
-
- redis
The redis plugin gathers information from a Redis server, including:
uptime, used memory, total connections etc.
* libpqos (optional)
The PQoS library for Intel(R) Resource Director Technology used by the
- `rdtmon' plugin.
+ `intel_rdt' plugin.
<https://github.com/01org/intel-cmt-cat>
* libprotobuf, protoc 3.0+ (optional)
diff --git a/configure.ac b/configure.ac
index a74864b8e6779791a2dd3e366e1baf9ace1835f3..07f93922034ac0ab9cc768ccf7ea92acf59a1dc7 100644 (file)
--- a/configure.ac
+++ b/configure.ac
plugin_gps="no"
plugin_grpc="no"
plugin_hugepages="no"
+plugin_intel_rdt="no"
plugin_interface="no"
plugin_ipmi="no"
plugin_ipvs="no"
plugin_processes="no"
plugin_protocols="no"
plugin_python="no"
-plugin_rdtmon="no"
plugin_serial="no"
plugin_smart="no"
plugin_swap="no"
AC_PLUGIN([grpc], [$plugin_grpc], [gRPC plugin])
AC_PLUGIN([hddtemp], [yes], [Query hddtempd])
AC_PLUGIN([hugepages], [$plugin_hugepages], [Hugepages statistics])
+AC_PLUGIN([intel_rdt], [$with_libpqos], [Intel RDT monitor plugin])
AC_PLUGIN([interface], [$plugin_interface], [Interface traffic statistics])
AC_PLUGIN([ipc], [$plugin_ipc], [IPC statistics])
AC_PLUGIN([ipmi], [$plugin_ipmi], [IPMI sensor statistics])
AC_PLUGIN([processes], [$plugin_processes], [Process statistics])
AC_PLUGIN([protocols], [$plugin_protocols], [Protocol (IP, TCP, ...) statistics])
AC_PLUGIN([python], [$plugin_python], [Embed a Python interpreter])
-AC_PLUGIN([rdtmon], [$with_libpqos], [RDT monitor plugin])
AC_PLUGIN([redis], [$with_libhiredis], [Redis plugin])
AC_PLUGIN([routeros], [$with_librouteros], [RouterOS plugin])
AC_PLUGIN([rrdcached], [$librrd_rrdc_update], [RRDTool output plugin])
AC_MSG_RESULT([ grpc . . . . . . . . $enable_grpc])
AC_MSG_RESULT([ hddtemp . . . . . . . $enable_hddtemp])
AC_MSG_RESULT([ hugepages . . . . . . $enable_hugepages])
+AC_MSG_RESULT([ intel_rdt. . . . . . $enable_intel_rdt])
AC_MSG_RESULT([ interface . . . . . . $enable_interface])
AC_MSG_RESULT([ ipc . . . . . . . . . $enable_ipc])
AC_MSG_RESULT([ ipmi . . . . . . . . $enable_ipmi])
AC_MSG_RESULT([ protocols . . . . . . $enable_protocols])
AC_MSG_RESULT([ python . . . . . . . $enable_python])
AC_MSG_RESULT([ redis . . . . . . . . $enable_redis])
-AC_MSG_RESULT([ rdtmon . . . . . . . $enable_rdtmon])
AC_MSG_RESULT([ routeros . . . . . . $enable_routeros])
AC_MSG_RESULT([ rrdcached . . . . . . $enable_rrdcached])
AC_MSG_RESULT([ rrdtool . . . . . . . $enable_rrdtool])
diff --git a/src/Makefile.am b/src/Makefile.am
index 5001fdef9a53b41ab0d53fdac3470a86eafb6cf1..f48fdf377eda70bc6c65de54f61b5b7a41ae8d87 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
protocols_la_LDFLAGS = $(PLUGIN_LDFLAGS)
endif
-if BUILD_PLUGIN_RDTMON
-pkglib_LTLIBRARIES += rdtmon.la
-rdtmon_la_SOURCES = rdtmon.c
-rdtmon_la_LDFLAGS = $(PLUGIN_LDFLAGS) $(BUILD_WITH_LIBPQOS_LDFLAGS)
-rdtmon_la_CFLAGS = $(AM_CFLAGS) $(BUILD_WITH_LIBPQOS_CPPFLAGS)
-rdtmon_la_LIBADD = $(BUILD_WITH_LIBPQOS_LIBS)
+if BUILD_PLUGIN_INTEL_RDT
+pkglib_LTLIBRARIES += intel_rdt.la
+intel_rdt_la_SOURCES = intel_rdt.c
+intel_rdt_la_LDFLAGS = $(PLUGIN_LDFLAGS) $(BUILD_WITH_LIBPQOS_LDFLAGS)
+intel_rdt_la_CFLAGS = $(AM_CFLAGS) $(BUILD_WITH_LIBPQOS_CPPFLAGS)
+intel_rdt_la_LIBADD = $(BUILD_WITH_LIBPQOS_LIBS)
endif
if BUILD_PLUGIN_REDIS
diff --git a/src/collectd.conf.in b/src/collectd.conf.in
index 52028338551ff964a4edbe5eab6394c76f474778..f81d7718bce3e0e70c08250d62ffef896a2e3ee0 100644 (file)
--- a/src/collectd.conf.in
+++ b/src/collectd.conf.in
#@BUILD_PLUGIN_GRPC_TRUE@LoadPlugin grpc
#@BUILD_PLUGIN_HDDTEMP_TRUE@LoadPlugin hddtemp
#@BUILD_PLUGIN_HUGEPAGES_TRUE@LoadPlugin hugepages
+#@BUILD_PLUGIN_INTEL_RDT_TRUE@LoadPlugin intel_rdt
@BUILD_PLUGIN_INTERFACE_TRUE@@BUILD_PLUGIN_INTERFACE_TRUE@LoadPlugin interface
#@BUILD_PLUGIN_IPC_TRUE@LoadPlugin ipc
#@BUILD_PLUGIN_IPMI_TRUE@LoadPlugin ipmi
#@BUILD_PLUGIN_PROTOCOLS_TRUE@LoadPlugin protocols
#@BUILD_PLUGIN_PYTHON_TRUE@LoadPlugin python
#@BUILD_PLUGIN_REDIS_TRUE@LoadPlugin redis
-#@BUILD_PLUGIN_RDTMON_TRUE@LoadPlugin rdtmon
#@BUILD_PLUGIN_ROUTEROS_TRUE@LoadPlugin routeros
#@BUILD_PLUGIN_RRDCACHED_TRUE@LoadPlugin rrdcached
@LOAD_PLUGIN_RRDTOOL@LoadPlugin rrdtool
# ValuesPercentage false
#</Plugin>
+#<Plugin "intel_rdt">
+# Cores "0-2"
+#</Plugin>
+
#<Plugin interface>
# Interface "eth0"
# IgnoreSelected false
# </Module>
#</Plugin>
-#<Plugin "rdtmon">
-# Cores "0-2"
-#</Plugin>
-
#<Plugin redis>
# <Node example>
# Host "redis.example.com"
diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod
index 52f73242da10e4144253ce9af25f7f30ca44ebd5..36a6d00edcd005ed7bf4cc61fd33b16c90bd7176 100644 (file)
--- a/src/collectd.conf.pod
+++ b/src/collectd.conf.pod
=back
+=head2 Plugin C<intel_rdt>
+
+The I<intel_rdt> plugin collects information provided by monitoring features of
+Intel Resource Director Technology (Intel(R) RDT) like Cache Monitoring
+Technology (CMT), Memory Bandwidth Monitoring (MBM). These features provide
+information about utilization of shared resources. CMT monitors last level cache
+occupancy (LLC). MBM supports two types of events reporting local and remote
+memory bandwidth. Local memory bandwidth (MBL) reports the bandwidth of
+accessing memory associated with the local socket. Remote memory bandwidth (MBR)
+reports the bandwidth of accessing the remote socket. This technology also
+allows monitoring of instructions per clock (IPC).
+Monitored events are hardware dependent. Monitoring capabilities are detected
+on plugin initialization and only supported events are monitored.
+
+B<Synopsis:>
+
+ <Plugin "intel_rdt">
+ Cores "0-2" "3,4,6" "8-10,15"
+ </Plugin>
+
+B<Options:>
+
+=over 4
+
+=item B<Interval> I<seconds>
+
+The interval, in seconds, at which statistics on monitored events are retrieved.
+To specify milliseconds, divide the value by 1000; for example, if the desired
+interval is 50ms, set the interval to 0.05. Due to the limited capacity of the
+counters, it is not recommended to set the interval higher than 1 second.
+
+=item B<Cores> I<cores groups>
+
+All events are reported on a per core basis. Monitoring of the events can be
+configured for group of cores (aggregated statistics). This field defines groups
+of cores on which to monitor supported events. The field is represented as list
+of strings with core group values. Each string represents a list of cores in a
+group. Allowed formats are:
+ 0,1,2,3
+ 0-10,20-18
+ 1,3,5-8,10,0x10-12
+
+If an empty string is provided as value for this field default cores
+configuration is applied - a separate group is created for each core.
+
+=back
+
+B<Note:> By default, the global interval is used to retrieve statistics on
+monitored events. To configure a plugin-specific interval, use the B<Interval>
+option of the intel_rdt <LoadPlugin> block. To specify milliseconds, divide the
+value by 1000; for example, if the desired interval is 50ms, set the interval
+to 0.05. Due to the limited capacity of the counters, it is not recommended to
+set the interval higher than 1 second.
+
=head2 Plugin C<interface>
=over 4
=back
-=head2 Plugin C<rdtmon>
-
-The I<rdtmon> plugin collects information provided by monitoring features of
-Intel Resource Director Technology (Intel(R) RDT) like Cache Monitoring
-Technology (CMT), Memory Bandwidth Monitoring (MBM). These features provide
-information about utilization of shared resources. CMT monitors last level cache
-occupancy (LLC). MBM supports two types of events reporting local and remote
-memory bandwidth. Local memory bandwidth (MBL) reports the bandwidth of
-accessing memory associated with the local socket. Remote memory bandwidth (MBR)
-reports the bandwidth of accessing the remote socket. Also this technology
-allows to monitor instructions per clock (IPC).
-Monitor events are hardware dependant. Monitoring capabilities are detected on
-plugin initialization and only supported events are monitored.
-
-B<Synopsis:>
-
- <Plugin "rdtmon">
- Cores "0-2" "3,4,6" "8-10,15"
- </Plugin>
-
-B<Options:>
-
-=over 4
-
-=item B<Interval> I<seconds>
-
-The interval within which to retrieve statistics on monitored events in seconds.
-For milliseconds divide the time by 1000 for example if the desired interval
-is 50ms, set interval to 0.05. Due to limited capacity of counters it is not
-recommended to set interval higher than 1 sec.
-
-=item B<Cores> I<cores groups>
-
-All events are reported on a per core basis. Monitoring of the events can be
-configured for group of cores (aggregated statistics). This field defines groups
-of cores on which to monitor supported events. The field is represented as list
-of strings with core group values. Each string represents a list of cores in a
-group. Allowed formats are:
- 0,1,2,3
- 0-10,20-18
- 1,3,5-8,10,0x10-12
-
-If an empty string is provided as value for this field default cores
-configuration is applied - a separate group is created for each core.
-
-=back
-
-B<Note:> By default global interval is used to retrieve statistics on monitored
-events. To configure a plugin specific interval use B<Interval> option of the
-rdtmon <LoadPlugin> block. For milliseconds divide the time by 1000 for example
-if the desired interval is 50ms, set interval to 0.05.
-Due to limited capacity of counters it is not recommended to set interval higher
-than 1 sec.
-
=head2 Plugin C<redis>
The I<Redis plugin> connects to one or more Redis servers and gathers
diff --git a/src/intel_rdt.c b/src/intel_rdt.c
--- /dev/null
+++ b/src/intel_rdt.c
@@ -0,0 +1,678 @@
+/**
+ * collectd - src/intel_rdt.c
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Serhiy Pshyk <serhiyx.pshyk@intel.com>
+ **/
+
+#include <pqos.h>
+
+#include "common.h"
+
+#define RDT_PLUGIN "intel_rdt"
+
+#define RDT_MAX_SOCKETS 8
+#define RDT_MAX_SOCKET_CORES 64
+#define RDT_MAX_CORES (RDT_MAX_SOCKET_CORES * RDT_MAX_SOCKETS)
+
+/* One monitored group of cores. */
+typedef struct rdt_core_group_s {
+  char *desc;                 /* heap copy of the group description string */
+  size_t num_cores;           /* number of entries in `cores' */
+  unsigned *cores;            /* heap-allocated table of core ids */
+  enum pqos_mon_event events; /* bitmask of events monitored for the group */
+} rdt_core_group_t;
+
+/* Plugin-wide state: configured core groups plus PQoS library handles. */
+typedef struct rdt_ctx_s {
+  /* configured core groups; the first `num_groups' entries are in use */
+  rdt_core_group_t cgroups[RDT_MAX_CORES];
+  /* per-group monitoring data handed to pqos_mon_start()/pqos_mon_poll() */
+  struct pqos_mon_data *pgroups[RDT_MAX_CORES];
+  size_t num_groups;
+  /* CPU topology and capabilities as returned by pqos_cap_get() */
+  const struct pqos_cpuinfo *pqos_cpu;
+  const struct pqos_cap *pqos_cap;
+  /* monitoring capability as returned by pqos_cap_get_type() */
+  const struct pqos_capability *cap_mon;
+} rdt_ctx_t;
+
+/* Allocated by rdt_preinit(), released by rdt_shutdown(). */
+static rdt_ctx_t *g_rdt = NULL;
+
+/* Reports whether `val' occurs among the first `size' entries of `nums':
+ * returns 1 when it does and 0 otherwise. */
+static int isdup(const uint64_t *nums, size_t size, uint64_t val) {
+  size_t remaining = size;
+
+  /* scan from the back; the order of the scan does not affect the answer */
+  while (remaining-- > 0) {
+    if (nums[remaining] == val)
+      return 1;
+  }
+  return 0;
+}
+
+/*
+ * Converts the whole string `s' (decimal, octal or hexadecimal, as accepted
+ * by strtoull() with base 0) into an unsigned 64-bit number stored in `*n'.
+ *
+ * Returns 0 on success. Returns -EINVAL when `s' is empty, contains trailing
+ * characters that are not part of the number, or holds a value that does not
+ * fit (strtoull() reported ERANGE).
+ */
+static int strtouint64(const char *s, uint64_t *n) {
+  char *endptr = NULL;
+
+  assert(s != NULL);
+  assert(n != NULL);
+
+  /* strtoull() signals overflow only through errno, so clear it first */
+  errno = 0;
+  *n = strtoull(s, &endptr, 0);
+
+  if (*s == '\0' || *endptr != '\0' || errno == ERANGE) {
+    DEBUG(RDT_PLUGIN ": Error converting '%s' to unsigned number.", s);
+    return (-EINVAL);
+  }
+
+  return (0);
+}
+
+/*
+ * NAME
+ *   strlisttonums
+ *
+ * DESCRIPTION
+ *   Converts string of characters representing list of numbers into array of
+ *   numbers. Allowed formats are:
+ *     0,1,2,3
+ *     0-10,20-18
+ *     1,3,5-8,10,0x10-12
+ *
+ *   Numbers can be in decimal or hexadecimal format. A range may be given in
+ *   either order ("20-18" is the same as "18-20"). Duplicate numbers are
+ *   stored only once. The input string is modified in place.
+ *
+ * PARAMETERS
+ *   `s'    String representing list of unsigned numbers.
+ *   `nums' Array to put converted numeric values into.
+ *   `max'  Maximum number of elements that nums can accommodate.
+ *
+ * RETURN VALUE
+ *   Number of elements placed into nums. 0 if an argument is invalid or any
+ *   token cannot be parsed. Parsing stops once `max' elements are stored.
+ */
+static size_t strlisttonums(char *s, uint64_t *nums, size_t max) {
+  size_t index = 0;
+  char *saveptr = NULL;
+
+  if (s == NULL || nums == NULL || max == 0)
+    return index;
+
+  for (char *token = strtok_r(s, ",", &saveptr); token != NULL;
+       token = strtok_r(NULL, ",", &saveptr)) {
+    uint64_t start;
+    uint64_t end;
+
+    /* <ctype.h> functions require a value representable as unsigned char */
+    while (isspace((unsigned char)*token))
+      token++;
+    if (*token == '\0')
+      continue;
+
+    char *dash = strchr(token, '-');
+    if (dash != NULL) {
+      *dash = '\0';
+      if (strtouint64(token, &start) < 0)
+        return (0);
+      if (strtouint64(dash + 1, &end) < 0)
+        return (0);
+      /* the documented format allows reversed ranges such as "20-18" */
+      if (start > end) {
+        uint64_t tmp = start;
+        start = end;
+        end = tmp;
+      }
+    } else {
+      if (strtouint64(token, &start) < 0)
+        return (0);
+      end = start;
+    }
+
+    /* the exit test is inside the loop so that end == UINT64_MAX cannot
+     * make the counter wrap around and run forever */
+    for (uint64_t n = start;; n++) {
+      if (!isdup(nums, index, n)) {
+        nums[index] = n;
+        index++;
+        if (index >= max)
+          return index;
+      }
+      if (n == end)
+        break;
+    }
+  }
+
+  return index;
+}
+
+/*
+ * NAME
+ *   cgroup_cmp
+ *
+ * DESCRIPTION
+ *   Counts how many (core of a, core of b) pairs hold the same core id and
+ *   classifies the overlap between the two groups.
+ *
+ * PARAMETERS
+ *   `cg_a' Pointer to core group a.
+ *   `cg_b' Pointer to core group b.
+ *
+ * RETURN VALUE
+ *    1 if both groups contain the same cores
+ *    0 if none of their cores match
+ *   -1 if some but not all cores match
+ */
+static int cgroup_cmp(const rdt_core_group_t *cg_a,
+                      const rdt_core_group_t *cg_b) {
+  assert(cg_a != NULL);
+  assert(cg_b != NULL);
+
+  const int count_a = cg_a->num_cores;
+  const int count_b = cg_b->num_cores;
+  int matches = 0;
+
+  for (int a = 0; a < count_a; a++) {
+    const unsigned core = cg_a->cores[a];
+
+    for (int b = 0; b < count_b; b++)
+      matches += (core == cg_b->cores[b]);
+  }
+
+  /* disjoint groups */
+  if (matches == 0)
+    return 0;
+
+  /* same size and every core matched: identical groups; otherwise the
+   * groups only partially overlap */
+  return (count_a == count_b && count_b == matches) ? 1 : -1;
+}
+
+/*
+ * Fills core group `cg' with a heap copy of the description `desc' and a
+ * freshly allocated table holding the `num_cores' ids from `cores' (each
+ * narrowed to unsigned).
+ *
+ * Returns 0 on success or -ENOMEM if either allocation fails; in the latter
+ * case the core table is released again.
+ */
+static int cgroup_set(rdt_core_group_t *cg, char *desc, uint64_t *cores,
+                      size_t num_cores) {
+  assert(cg != NULL);
+  assert(desc != NULL);
+  assert(cores != NULL);
+  assert(num_cores > 0);
+
+  unsigned *table = calloc(num_cores, sizeof(unsigned));
+  cg->cores = table;
+  if (table == NULL) {
+    ERROR(RDT_PLUGIN ": Error allocating core group table");
+    return (-ENOMEM);
+  }
+  cg->num_cores = num_cores;
+
+  cg->desc = strdup(desc);
+  if (cg->desc == NULL) {
+    ERROR(RDT_PLUGIN ": Error allocating core group description");
+    sfree(cg->cores);
+    return (-ENOMEM);
+  }
+
+  size_t idx = 0;
+  while (idx < num_cores) {
+    table[idx] = (unsigned)cores[idx];
+    idx++;
+  }
+
+  return 0;
+}
+
+/*
+ * NAME
+ *   oconfig_to_cgroups
+ *
+ * DESCRIPTION
+ *   Function to set the descriptions and cores for each core group.
+ *   Takes a config option containing list of strings that are used to set
+ *   core group values. NULL or empty strings are skipped.
+ *
+ * PARAMETERS
+ *   `item'        Config option containing core groups.
+ *   `groups'      Table of core groups to set values in.
+ *   `max_groups'  Maximum number of core groups allowed.
+ *   `max_core'    Maximum allowed core value.
+ *
+ * RETURN VALUE
+ *   On success, the number of core groups set up. On error, appropriate
+ *   negative error value. Groups filled in before the error are left in
+ *   `groups'; releasing them is up to the caller.
+ */
+static int oconfig_to_cgroups(oconfig_item_t *item, rdt_core_group_t *groups,
+                              size_t max_groups, uint64_t max_core) {
+  size_t index = 0;
+
+  assert(groups != NULL);
+  assert(max_groups > 0);
+  assert(item != NULL);
+
+  for (int j = 0; j < item->values_num; j++) {
+    int ret;
+    size_t n;
+    uint64_t cores[RDT_MAX_CORES] = {0};
+    char value[DATA_MAX_NAME_LEN];
+    char *str = item->values[j].value.string;
+
+    if ((str == NULL) || (str[0] == '\0'))
+      continue;
+
+    /* strlisttonums() splits its input in place, so parse a scratch copy
+     * and keep the configured string intact for messages and description */
+    sstrncpy(value, str, sizeof(value));
+
+    n = strlisttonums(value, cores, STATIC_ARRAY_SIZE(cores));
+    if (n == 0) {
+      ERROR(RDT_PLUGIN ": Error parsing core group (%s)", str);
+      return (-EINVAL);
+    }
+
+    for (size_t i = 0; i < n; i++) {
+      if (cores[i] > max_core) {
+        /* print the full 64-bit value; an int cast would truncate it */
+        ERROR(RDT_PLUGIN ": Core group (%s) contains invalid core id (%llu)",
+              str, (unsigned long long)cores[i]);
+        return (-EINVAL);
+      }
+    }
+
+    /* set core group info */
+    ret = cgroup_set(&groups[index], str, cores, n);
+    if (ret < 0)
+      return ret;
+
+    index++;
+
+    if (index >= max_groups) {
+      WARNING(RDT_PLUGIN ": Too many core groups configured");
+      return (int)index;
+    }
+  }
+
+  return (int)index;
+}
+
+#if COLLECT_DEBUG
+/* Debug helper: logs description, core list and event mask of every
+ * configured core group. Does nothing when the plugin context is missing. */
+static void rdt_dump_cgroups(void) {
+  char cores[RDT_MAX_CORES * 4];
+
+  if (g_rdt == NULL)
+    return;
+
+  DEBUG(RDT_PLUGIN ": Core Groups Dump");
+  DEBUG(RDT_PLUGIN ": groups count: %zu", g_rdt->num_groups);
+
+  for (int grp = 0; grp < g_rdt->num_groups; grp++) {
+    const rdt_core_group_t *cg = &g_rdt->cgroups[grp];
+
+    /* build the space separated core list by appending to the buffer */
+    memset(cores, 0, sizeof(cores));
+    for (int c = 0; c < cg->num_cores; c++) {
+      const size_t used = strlen(cores);
+
+      snprintf(cores + used, sizeof(cores) - used - 1, " %d", cg->cores[c]);
+    }
+
+    DEBUG(RDT_PLUGIN ": group[%d]:", grp);
+    DEBUG(RDT_PLUGIN ": description: %s", cg->desc);
+    DEBUG(RDT_PLUGIN ": cores: %s", cores);
+    DEBUG(RDT_PLUGIN ": events: 0x%X", cg->events);
+  }
+}
+
+/* Converts a byte count to kilobytes (1 KB = 1024 bytes). */
+static inline double bytes_to_kb(const double bytes) {
+  const double bytes_per_kb = 1024.0;
+
+  return bytes / bytes_per_kb;
+}
+
+/* Converts a byte count to megabytes (1 MB = 1024 * 1024 bytes). */
+static inline double bytes_to_mb(const double bytes) {
+  const double bytes_per_mb = 1024.0 * 1024.0;
+
+  return bytes / bytes_per_mb;
+}
+
+/* Debug helper: logs one line per core group with the most recently polled
+ * monitoring values. */
+static void rdt_dump_data(void) {
+  /*
+   * CORE - monitored group of cores
+   * RMID - Resource Monitoring ID associated with the monitored group
+   * LLC - last level cache occupancy
+   * MBL - local memory bandwidth
+   * MBR - remote memory bandwidth
+   */
+  DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]");
+
+  for (int grp = 0; grp < g_rdt->num_groups; grp++) {
+    const struct pqos_mon_data *mon = g_rdt->pgroups[grp];
+    const struct pqos_event_values *vals = &mon->values;
+
+    const double llc = bytes_to_kb(vals->llc);
+    const double mbr = bytes_to_mb(vals->mbm_remote_delta);
+    const double mbl = bytes_to_mb(vals->mbm_local_delta);
+
+    DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdt->cgroups[grp].desc,
+          mon->poll_ctx[0].rmid, llc, mbl, mbr);
+  }
+}
+#endif /* COLLECT_DEBUG */
+
+/* Releases description, core table and monitoring data of every slot in the
+ * context, whether or not the slot was ever configured. */
+static void rdt_free_cgroups(void) {
+  for (size_t slot = 0; slot < RDT_MAX_CORES; slot++) {
+    rdt_core_group_t *cg = &g_rdt->cgroups[slot];
+
+    sfree(cg->desc);
+    sfree(cg->cores);
+    cg->num_cores = 0;
+
+    sfree(g_rdt->pgroups[slot]);
+  }
+}
+
+/*
+ * Creates the default configuration: one core group per core reported by the
+ * PQoS library, described by and monitoring that core's logical core id.
+ *
+ * At most RDT_MAX_CORES groups fit into the context; cores beyond that limit
+ * are skipped with a warning instead of writing past the group table.
+ *
+ * Returns the number of groups created, or a negative error value if a group
+ * could not be set up.
+ */
+static int rdt_default_cgroups(void) {
+  unsigned num_cores = g_rdt->pqos_cpu->num_cores;
+
+  if (num_cores > RDT_MAX_CORES) {
+    WARNING(RDT_PLUGIN
+            ": System reports %u cores, only the first %d will be monitored",
+            num_cores, RDT_MAX_CORES);
+    num_cores = RDT_MAX_CORES;
+  }
+
+  /* configure each core in separate group */
+  for (unsigned i = 0; i < num_cores; i++) {
+    char desc[DATA_MAX_NAME_LEN];
+    /* use the logical core id for the core table as well, so that the group
+     * monitors the same core its description names */
+    uint64_t core = g_rdt->pqos_cpu->cores[i].lcore;
+
+    ssnprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore);
+
+    /* set core group info */
+    int ret = cgroup_set(&g_rdt->cgroups[i], desc, &core, 1);
+    if (ret < 0)
+      return ret;
+  }
+
+  return (int)num_cores;
+}
+
+/*
+ * Handles the "Cores" config option: builds the core groups (or the default
+ * one-group-per-core layout when no group string is given), rejects groups
+ * that share cores, assigns the platform's supported events to every group
+ * and allocates the per-group monitoring data.
+ *
+ * A repeated "Cores" option replaces the previous configuration. The group
+ * count in the context is only set once everything succeeded, so after a
+ * failure no caller iterates over groups that have already been freed.
+ *
+ * Returns 0 on success or a negative error value.
+ */
+static int rdt_config_cgroups(oconfig_item_t *item) {
+  int n = 0;
+  enum pqos_mon_event events = 0;
+
+  if (item == NULL) {
+    DEBUG(RDT_PLUGIN ": cgroups_config: Invalid argument.");
+    return (-EINVAL);
+  }
+
+  DEBUG(RDT_PLUGIN ": Core groups [%d]:", item->values_num);
+  for (int j = 0; j < item->values_num; j++) {
+    if (item->values[j].type != OCONFIG_TYPE_STRING) {
+      ERROR(RDT_PLUGIN ": given core group value is not a string [idx=%d]",
+            j);
+      return (-EINVAL);
+    }
+    DEBUG(RDT_PLUGIN ": [%d]: %s", j, item->values[j].value.string);
+  }
+
+  /* release groups left over from an earlier "Cores" option; otherwise they
+   * would be overwritten below and leak */
+  rdt_free_cgroups();
+  g_rdt->num_groups = 0;
+
+  n = oconfig_to_cgroups(item, g_rdt->cgroups, RDT_MAX_CORES,
+                         g_rdt->pqos_cpu->num_cores - 1);
+  if (n < 0) {
+    rdt_free_cgroups();
+    ERROR(RDT_PLUGIN ": Error parsing core groups configuration.");
+    return (-EINVAL);
+  }
+
+  if (n == 0) {
+    /* create default core groups if "Cores" config option is empty */
+    n = rdt_default_cgroups();
+    if (n < 0) {
+      rdt_free_cgroups();
+      ERROR(RDT_PLUGIN
+            ": Error creating default core groups configuration.");
+      return n;
+    }
+    INFO(RDT_PLUGIN
+         ": No core groups configured. Default core groups created.");
+  }
+
+  /* Get all available events on this platform */
+  for (int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++)
+    events |= g_rdt->cap_mon->u.mon->events[i].type;
+
+  events &= ~(PQOS_PERF_EVENT_LLC_MISS);
+
+  DEBUG(RDT_PLUGIN ": Number of cores in the system: %u",
+        g_rdt->pqos_cpu->num_cores);
+  DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events);
+
+  for (int i = 0; i < n; i++) {
+    /* any overlap (full or partial) with an earlier group is an error */
+    for (int j = 0; j < i; j++) {
+      if (cgroup_cmp(&g_rdt->cgroups[j], &g_rdt->cgroups[i]) != 0) {
+        rdt_free_cgroups();
+        ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups.");
+        return (-EINVAL);
+      }
+    }
+
+    g_rdt->cgroups[i].events = events;
+    g_rdt->pgroups[i] = calloc(1, sizeof(*g_rdt->pgroups[i]));
+    if (g_rdt->pgroups[i] == NULL) {
+      rdt_free_cgroups();
+      ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data.");
+      return (-ENOMEM);
+    }
+  }
+
+  /* publish the groups only now that all of them are fully set up */
+  g_rdt->num_groups = n;
+
+  return (0);
+}
+
+/*
+ * Allocates the plugin context and initializes the PQoS library, then fetches
+ * the CPU information and the monitoring capability. Safe to call more than
+ * once: when the context already exists nothing is done.
+ *
+ * Returns 0 on success, -ENOMEM if the context cannot be allocated and -1 on
+ * any PQoS failure (the context is released again in that case).
+ */
+static int rdt_preinit(void) {
+  /* already initialized if config callback was called before init callback */
+  if (g_rdt != NULL)
+    return (0);
+
+  g_rdt = calloc(1, sizeof(*g_rdt));
+  if (g_rdt == NULL) {
+    ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context.");
+    return (-ENOMEM);
+  }
+
+  /* In case previous instance of the application was not closed properly
+   * call fini and ignore return code. */
+  pqos_fini();
+
+  /* TODO:
+   * stdout should not be used here. Will be reworked when support of log
+   * callback is added to PQoS library.
+   */
+  struct pqos_config pqos_cfg = {.fd_log = STDOUT_FILENO};
+
+  if (pqos_init(&pqos_cfg) != PQOS_RETVAL_OK) {
+    ERROR(RDT_PLUGIN ": Error initializing PQoS library!");
+    /* the library is not up, so only the context needs releasing */
+    sfree(g_rdt);
+    return (-1);
+  }
+
+  if (pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu) != PQOS_RETVAL_OK) {
+    ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities.");
+  } else if (pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON,
+                               &g_rdt->cap_mon) == PQOS_RETVAL_PARAM) {
+    ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities.");
+  } else if (g_rdt->cap_mon == NULL) {
+    ERROR(
+        RDT_PLUGIN
+        ": Monitoring capability not detected. Nothing to do for the plugin.");
+  } else {
+    return (0);
+  }
+
+  /* common failure path once the library has been initialized */
+  pqos_fini();
+  sfree(g_rdt);
+
+  return (-1);
+}
+
+/*
+ * Complex-config callback: makes sure the plugin context exists and then
+ * processes every child option. Only "Cores" is recognized; any other key is
+ * reported and skipped.
+ *
+ * Returns 0 on success or the error of the failing step.
+ */
+static int rdt_config(oconfig_item_t *ci) {
+  int status = rdt_preinit();
+
+  if (status != 0)
+    return status;
+
+  for (int i = 0; i < ci->children_num; i++) {
+    oconfig_item_t *option = &ci->children[i];
+
+    if (strcasecmp("Cores", option->key) != 0) {
+      ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".",
+            option->key);
+      continue;
+    }
+
+    status = rdt_config_cgroups(option);
+    if (status != 0)
+      return status;
+
+#if COLLECT_DEBUG
+    rdt_dump_cgroups();
+#endif /* COLLECT_DEBUG */
+  }
+
+  return (0);
+}
+
+/* Dispatches one derive value for core group `cgroup' (used as plugin
+ * instance) under the given type; `type_instance' may be NULL. */
+static void rdt_submit_derive(char *cgroup, char *type, char *type_instance,
+                              derive_t value) {
+  value_t sample = {.derive = value};
+  value_list_t vl = VALUE_LIST_INIT;
+
+  vl.values = &sample;
+  vl.values_len = 1;
+
+  sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
+  snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
+  sstrncpy(vl.type, type, sizeof(vl.type));
+  if (type_instance != NULL)
+    sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
+
+  plugin_dispatch_values(&vl);
+}
+
+/* Dispatches one gauge value for core group `cgroup' (used as plugin
+ * instance) under the given type; `type_instance' may be NULL. */
+static void rdt_submit_gauge(char *cgroup, char *type, char *type_instance,
+                             gauge_t value) {
+  value_t sample = {.gauge = value};
+  value_list_t vl = VALUE_LIST_INIT;
+
+  vl.values = &sample;
+  vl.values_len = 1;
+
+  sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin));
+  snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
+  sstrncpy(vl.type, type, sizeof(vl.type));
+  if (type_instance != NULL)
+    sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
+
+  plugin_dispatch_values(&vl);
+}
+
+/*
+ * Read callback: polls the PQoS library for all configured groups and
+ * dispatches the values of the events each group monitors.
+ *
+ * Returns 0 on success, -EINVAL if the plugin context is missing and -1 if
+ * polling fails.
+ */
+static int rdt_read(__attribute__((unused)) user_data_t *ud) {
+  /* any of the memory bandwidth events triggers the bandwidth submission */
+  const enum pqos_mon_event mbm_events =
+      (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
+       PQOS_MON_EVENT_RMEM_BW);
+
+  if (g_rdt == NULL) {
+    ERROR(RDT_PLUGIN ": rdt_read: plugin not initialized.");
+    return (-EINVAL);
+  }
+
+  if (pqos_mon_poll(&g_rdt->pgroups[0], (unsigned)g_rdt->num_groups) !=
+      PQOS_RETVAL_OK) {
+    ERROR(RDT_PLUGIN ": Failed to poll monitoring data.");
+    return (-1);
+  }
+
+#if COLLECT_DEBUG
+  rdt_dump_data();
+#endif /* COLLECT_DEBUG */
+
+  for (int grp = 0; grp < g_rdt->num_groups; grp++) {
+    rdt_core_group_t *cg = &g_rdt->cgroups[grp];
+    const struct pqos_event_values *vals = &g_rdt->pgroups[grp]->values;
+
+    /* Submit only monitored events data */
+
+    if (cg->events & PQOS_MON_EVENT_L3_OCCUP)
+      rdt_submit_gauge(cg->desc, "bytes", "llc", vals->llc);
+
+    if (cg->events & PQOS_PERF_EVENT_IPC)
+      rdt_submit_gauge(cg->desc, "ipc", NULL, vals->ipc);
+
+    if (cg->events & mbm_events) {
+      rdt_submit_derive(cg->desc, "memory_bandwidth", "local",
+                        vals->mbm_local_delta);
+      rdt_submit_derive(cg->desc, "memory_bandwidth", "remote",
+                        vals->mbm_remote_delta);
+    }
+  }
+
+  return (0);
+}
+
+/*
+ * Init callback: makes sure the plugin context exists and starts monitoring
+ * for every configured core group. A group that fails to start is reported
+ * but does not abort initialization.
+ *
+ * Returns 0, or the error from rdt_preinit().
+ */
+static int rdt_init(void) {
+  int status = rdt_preinit();
+
+  if (status != 0)
+    return status;
+
+  /* Start monitoring */
+  for (int grp = 0; grp < g_rdt->num_groups; grp++) {
+    rdt_core_group_t *cg = g_rdt->cgroups + grp;
+
+    status = pqos_mon_start(cg->num_cores, cg->cores, cg->events,
+                            (void *)cg->desc, g_rdt->pgroups[grp]);
+    if (status == PQOS_RETVAL_OK)
+      continue;
+
+    ERROR(RDT_PLUGIN ": Error starting monitoring group %s (pqos status=%d)",
+          cg->desc, status);
+  }
+
+  return (0);
+}
+
+/*
+ * Shutdown callback: stops monitoring of all groups, shuts the PQoS library
+ * down and releases the core groups and the plugin context. Always returns 0.
+ */
+static int rdt_shutdown(void) {
+  DEBUG(RDT_PLUGIN ": rdt_shutdown.");
+
+  if (g_rdt == NULL)
+    return (0);
+
+  /* Stop monitoring */
+  for (int grp = 0; grp < g_rdt->num_groups; grp++)
+    pqos_mon_stop(g_rdt->pgroups[grp]);
+
+  if (pqos_fini() != PQOS_RETVAL_OK)
+    ERROR(RDT_PLUGIN ": Error shutting down PQoS library.");
+
+  rdt_free_cgroups();
+  sfree(g_rdt);
+
+  return (0);
+}
+
+/* Plugin entry point: registers the init, config, read and shutdown
+ * callbacks under the plugin name. */
+void module_register(void) {
+  static const char plugin_name[] = RDT_PLUGIN;
+
+  plugin_register_init(plugin_name, rdt_init);
+  plugin_register_complex_config(plugin_name, rdt_config);
+  plugin_register_complex_read(NULL, plugin_name, rdt_read, 0, NULL);
+  plugin_register_shutdown(plugin_name, rdt_shutdown);
+}
diff --git a/src/rdtmon.c b/src/rdtmon.c
--- a/src/rdtmon.c
+++ /dev/null
@@ -1,678 +0,0 @@
-/**
- * collectd - src/rdtmon.c
- *
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of
- * this software and associated documentation files (the "Software"), to deal in
- * the Software without restriction, including without limitation the rights to
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished to do
- * so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors:
- * Serhiy Pshyk <serhiyx.pshyk@intel.com>
- **/
-
-#include <pqos.h>
-
-#include "common.h"
-
-/* Plugin name: prefixes the log messages and is used when registering the
- * callbacks and dispatching values. */
-#define RDTMON_PLUGIN "rdtmon"
-
-/* Compile-time limits. RDTMON_MAX_CORES sizes the group tables in the plugin
- * context and the scratch array used while parsing a core list. */
-#define RDTMON_MAX_SOCKETS 8
-#define RDTMON_MAX_SOCKET_CORES 64
-#define RDTMON_MAX_CORES (RDTMON_MAX_SOCKET_CORES * RDTMON_MAX_SOCKETS)
-
-/* One group of cores that is monitored as a unit. */
-struct rdtmon_core_group_s {
- /* Heap-allocated description; used as plugin instance when submitting. */
- char *desc;
- /* Number of entries in `cores'. */
- size_t num_cores;
- /* Heap-allocated table of core ids belonging to the group. */
- unsigned *cores;
- /* Events passed to pqos_mon_start() for this group. */
- enum pqos_mon_event events;
-};
-typedef struct rdtmon_core_group_s rdtmon_core_group_t;
-
-/* Plugin context; a single instance is allocated by rdtmon_preinit(). */
-struct rdtmon_ctx_s {
- /* Configured core groups; the first `num_groups' entries are in use. */
- rdtmon_core_group_t cgroups[RDTMON_MAX_CORES];
- /* Per-group monitoring data handed to the PQoS library, same indexing. */
- struct pqos_mon_data *pgroups[RDTMON_MAX_CORES];
- /* Number of configured core groups. */
- size_t num_groups;
- /* CPU and capability information filled in by pqos_cap_get(). */
- const struct pqos_cpuinfo *pqos_cpu;
- const struct pqos_cap *pqos_cap;
- /* Monitoring capability filled in by pqos_cap_get_type(). */
- const struct pqos_capability *cap_mon;
-};
-typedef struct rdtmon_ctx_s rdtmon_ctx_t;
-
-/* The plugin context; NULL until rdtmon_preinit() succeeds and again after
- * rdtmon_shutdown() has released it. */
-static rdtmon_ctx_t *g_rdtmon = NULL;
-
-/* Returns 1 if `val' occurs among the first `size' entries of `nums',
- * 0 otherwise. */
-static int isdup(const uint64_t *nums, size_t size, uint64_t val) {
-  size_t pos = 0;
-
-  while (pos < size && nums[pos] != val)
-    pos++;
-
-  return (pos < size) ? 1 : 0;
-}
-
-/*
- * NAME
- *   strtouint64
- *
- * DESCRIPTION
- *   Converts the whole string `s' into an unsigned 64-bit number using
- *   strtoull() with base 0.
- *
- * PARAMETERS
- *   `s'  String to convert. Must not be NULL.
- *   `n'  Where the converted value is stored. Must not be NULL.
- *
- * RETURN VALUE
- *   0 on success. -EINVAL if `s' is empty, is negative, has trailing
- *   characters or does not fit the result type.
- */
-static int strtouint64(const char *s, uint64_t *n) {
-  char *endptr = NULL;
-  const char *first = s;
-
-  assert(s != NULL);
-  assert(n != NULL);
-
-  /* strtoull() skips leading white space and silently negates a value that
-   * starts with '-', so look at the first significant character ourselves. */
-  while (isspace((unsigned char)*first))
-    first++;
-
-  /* Overflow is only reported through errno. */
-  errno = 0;
-  *n = strtoull(s, &endptr, 0);
-
-  if (*s == '\0' || *first == '-' || endptr == s || *endptr != '\0' ||
-      errno == ERANGE) {
-    DEBUG(RDTMON_PLUGIN ": Error converting '%s' to unsigned number.", s);
-    return (-EINVAL);
-  }
-
-  return (0);
-}
-
-/*
- * NAME
- *   strlisttonums
- *
- * DESCRIPTION
- *   Converts string of characters representing list of numbers into array of
- *   numbers. Allowed formats are:
- *     0,1,2,3
- *     0-10,18-20
- *     1,3,5-8,10,0x10-0x12
- *
- *   Every number is parsed by strtouint64(). A value is stored only once,
- *   even if it is listed several times. A range whose start is greater than
- *   its end makes the whole list invalid.
- *
- * PARAMETERS
- *   `s'     String representing list of unsigned numbers. The string is
- *           modified while it is parsed.
- *   `nums'  Array to put converted numeric values into.
- *   `max'   Maximum number of elements that nums can accommodate.
- *
- * RETURN VALUE
- *   Number of elements placed into nums. 0 if an argument is invalid or the
- *   list cannot be parsed. Parsing stops as soon as `max' elements are
- *   stored.
- */
-static size_t strlisttonums(char *s, uint64_t *nums, size_t max) {
-  size_t index = 0;
-  char *saveptr = NULL;
-
-  if (s == NULL || nums == NULL || max == 0)
-    return 0;
-
-  for (char *token = strtok_r(s, ",", &saveptr); token != NULL;
-       token = strtok_r(NULL, ",", &saveptr)) {
-    uint64_t start;
-    uint64_t end;
-
-    /* The cast keeps isspace() well defined for negative char values. */
-    while (isspace((unsigned char)*token))
-      token++;
-    if (*token == '\0')
-      continue;
-
-    char *dash = strchr(token, '-');
-    if (dash != NULL) {
-      /* "start-end" range */
-      *dash = '\0';
-      if (strtouint64(token, &start) < 0)
-        return (0);
-      if (strtouint64(dash + 1, &end) < 0)
-        return (0);
-      if (start > end)
-        return (0);
-    } else {
-      /* single number, handled as a range of one element */
-      if (strtouint64(token, &start) < 0)
-        return (0);
-      end = start;
-    }
-
-    /* The loop ends on `n == end' instead of `n <= end' so that a range
-     * ending at UINT64_MAX cannot wrap around to 0 and keep going. */
-    for (uint64_t n = start;; n++) {
-      if (!isdup(nums, index, n)) {
-        nums[index] = n;
-        index++;
-      }
-      if (index >= max)
-        return index;
-      if (n == end)
-        break;
-    }
-  }
-
-  return index;
-}
-
-/*
- * NAME
- *   cgroup_cmp
- *
- * DESCRIPTION
- *   Counts the pairs of equal cores between two core groups and classifies
- *   the overlap of the groups from that count.
- *
- * PARAMETERS
- *   `cg_a'  Pointer to core group a.
- *   `cg_b'  Pointer to core group b.
- *
- * RETURN VALUE
- *    1 if both groups contain the same cores
- *    0 if none of their cores match
- *   -1 if some but not all cores match
- */
-static int cgroup_cmp(const rdtmon_core_group_t *cg_a,
-                      const rdtmon_core_group_t *cg_b) {
-  int matches = 0;
-
-  assert(cg_a != NULL);
-  assert(cg_b != NULL);
-
-  const int count_a = cg_a->num_cores;
-  const int count_b = cg_b->num_cores;
-
-  for (int a = 0; a < count_a; a++) {
-    for (int b = 0; b < count_b; b++) {
-      if (cg_a->cores[a] == cg_b->cores[b])
-        matches++;
-    }
-  }
-
-  /* disjoint groups */
-  if (matches == 0)
-    return 0;
-
-  /* same size and every core matched: identical groups, otherwise the
-   * groups overlap only partially */
-  return (count_a == count_b && count_b == matches) ? 1 : -1;
-}
-
-/*
- * NAME
- *   cgroup_set
- *
- * DESCRIPTION
- *   Fills a core group with its own copies of the description and of the
- *   core ids. Each id is narrowed from uint64_t to unsigned.
- *
- * PARAMETERS
- *   `cg'         Core group to fill in.
- *   `desc'       Description of the group; duplicated with strdup().
- *   `cores'      Core ids to copy.
- *   `num_cores'  Number of entries in `cores'; must be greater than 0.
- *
- * RETURN VALUE
- *   0 on success, -ENOMEM if one of the allocations fails.
- */
-static int cgroup_set(rdtmon_core_group_t *cg, char *desc, uint64_t *cores,
-                      size_t num_cores) {
-  assert(cg != NULL);
-  assert(desc != NULL);
-  assert(cores != NULL);
-  assert(num_cores > 0);
-
-  cg->cores = calloc(num_cores, sizeof(unsigned));
-  if (!cg->cores) {
-    ERROR(RDTMON_PLUGIN ": Error allocating core group table");
-    return (-ENOMEM);
-  }
-  cg->num_cores = num_cores;
-
-  cg->desc = strdup(desc);
-  if (!cg->desc) {
-    ERROR(RDTMON_PLUGIN ": Error allocating core group description");
-    sfree(cg->cores);
-    return (-ENOMEM);
-  }
-
-  const uint64_t *src = cores;
-  unsigned *dst = cg->cores;
-  for (size_t left = num_cores; left > 0; left--)
-    *dst++ = (unsigned)*src++;
-
-  return 0;
-}
-
-/*
- * NAME
- *   oconfig_to_cgroups
- *
- * DESCRIPTION
- *   Function to set the descriptions and cores for each core group.
- *   Takes a config option containing list of strings that are used to set
- *   core group values. Empty strings are skipped. Every other string is
- *   parsed with strlisttonums() and becomes one core group whose description
- *   is the string itself.
- *
- * PARAMETERS
- *   `item'        Config option containing core groups.
- *   `groups'      Table of core groups to set values in.
- *   `max_groups'  Maximum number of core groups allowed.
- *   `max_core'    Maximum allowed core value.
- *
- * RETURN VALUE
- *   On success, the number of core groups set up; surplus groups beyond
- *   `max_groups' are dropped with a warning. On error, appropriate negative
- *   error value. Groups set up before the error are left for the caller to
- *   release.
- */
-static int oconfig_to_cgroups(oconfig_item_t *item, rdtmon_core_group_t *groups,
-                              size_t max_groups, uint64_t max_core) {
-  size_t index = 0;
-
-  assert(groups != NULL);
-  assert(max_groups > 0);
-  assert(item != NULL);
-
-  for (int j = 0; j < item->values_num; j++) {
-    char *str = item->values[j].value.string;
-    uint64_t cores[RDTMON_MAX_CORES] = {0};
-    char value[DATA_MAX_NAME_LEN];
-
-    if ((str == NULL) || (*str == '\0'))
-      continue;
-
-    /* Warn only if there really is a group that does not fit any more. */
-    if (index >= max_groups) {
-      WARNING(RDTMON_PLUGIN ": Too many core groups configured");
-      break;
-    }
-
-    /* strlisttonums() modifies its input, so it gets a private copy. A copy
-     * that does not fit would be cut off, possibly in the middle of a
-     * number, and then be accepted as a different list. */
-    if (strlen(str) >= sizeof(value)) {
-      ERROR(RDTMON_PLUGIN ": Core group (%s) is too long", str);
-      return (-EINVAL);
-    }
-    sstrncpy(value, str, sizeof(value));
-
-    size_t n = strlisttonums(value, cores, STATIC_ARRAY_SIZE(cores));
-    if (n == 0) {
-      ERROR(RDTMON_PLUGIN ": Error parsing core group (%s)", str);
-      return (-EINVAL);
-    }
-
-    for (size_t i = 0; i < n; i++) {
-      if (cores[i] > max_core) {
-        /* print the full 64-bit value, not a truncated int */
-        ERROR(RDTMON_PLUGIN ": Core group (%s) contains invalid core id (%llu)",
-              str, (unsigned long long)cores[i]);
-        return (-EINVAL);
-      }
-    }
-
-    /* set core group info */
-    int ret = cgroup_set(&groups[index], str, cores, n);
-    if (ret < 0)
-      return ret;
-
-    index++;
-  }
-
-  return (int)index;
-}
-
-#if COLLECT_DEBUG
-/* Writes the configured core groups (description, cores and event mask) to
- * the debug log. Does nothing while the plugin context is not allocated. */
-static void rdtmon_dump_cgroups(void) {
-  char cores[RDTMON_MAX_CORES * 4];
-
-  if (!g_rdtmon)
-    return;
-
-  DEBUG(RDTMON_PLUGIN ": Core Groups Dump");
-  DEBUG(RDTMON_PLUGIN ": groups count: %zu", g_rdtmon->num_groups);
-
-  for (int i = 0; i < g_rdtmon->num_groups; i++) {
-    const rdtmon_core_group_t *group = &g_rdtmon->cgroups[i];
-
-    /* build the " <core> <core> ..." string by appending at the current end */
-    memset(cores, 0, sizeof(cores));
-    for (int j = 0; j < group->num_cores; j++) {
-      const size_t used = strlen(cores);
-
-      snprintf(cores + used, sizeof(cores) - used - 1, " %d", group->cores[j]);
-    }
-
-    DEBUG(RDTMON_PLUGIN ": group[%d]:", i);
-    DEBUG(RDTMON_PLUGIN ": description: %s", group->desc);
-    DEBUG(RDTMON_PLUGIN ": cores: %s", cores);
-    DEBUG(RDTMON_PLUGIN ": events: 0x%X", group->events);
-  }
-}
-
-/* Converts a number of bytes into units of 1024 bytes. */
-static inline double bytes_to_kb(const double bytes) {
-  const double bytes_per_kb = 1024.0;
-
-  return bytes / bytes_per_kb;
-}
-
-/* Converts a number of bytes into units of 1024 * 1024 bytes. */
-static inline double bytes_to_mb(const double bytes) {
-  const double bytes_per_mb = 1024.0 * 1024.0;
-
-  return bytes / bytes_per_mb;
-}
-
-/*
- * Writes one debug log line per core group with the values of the last poll.
- * Columns:
- *   CORE - monitored group of cores
- *   RMID - Resource Monitoring ID associated with the monitored group
- *   LLC  - last level cache occupancy
- *   MBL  - local memory bandwidth
- *   MBR  - remote memory bandwidth
- */
-static void rdtmon_dump_data(void) {
-  DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]");
-
-  for (int idx = 0; idx < g_rdtmon->num_groups; idx++) {
-    const struct pqos_mon_data *data = g_rdtmon->pgroups[idx];
-    const struct pqos_event_values *pv = &data->values;
-
-    const double llc = bytes_to_kb(pv->llc);
-    const double mbr = bytes_to_mb(pv->mbm_remote_delta);
-    const double mbl = bytes_to_mb(pv->mbm_local_delta);
-
-    DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdtmon->cgroups[idx].desc,
-          data->poll_ctx[0].rmid, llc, mbl, mbr);
-  }
-}
-#endif /* COLLECT_DEBUG */
-
-/*
- * NAME
- *   rdtmon_free_cgroups
- *
- * DESCRIPTION
- *   Releases the description, the core table and the monitoring data of
- *   every slot of the group tables and marks the context as having no
- *   groups. Safe to call when the context is not allocated or when only
- *   some of the slots are populated.
- */
-static void rdtmon_free_cgroups(void) {
-  if (g_rdtmon == NULL)
-    return;
-
-  for (int i = 0; i < RDTMON_MAX_CORES; i++) {
-    sfree(g_rdtmon->cgroups[i].desc);
-
-    sfree(g_rdtmon->cgroups[i].cores);
-    g_rdtmon->cgroups[i].num_cores = 0;
-
-    sfree(g_rdtmon->pgroups[i]);
-  }
-
-  /* Every slot is empty now. Without resetting the count, the init, read and
-   * shutdown callbacks would still walk the freed (NULL) entries after a
-   * failed configuration. */
-  g_rdtmon->num_groups = 0;
-}
-
-/*
- * NAME
- *   rdtmon_default_cgroups
- *
- * DESCRIPTION
- *   Creates the default configuration: one core group per core reported by
- *   the PQoS library, described by the logical core id. At most
- *   RDTMON_MAX_CORES groups are created, because that is the size of the
- *   group tables.
- *
- * RETURN VALUE
- *   Number of groups created, or the negative error value of cgroup_set().
- *   Groups created before an error are left for the caller to release.
- */
-static int rdtmon_default_cgroups(void) {
-  unsigned num_cores = g_rdtmon->pqos_cpu->num_cores;
-
-  if (num_cores > RDTMON_MAX_CORES) {
-    WARNING(RDTMON_PLUGIN
-            ": Too many cores in the system (%u), monitoring only the first %d",
-            num_cores, RDTMON_MAX_CORES);
-    num_cores = RDTMON_MAX_CORES;
-  }
-
-  /* configure each core in separate group */
-  for (unsigned i = 0; i < num_cores; i++) {
-    char desc[DATA_MAX_NAME_LEN];
-    /* Monitor the same logical core id that names the group, so that label
-     * and data agree even if ids and table indexes differ. */
-    const unsigned lcore = g_rdtmon->pqos_cpu->cores[i].lcore;
-    uint64_t core = lcore;
-
-    ssnprintf(desc, sizeof(desc), "%u", lcore);
-
-    /* set core group info */
-    int ret = cgroup_set(&g_rdtmon->cgroups[i], desc, &core, 1);
-    if (ret < 0)
-      return ret;
-  }
-
-  return (int)num_cores;
-}
-
-/*
- * NAME
- *   rdtmon_config_cgroups
- *
- * DESCRIPTION
- *   Handles the "Cores" config option. Builds the core groups from the
- *   option values, or one group per core if no group is given, rejects
- *   groups that share cores, assigns all events supported by the platform
- *   except PQOS_PERF_EVENT_LLC_MISS to every group and allocates the
- *   per-group monitoring data.
- *
- * PARAMETERS
- *   `item'  The "Cores" config option; all its values must be strings.
- *
- * RETURN VALUE
- *   0 on success, negative error value otherwise. On error all groups are
- *   released and the group count of the context stays 0.
- */
-static int rdtmon_config_cgroups(oconfig_item_t *item) {
-  int n = 0;
-  enum pqos_mon_event events = 0;
-
-  if (item == NULL) {
-    DEBUG(RDTMON_PLUGIN ": cgroups_config: Invalid argument.");
-    return (-EINVAL);
-  }
-
-  DEBUG(RDTMON_PLUGIN ": Core groups [%d]:", item->values_num);
-  for (int j = 0; j < item->values_num; j++) {
-    if (item->values[j].type != OCONFIG_TYPE_STRING) {
-      ERROR(RDTMON_PLUGIN ": given core group value is not a string [idx=%d]",
-            j);
-      return (-EINVAL);
-    }
-    DEBUG(RDTMON_PLUGIN ": [%d]: %s", j, item->values[j].value.string);
-  }
-
-  /* A repeated "Cores" option replaces the earlier one; release its groups
-   * first so that they are not leaked when the tables are filled again. */
-  rdtmon_free_cgroups();
-  g_rdtmon->num_groups = 0;
-
-  n = oconfig_to_cgroups(item, g_rdtmon->cgroups, RDTMON_MAX_CORES,
-                         g_rdtmon->pqos_cpu->num_cores - 1);
-  if (n < 0) {
-    rdtmon_free_cgroups();
-    ERROR(RDTMON_PLUGIN ": Error parsing core groups configuration.");
-    return (-EINVAL);
-  }
-
-  if (n == 0) {
-    /* create default core groups if "Cores" config option is empty */
-    n = rdtmon_default_cgroups();
-    if (n < 0) {
-      rdtmon_free_cgroups();
-      ERROR(RDTMON_PLUGIN
-            ": Error creating default core groups configuration.");
-      return n;
-    }
-    INFO(RDTMON_PLUGIN
-         ": No core groups configured. Default core groups created.");
-  }
-
-  /* Get all available events on this platform */
-  for (unsigned i = 0; i < g_rdtmon->cap_mon->u.mon->num_events; i++)
-    events |= g_rdtmon->cap_mon->u.mon->events[i].type;
-
-  events &= ~(PQOS_PERF_EVENT_LLC_MISS);
-
-  DEBUG(RDTMON_PLUGIN ": Number of cores in the system: %u",
-        g_rdtmon->pqos_cpu->num_cores);
-  DEBUG(RDTMON_PLUGIN ": Available events to monitor: %#x", events);
-
-  for (int i = 0; i < n; i++) {
-    /* a core may belong to one group only */
-    for (int j = 0; j < i; j++) {
-      if (cgroup_cmp(&g_rdtmon->cgroups[j], &g_rdtmon->cgroups[i]) != 0) {
-        rdtmon_free_cgroups();
-        ERROR(RDTMON_PLUGIN ": Cannot monitor same cores in different groups.");
-        return (-EINVAL);
-      }
-    }
-
-    g_rdtmon->cgroups[i].events = events;
-    g_rdtmon->pgroups[i] = calloc(1, sizeof(*g_rdtmon->pgroups[i]));
-    if (g_rdtmon->pgroups[i] == NULL) {
-      rdtmon_free_cgroups();
-      ERROR(RDTMON_PLUGIN ": Failed to allocate memory for monitoring data.");
-      return (-ENOMEM);
-    }
-  }
-
-  /* Publish the count only once every group is complete: the other
-   * callbacks iterate over num_groups entries and must never see a group
-   * that was released on one of the error paths above. */
-  g_rdtmon->num_groups = n;
-
-  return (0);
-}
-
-/*
- * NAME
- *   rdtmon_preinit
- *
- * DESCRIPTION
- *   Allocates the plugin context, initializes the PQoS library and looks up
- *   the monitoring capability. Returns immediately if the context already
- *   exists. On failure the context is released again.
- *
- * RETURN VALUE
- *   0 on success or if already initialized, -ENOMEM if the context cannot
- *   be allocated, -1 on any PQoS related failure.
- */
-static int rdtmon_preinit(void) {
-  /* already initialized if config callback was called before init callback */
-  if (g_rdtmon != NULL)
-    return 0;
-
-  g_rdtmon = calloc(1, sizeof(*g_rdtmon));
-  if (g_rdtmon == NULL) {
-    ERROR(RDTMON_PLUGIN ": Failed to allocate memory for rdtmon context.");
-    return -ENOMEM;
-  }
-
-  /* In case previous instance of the application was not closed properly
-   * call fini and ignore return code. */
-  pqos_fini();
-
-  /* TODO:
-   * stdout should not be used here. Will be reworked when support of log
-   * callback is added to PQoS library.
-   */
-  struct pqos_config log_cfg = {.fd_log = STDOUT_FILENO};
-  if (pqos_init(&log_cfg) != PQOS_RETVAL_OK) {
-    ERROR(RDTMON_PLUGIN ": Error initializing PQoS library!");
-    goto release_context;
-  }
-
-  if (pqos_cap_get(&g_rdtmon->pqos_cap, &g_rdtmon->pqos_cpu) !=
-      PQOS_RETVAL_OK) {
-    ERROR(RDTMON_PLUGIN ": Error retrieving PQoS capabilities.");
-    goto shutdown_pqos;
-  }
-
-  if (pqos_cap_get_type(g_rdtmon->pqos_cap, PQOS_CAP_TYPE_MON,
-                        &g_rdtmon->cap_mon) == PQOS_RETVAL_PARAM) {
-    ERROR(RDTMON_PLUGIN ": Error retrieving monitoring capabilities.");
-    goto shutdown_pqos;
-  }
-
-  if (g_rdtmon->cap_mon == NULL) {
-    ERROR(
-        RDTMON_PLUGIN
-        ": Monitoring capability not detected. Nothing to do for the plugin.");
-    goto shutdown_pqos;
-  }
-
-  return 0;
-
-shutdown_pqos:
-  pqos_fini();
-
-release_context:
-  sfree(g_rdtmon);
-
-  return -1;
-}
-
-/*
- * NAME
- *   rdtmon_config
- *
- * DESCRIPTION
- *   Config callback. Makes sure the plugin is pre-initialized and processes
- *   the child options: "Cores" (case insensitive) is passed on to
- *   rdtmon_config_cgroups(), any other key is only reported as an error.
- *
- * PARAMETERS
- *   `ci'  Configuration block of the plugin.
- *
- * RETURN VALUE
- *   0 on success, otherwise the non-zero status of rdtmon_preinit() or
- *   rdtmon_config_cgroups().
- */
-static int rdtmon_config(oconfig_item_t *ci) {
-  int status = rdtmon_preinit();
-
-  if (status != 0)
-    return status;
-
-  for (int idx = 0; idx < ci->children_num; idx++) {
-    oconfig_item_t *option = &ci->children[idx];
-
-    if (strcasecmp("Cores", option->key) != 0) {
-      ERROR(RDTMON_PLUGIN ": Unknown configuration parameter \"%s\".",
-            option->key);
-      continue;
-    }
-
-    status = rdtmon_config_cgroups(option);
-    if (status != 0)
-      return status;
-
-#if COLLECT_DEBUG
-    rdtmon_dump_cgroups();
-#endif /* COLLECT_DEBUG */
-  }
-
-  return 0;
-}
-
-/*
- * Dispatches a single derive value for a core group.
- *
- *   `cgroup'         Group description, used as plugin instance.
- *   `type'           Type of the value.
- *   `type_instance'  Type instance; may be NULL to leave it unset.
- *   `value'          Value to dispatch.
- */
-static void rdtmon_submit_derive(char *cgroup, char *type, char *type_instance,
-                                 derive_t value) {
-  value_t sample = {.derive = value};
-  value_list_t vl = VALUE_LIST_INIT;
-
-  sstrncpy(vl.plugin, RDTMON_PLUGIN, sizeof(vl.plugin));
-  snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
-  sstrncpy(vl.type, type, sizeof(vl.type));
-  if (type_instance != NULL)
-    sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
-
-  vl.values = &sample;
-  vl.values_len = 1;
-
-  plugin_dispatch_values(&vl);
-}
-
-/*
- * Dispatches a single gauge value for a core group.
- *
- *   `cgroup'         Group description, used as plugin instance.
- *   `type'           Type of the value.
- *   `type_instance'  Type instance; may be NULL to leave it unset.
- *   `value'          Value to dispatch.
- */
-static void rdtmon_submit_gauge(char *cgroup, char *type, char *type_instance,
-                                gauge_t value) {
-  value_t sample = {.gauge = value};
-  value_list_t vl = VALUE_LIST_INIT;
-
-  sstrncpy(vl.plugin, RDTMON_PLUGIN, sizeof(vl.plugin));
-  snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup);
-  sstrncpy(vl.type, type, sizeof(vl.type));
-  if (type_instance != NULL)
-    sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
-
-  vl.values = &sample;
-  vl.values_len = 1;
-
-  plugin_dispatch_values(&vl);
-}
-
-/*
- * NAME
- *   rdtmon_read
- *
- * DESCRIPTION
- *   Read callback. Polls the PQoS monitoring data of all groups and submits,
- *   for every group, the values of the events that are enabled for it:
- *   LLC occupancy and IPC as gauges, local and remote memory bandwidth
- *   deltas as derives.
- *
- * PARAMETERS
- *   `ud'  Unused.
- *
- * RETURN VALUE
- *   0 on success, -EINVAL if the plugin is not initialized, -1 if polling
- *   fails.
- */
-static int rdtmon_read(__attribute__((unused)) user_data_t *ud) {
-  if (g_rdtmon == NULL) {
-    ERROR(RDTMON_PLUGIN ": rdtmon_read: plugin not initialized.");
-    return (-EINVAL);
-  }
-
-  if (pqos_mon_poll(&g_rdtmon->pgroups[0], (unsigned)g_rdtmon->num_groups) !=
-      PQOS_RETVAL_OK) {
-    ERROR(RDTMON_PLUGIN ": Failed to poll monitoring data.");
-    return (-1);
-  }
-
-#if COLLECT_DEBUG
-  rdtmon_dump_data();
-#endif /* COLLECT_DEBUG */
-
-  /* any of the memory bandwidth events */
-  const enum pqos_mon_event mbm_events =
-      (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW |
-       PQOS_MON_EVENT_RMEM_BW);
-
-  for (int idx = 0; idx < g_rdtmon->num_groups; idx++) {
-    rdtmon_core_group_t *group = &g_rdtmon->cgroups[idx];
-    const struct pqos_event_values *pv = &g_rdtmon->pgroups[idx]->values;
-
-    /* Submit only monitored events data */
-
-    if (group->events & PQOS_MON_EVENT_L3_OCCUP)
-      rdtmon_submit_gauge(group->desc, "bytes", "llc", pv->llc);
-
-    if (group->events & PQOS_PERF_EVENT_IPC)
-      rdtmon_submit_gauge(group->desc, "ipc", NULL, pv->ipc);
-
-    if (group->events & mbm_events) {
-      rdtmon_submit_derive(group->desc, "memory_bandwidth", "local",
-                           pv->mbm_local_delta);
-      rdtmon_submit_derive(group->desc, "memory_bandwidth", "remote",
-                           pv->mbm_remote_delta);
-    }
-  }
-
-  return (0);
-}
-
-/*
- * NAME
- *   rdtmon_init
- *
- * DESCRIPTION
- *   Init callback. Runs rdtmon_preinit() and then requests PQoS monitoring
- *   for every configured core group. A group that fails to start is
- *   reported through ERROR() and the remaining groups are still attempted.
- *
- * RETURN VALUE
- *   The non-zero status of rdtmon_preinit() if it fails, otherwise 0 (also
- *   when individual groups could not be started).
- */
-static int rdtmon_init(void) {
-  const int status = rdtmon_preinit();
-
-  if (status != 0)
-    return status;
-
-  for (int idx = 0; idx < g_rdtmon->num_groups; idx++) {
-    rdtmon_core_group_t *group = &g_rdtmon->cgroups[idx];
-    const int rc =
-        pqos_mon_start(group->num_cores, group->cores, group->events,
-                       (void *)group->desc, g_rdtmon->pgroups[idx]);
-
-    if (rc == PQOS_RETVAL_OK)
-      continue;
-
-    ERROR(RDTMON_PLUGIN ": Error starting monitoring group %s (pqos status=%d)",
-          group->desc, rc);
-  }
-
-  return 0;
-}
-
-/*
- * NAME
- *   rdtmon_shutdown
- *
- * DESCRIPTION
- *   Shutdown callback. Stops monitoring of every group, shuts the PQoS
- *   library down, then releases the core groups and the plugin context.
- *   Does nothing when the context was never allocated.
- *
- * RETURN VALUE
- *   Always 0; a failing pqos_fini() is only logged.
- */
-static int rdtmon_shutdown(void) {
-  DEBUG(RDTMON_PLUGIN ": rdtmon_shutdown.");
-
-  if (!g_rdtmon)
-    return 0;
-
-  for (int idx = 0; idx < g_rdtmon->num_groups; idx++)
-    pqos_mon_stop(g_rdtmon->pgroups[idx]);
-
-  if (pqos_fini() != PQOS_RETVAL_OK)
-    ERROR(RDTMON_PLUGIN ": Error shutting down PQoS library.");
-
-  rdtmon_free_cgroups();
-  sfree(g_rdtmon);
-
-  return 0;
-}
-
-/* Registers the init, config, read and shutdown callbacks of this plugin
- * under the RDTMON_PLUGIN name. */
-void module_register(void) {
- plugin_register_init(RDTMON_PLUGIN, rdtmon_init);
- plugin_register_complex_config(RDTMON_PLUGIN, rdtmon_config);
- plugin_register_complex_read(NULL, RDTMON_PLUGIN, rdtmon_read, 0, NULL);
- plugin_register_shutdown(RDTMON_PLUGIN, rdtmon_shutdown);
-}