From: Pshyk, SerhiyX Date: Thu, 6 Oct 2016 11:19:56 +0000 (+0100) Subject: rdtmon: Rename 'rdtmon' plugin to 'intel_rdt'. X-Git-Tag: collectd-5.7.0~56^2~2 X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=141fd306b2f83d9bd7a4434348ef864c109cbaf1;p=collectd.git rdtmon: Rename 'rdtmon' plugin to 'intel_rdt'. Change-Id: Id23eb96fd37e6d4fc5fdf7e7ed58d9e74a33cca0 Signed-off-by: Serhiy Pshyk --- diff --git a/README b/README index 12c6574f..9b8e5d4c 100644 --- a/README +++ b/README @@ -142,6 +142,15 @@ Features hugepages can be found here: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt. + - intel_rdt + The intel_rdt plugin collects information provided by monitoring features + of Intel Resource Director Technology (Intel(R) RDT) like Cache Monitoring + Technology (CMT), Memory Bandwidth Monitoring (MBM). These features + provide information about utilization of shared resources like last level + cache occupancy, local memory bandwidth usage, remote memory bandwidth + usage, instructions per clock. + + - interface Interface traffic: Number of octets, packets and errors for each interface. @@ -313,15 +322,6 @@ Features collectd without the need to start a heavy interpreter every interval. See collectd-python(5) for details. - - rdtmon - The rdtmon plugin collects information provided by monitoring features of - Intel Resource Director Technology (Intel(R) RDT) like Cache Monitoring - Technology (CMT), Memory Bandwidth Monitoring (MBM). These features - provide information about utilization of shared resources like last level - cache occupancy, local memory bandwidth usage, remote memory bandwidth - usage, instructions per clock. - - - redis The redis plugin gathers information from a Redis server, including: uptime, used memory, total connections etc. @@ -825,7 +825,7 @@ Prerequisites * libpqos (optional) The PQoS library for Intel(R) Resource Director Technology used by the - `rdtmon' plugin. + `intel_rdt' plugin. 
* libprotobuf, protoc 3.0+ (optional) diff --git a/configure.ac b/configure.ac index a74864b8..07f93922 100644 --- a/configure.ac +++ b/configure.ac @@ -5835,6 +5835,7 @@ plugin_fscache="no" plugin_gps="no" plugin_grpc="no" plugin_hugepages="no" +plugin_intel_rdt="no" plugin_interface="no" plugin_ipmi="no" plugin_ipvs="no" @@ -5850,7 +5851,6 @@ plugin_pinba="no" plugin_processes="no" plugin_protocols="no" plugin_python="no" -plugin_rdtmon="no" plugin_serial="no" plugin_smart="no" plugin_swap="no" @@ -6293,6 +6293,7 @@ AC_PLUGIN([gps], [$plugin_gps], [GPS plugin]) AC_PLUGIN([grpc], [$plugin_grpc], [gRPC plugin]) AC_PLUGIN([hddtemp], [yes], [Query hddtempd]) AC_PLUGIN([hugepages], [$plugin_hugepages], [Hugepages statistics]) +AC_PLUGIN([intel_rdt], [$with_libpqos], [Intel RDT monitor plugin]) AC_PLUGIN([interface], [$plugin_interface], [Interface traffic statistics]) AC_PLUGIN([ipc], [$plugin_ipc], [IPC statistics]) AC_PLUGIN([ipmi], [$plugin_ipmi], [IPMI sensor statistics]) @@ -6348,7 +6349,6 @@ AC_PLUGIN([powerdns], [yes], [PowerDNS statistics AC_PLUGIN([processes], [$plugin_processes], [Process statistics]) AC_PLUGIN([protocols], [$plugin_protocols], [Protocol (IP, TCP, ...) statistics]) AC_PLUGIN([python], [$plugin_python], [Embed a Python interpreter]) -AC_PLUGIN([rdtmon], [$with_libpqos], [RDT monitor plugin]) AC_PLUGIN([redis], [$with_libhiredis], [Redis plugin]) AC_PLUGIN([routeros], [$with_librouteros], [RouterOS plugin]) AC_PLUGIN([rrdcached], [$librrd_rrdc_update], [RRDTool output plugin]) @@ -6728,6 +6728,7 @@ AC_MSG_RESULT([ gps . . . . . . . . . $enable_gps]) AC_MSG_RESULT([ grpc . . . . . . . . $enable_grpc]) AC_MSG_RESULT([ hddtemp . . . . . . . $enable_hddtemp]) AC_MSG_RESULT([ hugepages . . . . . . $enable_hugepages]) +AC_MSG_RESULT([ intel_rdt. . . . . . $enable_intel_rdt]) AC_MSG_RESULT([ interface . . . . . . $enable_interface]) AC_MSG_RESULT([ ipc . . . . . . . . . $enable_ipc]) AC_MSG_RESULT([ ipmi . . . . . . . . 
$enable_ipmi]) @@ -6783,7 +6784,6 @@ AC_MSG_RESULT([ processes . . . . . . $enable_processes]) AC_MSG_RESULT([ protocols . . . . . . $enable_protocols]) AC_MSG_RESULT([ python . . . . . . . $enable_python]) AC_MSG_RESULT([ redis . . . . . . . . $enable_redis]) -AC_MSG_RESULT([ rdtmon . . . . . . . $enable_rdtmon]) AC_MSG_RESULT([ routeros . . . . . . $enable_routeros]) AC_MSG_RESULT([ rrdcached . . . . . . $enable_rrdcached]) AC_MSG_RESULT([ rrdtool . . . . . . . $enable_rrdtool]) diff --git a/src/Makefile.am b/src/Makefile.am index 5001fdef..f48fdf37 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -962,12 +962,12 @@ protocols_la_SOURCES = protocols.c protocols_la_LDFLAGS = $(PLUGIN_LDFLAGS) endif -if BUILD_PLUGIN_RDTMON -pkglib_LTLIBRARIES += rdtmon.la -rdtmon_la_SOURCES = rdtmon.c -rdtmon_la_LDFLAGS = $(PLUGIN_LDFLAGS) $(BUILD_WITH_LIBPQOS_LDFLAGS) -rdtmon_la_CFLAGS = $(AM_CFLAGS) $(BUILD_WITH_LIBPQOS_CPPFLAGS) -rdtmon_la_LIBADD = $(BUILD_WITH_LIBPQOS_LIBS) +if BUILD_PLUGIN_INTEL_RDT +pkglib_LTLIBRARIES += intel_rdt.la +intel_rdt_la_SOURCES = intel_rdt.c +intel_rdt_la_LDFLAGS = $(PLUGIN_LDFLAGS) $(BUILD_WITH_LIBPQOS_LDFLAGS) +intel_rdt_la_CFLAGS = $(AM_CFLAGS) $(BUILD_WITH_LIBPQOS_CPPFLAGS) +intel_rdt_la_LIBADD = $(BUILD_WITH_LIBPQOS_LIBS) endif if BUILD_PLUGIN_REDIS diff --git a/src/collectd.conf.in b/src/collectd.conf.in index 52028338..f81d7718 100644 --- a/src/collectd.conf.in +++ b/src/collectd.conf.in @@ -128,6 +128,7 @@ #@BUILD_PLUGIN_GRPC_TRUE@LoadPlugin grpc #@BUILD_PLUGIN_HDDTEMP_TRUE@LoadPlugin hddtemp #@BUILD_PLUGIN_HUGEPAGES_TRUE@LoadPlugin hugepages +#@BUILD_PLUGIN_INTEL_RDT_TRUE@LoadPlugin intel_rdt @BUILD_PLUGIN_INTERFACE_TRUE@@BUILD_PLUGIN_INTERFACE_TRUE@LoadPlugin interface #@BUILD_PLUGIN_IPC_TRUE@LoadPlugin ipc #@BUILD_PLUGIN_IPMI_TRUE@LoadPlugin ipmi @@ -175,7 +176,6 @@ #@BUILD_PLUGIN_PROTOCOLS_TRUE@LoadPlugin protocols #@BUILD_PLUGIN_PYTHON_TRUE@LoadPlugin python #@BUILD_PLUGIN_REDIS_TRUE@LoadPlugin redis 
-#@BUILD_PLUGIN_RDTMON_TRUE@LoadPlugin rdtmon #@BUILD_PLUGIN_ROUTEROS_TRUE@LoadPlugin routeros #@BUILD_PLUGIN_RRDCACHED_TRUE@LoadPlugin rrdcached @LOAD_PLUGIN_RRDTOOL@LoadPlugin rrdtool @@ -616,6 +616,10 @@ # ValuesPercentage false # +#<Plugin "intel_rdt"> +# Cores "0-2" +#</Plugin> + # # Interface "eth0" # IgnoreSelected false @@ -1098,10 +1102,6 @@ # # -#<Plugin "rdtmon"> -# Cores "0-2" -#</Plugin> - # # # Host "redis.example.com" diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod index 52f73242..36a6d00e 100644 --- a/src/collectd.conf.pod +++ b/src/collectd.conf.pod @@ -2928,6 +2928,60 @@ Defaults to B. =back +=head2 Plugin C<intel_rdt> + +The I<intel_rdt> plugin collects information provided by monitoring features of +Intel Resource Director Technology (Intel(R) RDT) like Cache Monitoring +Technology (CMT), Memory Bandwidth Monitoring (MBM). These features provide +information about utilization of shared resources. CMT monitors last level cache +occupancy (LLC). MBM supports two types of events reporting local and remote +memory bandwidth. Local memory bandwidth (MBL) reports the bandwidth of +accessing memory associated with the local socket. Remote memory bandwidth (MBR) +reports the bandwidth of accessing the remote socket. Also this technology +allows monitoring of instructions per clock (IPC). +Monitor events are hardware dependent. Monitoring capabilities are detected on +plugin initialization and only supported events are monitored. + +B<Synopsis:> + + <Plugin "intel_rdt"> + Cores "0-2" "3,4,6" "8-10,15" + </Plugin> + +B<Options:> + +=over 4 + +=item B<Interval> I<seconds> + +The interval within which to retrieve statistics on monitored events in seconds. +For milliseconds, divide the time by 1000; for example, if the desired interval +is 50ms, set interval to 0.05. Due to limited capacity of counters it is not +recommended to set interval higher than 1 sec. + +=item B<Cores> I<cores groups> + +All events are reported on a per core basis. Monitoring of the events can be +configured for a group of cores (aggregated statistics). This field defines groups +of cores on which to monitor supported events. 
The field is represented as a list +of strings with core group values. Each string represents a list of cores in a +group. Allowed formats are: + 0,1,2,3 + 0-10,18-20 + 1,3,5-8,10,0x10-0x12 + +If an empty string is provided as value for this field, the default cores +configuration is applied - a separate group is created for each core. + +=back + +B<Note:> By default the global interval is used to retrieve statistics on monitored +events. To configure a plugin specific interval use B<Interval> option of the +intel_rdt <LoadPlugin> block. For milliseconds, divide the time by 1000; for +example, if the desired interval is 50ms, set interval to 0.05. +Due to limited capacity of counters it is not recommended to set interval higher +than 1 sec. + =head2 Plugin C =over 4 @@ -6295,60 +6349,6 @@ Defaults to B. =back -=head2 Plugin C<rdtmon> - -The I<rdtmon> plugin collects information provided by monitoring features of -Intel Resource Director Technology (Intel(R) RDT) like Cache Monitoring -Technology (CMT), Memory Bandwidth Monitoring (MBM). These features provide -information about utilization of shared resources. CMT monitors last level cache -occupancy (LLC). MBM supports two types of events reporting local and remote -memory bandwidth. Local memory bandwidth (MBL) reports the bandwidth of -accessing memory associated with the local socket. Remote memory bandwidth (MBR) -reports the bandwidth of accessing the remote socket. Also this technology -allows to monitor instructions per clock (IPC). -Monitor events are hardware dependant. Monitoring capabilities are detected on -plugin initialization and only supported events are monitored. - -B<Synopsis:> - - <Plugin "rdtmon"> - Cores "0-2" "3,4,6" "8-10,15" - </Plugin> - -B<Options:> - -=over 4 - -=item B<Interval> I<seconds> - -The interval within which to retrieve statistics on monitored events in seconds. -For milliseconds divide the time by 1000 for example if the desired interval -is 50ms, set interval to 0.05. Due to limited capacity of counters it is not -recommended to set interval higher than 1 sec. 
- -=item B<Cores> I<cores groups> - -All events are reported on a per core basis. Monitoring of the events can be -configured for group of cores (aggregated statistics). This field defines groups -of cores on which to monitor supported events. The field is represented as list -of strings with core group values. Each string represents a list of cores in a -group. Allowed formats are: - 0,1,2,3 - 0-10,20-18 - 1,3,5-8,10,0x10-12 - -If an empty string is provided as value for this field default cores -configuration is applied - a separate group is created for each core. - -=back - -B<Note:> By default global interval is used to retrieve statistics on monitored -events. To configure a plugin specific interval use B<Interval> option of the -rdtmon <LoadPlugin> block. For milliseconds divide the time by 1000 for example -if the desired interval is 50ms, set interval to 0.05. -Due to limited capacity of counters it is not recommended to set interval higher -than 1 sec. - =head2 Plugin C The I connects to one or more Redis servers and gathers diff --git a/src/intel_rdt.c b/src/intel_rdt.c new file mode 100644 index 00000000..0863a727 --- /dev/null +++ b/src/intel_rdt.c @@ -0,0 +1,678 @@ +/** + * collectd - src/intel_rdt.c + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do + * so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Serhiy Pshyk + **/ + +#include + +#include "common.h" + +#define RDT_PLUGIN "intel_rdt" + +#define RDT_MAX_SOCKETS 8 +#define RDT_MAX_SOCKET_CORES 64 +#define RDT_MAX_CORES (RDT_MAX_SOCKET_CORES * RDT_MAX_SOCKETS) + +struct rdt_core_group_s { + char *desc; + size_t num_cores; + unsigned *cores; + enum pqos_mon_event events; +}; +typedef struct rdt_core_group_s rdt_core_group_t; + +struct rdt_ctx_s { + rdt_core_group_t cgroups[RDT_MAX_CORES]; + struct pqos_mon_data *pgroups[RDT_MAX_CORES]; + size_t num_groups; + const struct pqos_cpuinfo *pqos_cpu; + const struct pqos_cap *pqos_cap; + const struct pqos_capability *cap_mon; +}; +typedef struct rdt_ctx_s rdt_ctx_t; + +static rdt_ctx_t *g_rdt = NULL; + +static int isdup(const uint64_t *nums, size_t size, uint64_t val) { + for (size_t i = 0; i < size; i++) + if (nums[i] == val) + return 1; + return 0; +} + +static int strtouint64(const char *s, uint64_t *n) { + char *endptr = NULL; + + assert(s != NULL); + assert(n != NULL); + + *n = strtoull(s, &endptr, 0); + + if (!(*s != '\0' && *endptr == '\0')) { + DEBUG(RDT_PLUGIN ": Error converting '%s' to unsigned number.", s); + return (-EINVAL); + } + + return (0); +} + +/* + * NAME + * strlisttonums + * + * DESCRIPTION + * Converts string of characters representing list of numbers into array of + * numbers. Allowed formats are: + * 0,1,2,3 + * 0-10,20-18 + * 1,3,5-8,10,0x10-12 + * + * Numbers can be in decimal or hexadecimal format. 
+ * + * PARAMETERS + * `s' String representing list of unsigned numbers. + * `nums' Array to put converted numeric values into. + * `max' Maximum number of elements that nums can accommodate. + * + * RETURN VALUE + * Number of elements placed into nums. + */ +static size_t strlisttonums(char *s, uint64_t *nums, size_t max) { + int ret; + size_t index = 0; + char *saveptr = NULL; + + if (s == NULL || nums == NULL || max == 0) + return index; + + for (;;) { + char *p = NULL; + char *token = NULL; + + token = strtok_r(s, ",", &saveptr); + if (token == NULL) + break; + + s = NULL; + + while (isspace(*token)) + token++; + if (*token == '\0') + continue; + + p = strchr(token, '-'); + if (p != NULL) { + uint64_t n, start, end; + *p = '\0'; + ret = strtouint64(token, &start); + if (ret < 0) + return (0); + ret = strtouint64(p + 1, &end); + if (ret < 0) + return (0); + if (start > end) { + return (0); + } + for (n = start; n <= end; n++) { + if (!(isdup(nums, index, n))) { + nums[index] = n; + index++; + } + if (index >= max) + return index; + } + } else { + uint64_t val; + + ret = strtouint64(token, &val); + if (ret < 0) + return (0); + + if (!(isdup(nums, index, val))) { + nums[index] = val; + index++; + } + if (index >= max) + return index; + } + } + + return index; +} + +/* + * NAME + * cgroup_cmp + * + * DESCRIPTION + * Function to compare cores in 2 core groups. + * + * PARAMETERS + * `cg_a' Pointer to core group a. + * `cg_b' Pointer to core group b. 
+ * + * RETURN VALUE + * 1 if both groups contain the same cores + * 0 if none of their cores match + * -1 if some but not all cores match + */ +static int cgroup_cmp(const rdt_core_group_t *cg_a, + const rdt_core_group_t *cg_b) { + int found = 0; + + assert(cg_a != NULL); + assert(cg_b != NULL); + + const int sz_a = cg_a->num_cores; + const int sz_b = cg_b->num_cores; + const unsigned *tab_a = cg_a->cores; + const unsigned *tab_b = cg_b->cores; + + for (int i = 0; i < sz_a; i++) { + for (int j = 0; j < sz_b; j++) + if (tab_a[i] == tab_b[j]) + found++; + } + /* if no cores are the same */ + if (!found) + return 0; + /* if group contains same cores */ + if (sz_a == sz_b && sz_b == found) + return 1; + /* if not all cores are the same */ + return -1; +} + +static int cgroup_set(rdt_core_group_t *cg, char *desc, uint64_t *cores, + size_t num_cores) { + assert(cg != NULL); + assert(desc != NULL); + assert(cores != NULL); + assert(num_cores > 0); + + cg->cores = calloc(num_cores, sizeof(unsigned)); + if (cg->cores == NULL) { + ERROR(RDT_PLUGIN ": Error allocating core group table"); + return (-ENOMEM); + } + cg->num_cores = num_cores; + cg->desc = strdup(desc); + if (cg->desc == NULL) { + ERROR(RDT_PLUGIN ": Error allocating core group description"); + sfree(cg->cores); + return (-ENOMEM); + } + + for (size_t i = 0; i < num_cores; i++) + cg->cores[i] = (unsigned)cores[i]; + + return 0; +} + +/* + * NAME + * oconfig_to_cgroups + * + * DESCRIPTION + * Function to set the descriptions and cores for each core group. + * Takes a config option containing list of strings that are used to set + * core group values. + * + * PARAMETERS + * `item' Config option containing core groups. + * `groups' Table of core groups to set values in. + * `max_groups' Maximum number of core groups allowed. + * `max_core' Maximum allowed core value. + * + * RETURN VALUE + * On success, the number of core groups set up. On error, appropriate + * negative error value. 
+ */ +static int oconfig_to_cgroups(oconfig_item_t *item, rdt_core_group_t *groups, + size_t max_groups, uint64_t max_core) { + int index = 0; + + assert(groups != NULL); + assert(max_groups > 0); + assert(item != NULL); + + for (int j = 0; j < item->values_num; j++) { + int ret; + size_t n; + uint64_t cores[RDT_MAX_CORES] = {0}; + char value[DATA_MAX_NAME_LEN]; + + if ((item->values[j].value.string == NULL) || (strlen(item->values[j].value.string) == 0)) + continue; + + sstrncpy(value, item->values[j].value.string, sizeof(value)); + + n = strlisttonums(value, cores, STATIC_ARRAY_SIZE(cores)); + if (n == 0) { + ERROR(RDT_PLUGIN ": Error parsing core group (%s)", + item->values[j].value.string); + return (-EINVAL); + } + + for (int i = 0; i < n; i++) { + if (cores[i] > max_core) { + ERROR(RDT_PLUGIN ": Core group (%s) contains invalid core id (%d)", + item->values[j].value.string, (int)cores[i]); + return (-EINVAL); + } + } + + /* set core group info */ + ret = cgroup_set(&groups[index], item->values[j].value.string, cores, n); + if (ret < 0) + return ret; + + index++; + + if (index >= max_groups) { + WARNING(RDT_PLUGIN ": Too many core groups configured"); + return index; + } + } + + return index; +} + +#if COLLECT_DEBUG +static void rdt_dump_cgroups(void) { + char cores[RDT_MAX_CORES * 4]; + + if (g_rdt == NULL) + return; + + DEBUG(RDT_PLUGIN ": Core Groups Dump"); + DEBUG(RDT_PLUGIN ": groups count: %zu", g_rdt->num_groups); + + for (int i = 0; i < g_rdt->num_groups; i++) { + + memset(cores, 0, sizeof(cores)); + for (int j = 0; j < g_rdt->cgroups[i].num_cores; j++) { + snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d", + g_rdt->cgroups[i].cores[j]); + } + + DEBUG(RDT_PLUGIN ": group[%d]:", i); + DEBUG(RDT_PLUGIN ": description: %s", g_rdt->cgroups[i].desc); + DEBUG(RDT_PLUGIN ": cores: %s", cores); + DEBUG(RDT_PLUGIN ": events: 0x%X", g_rdt->cgroups[i].events); + } + + return; +} + +static inline double bytes_to_kb(const double bytes) { 
return bytes / 1024.0; } + +static inline double bytes_to_mb(const double bytes) { + return bytes / (1024.0 * 1024.0); +} + +static void rdt_dump_data(void) { + /* + * CORE - monitored group of cores + * RMID - Resource Monitoring ID associated with the monitored group + * LLC - last level cache occupancy + * MBL - local memory bandwidth + * MBR - remote memory bandwidth + */ + DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]"); + for (int i = 0; i < g_rdt->num_groups; i++) { + + const struct pqos_event_values *pv = &g_rdt->pgroups[i]->values; + + double llc = bytes_to_kb(pv->llc); + double mbr = bytes_to_mb(pv->mbm_remote_delta); + double mbl = bytes_to_mb(pv->mbm_local_delta); + + DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdt->cgroups[i].desc, + g_rdt->pgroups[i]->poll_ctx[0].rmid, llc, mbl, mbr); + } +} +#endif /* COLLECT_DEBUG */ + +static void rdt_free_cgroups(void) { + for (int i = 0; i < RDT_MAX_CORES; i++) { + sfree(g_rdt->cgroups[i].desc); + + sfree(g_rdt->cgroups[i].cores); + g_rdt->cgroups[i].num_cores = 0; + + sfree(g_rdt->pgroups[i]); + } +} + +static int rdt_default_cgroups(void) { + int ret; + + /* configure each core in separate group */ + for (unsigned i = 0; i < g_rdt->pqos_cpu->num_cores; i++) { + char desc[DATA_MAX_NAME_LEN]; + uint64_t core = i; + + ssnprintf(desc, sizeof(desc), "%d", g_rdt->pqos_cpu->cores[i].lcore); + + /* set core group info */ + ret = cgroup_set(&g_rdt->cgroups[i], desc, &core, 1); + if (ret < 0) + return ret; + } + + return g_rdt->pqos_cpu->num_cores; +} + +static int rdt_config_cgroups(oconfig_item_t *item) { + int n = 0; + enum pqos_mon_event events = 0; + + if (item == NULL) { + DEBUG(RDT_PLUGIN ": cgroups_config: Invalid argument."); + return (-EINVAL); + } + + DEBUG(RDT_PLUGIN ": Core groups [%d]:", item->values_num); + for (int j = 0; j < item->values_num; j++) { + if (item->values[j].type != OCONFIG_TYPE_STRING) { + ERROR(RDT_PLUGIN ": given core group value is not a string [idx=%d]", + j); + return (-EINVAL); + } + 
DEBUG(RDT_PLUGIN ": [%d]: %s", j, item->values[j].value.string); + } + + n = oconfig_to_cgroups(item, g_rdt->cgroups, RDT_MAX_CORES, + g_rdt->pqos_cpu->num_cores-1); + if (n < 0) { + rdt_free_cgroups(); + ERROR(RDT_PLUGIN ": Error parsing core groups configuration."); + return (-EINVAL); + } + + if (n == 0) { + /* create default core groups if "Cores" config option is empty */ + n = rdt_default_cgroups(); + if (n < 0) { + rdt_free_cgroups(); + ERROR(RDT_PLUGIN + ": Error creating default core groups configuration."); + return n; + } + INFO(RDT_PLUGIN + ": No core groups configured. Default core groups created."); + } + + /* Get all available events on this platform */ + for (int i = 0; i < g_rdt->cap_mon->u.mon->num_events; i++) + events |= g_rdt->cap_mon->u.mon->events[i].type; + + events &= ~(PQOS_PERF_EVENT_LLC_MISS); + + DEBUG(RDT_PLUGIN ": Number of cores in the system: %u", + g_rdt->pqos_cpu->num_cores); + DEBUG(RDT_PLUGIN ": Available events to monitor: %#x", events); + + g_rdt->num_groups = n; + for (int i = 0; i < n; i++) { + for (int j = 0; j < i; j++) { + int found = 0; + found = cgroup_cmp(&g_rdt->cgroups[j], &g_rdt->cgroups[i]); + if (found != 0) { + rdt_free_cgroups(); + ERROR(RDT_PLUGIN ": Cannot monitor same cores in different groups."); + return (-EINVAL); + } + } + + g_rdt->cgroups[i].events = events; + g_rdt->pgroups[i] = calloc(1, sizeof(*g_rdt->pgroups[i])); + if (g_rdt->pgroups[i] == NULL) { + rdt_free_cgroups(); + ERROR(RDT_PLUGIN ": Failed to allocate memory for monitoring data."); + return (-ENOMEM); + } + } + + return (0); +} + +static int rdt_preinit(void) { + int ret; + + if (g_rdt != NULL) { + /* already initialized if config callback was called before init callback */ + return (0); + } + + g_rdt = calloc(1, sizeof(*g_rdt)); + if (g_rdt == NULL) { + ERROR(RDT_PLUGIN ": Failed to allocate memory for rdt context."); + return (-ENOMEM); + } + + /* In case previous instance of the application was not closed properly + * call fini and ignore 
return code. */ + pqos_fini(); + + /* TODO: + * stdout should not be used here. Will be reworked when support of log + * callback is added to PQoS library. + */ + ret = pqos_init(&(struct pqos_config){.fd_log = STDOUT_FILENO}); + if (ret != PQOS_RETVAL_OK) { + ERROR(RDT_PLUGIN ": Error initializing PQoS library!"); + goto rdt_preinit_error1; + } + + ret = pqos_cap_get(&g_rdt->pqos_cap, &g_rdt->pqos_cpu); + if (ret != PQOS_RETVAL_OK) { + ERROR(RDT_PLUGIN ": Error retrieving PQoS capabilities."); + goto rdt_preinit_error2; + } + + ret = pqos_cap_get_type(g_rdt->pqos_cap, PQOS_CAP_TYPE_MON, + &g_rdt->cap_mon); + if (ret == PQOS_RETVAL_PARAM) { + ERROR(RDT_PLUGIN ": Error retrieving monitoring capabilities."); + goto rdt_preinit_error2; + } + + if (g_rdt->cap_mon == NULL) { + ERROR( + RDT_PLUGIN + ": Monitoring capability not detected. Nothing to do for the plugin."); + goto rdt_preinit_error2; + } + + return (0); + +rdt_preinit_error2: + pqos_fini(); + +rdt_preinit_error1: + + sfree(g_rdt); + + return (-1); +} + +static int rdt_config(oconfig_item_t *ci) { + int ret = 0; + + ret = rdt_preinit(); + if (ret != 0) + return ret; + + for (int i = 0; i < ci->children_num; i++) { + oconfig_item_t *child = ci->children + i; + + if (strcasecmp("Cores", child->key) == 0) { + + ret = rdt_config_cgroups(child); + if (ret != 0) + return ret; + +#if COLLECT_DEBUG + rdt_dump_cgroups(); +#endif /* COLLECT_DEBUG */ + + } else { + ERROR(RDT_PLUGIN ": Unknown configuration parameter \"%s\".", + child->key); + } + } + + return (0); +} + +static void rdt_submit_derive(char *cgroup, char *type, char *type_instance, + derive_t value) { + value_list_t vl = VALUE_LIST_INIT; + + vl.values = &(value_t) { .derive = value }; + vl.values_len = 1; + + sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin)); + snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup); + sstrncpy(vl.type, type, sizeof(vl.type)); + if (type_instance) + sstrncpy(vl.type_instance, type_instance, 
sizeof(vl.type_instance)); + + plugin_dispatch_values(&vl); +} + +static void rdt_submit_gauge(char *cgroup, char *type, char *type_instance, + gauge_t value) { + value_list_t vl = VALUE_LIST_INIT; + + vl.values = &(value_t) { .gauge = value }; + vl.values_len = 1; + + sstrncpy(vl.plugin, RDT_PLUGIN, sizeof(vl.plugin)); + snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup); + sstrncpy(vl.type, type, sizeof(vl.type)); + if (type_instance) + sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance)); + + plugin_dispatch_values(&vl); +} + +static int rdt_read(__attribute__((unused)) user_data_t *ud) { + int ret; + + if (g_rdt == NULL) { + ERROR(RDT_PLUGIN ": rdt_read: plugin not initialized."); + return (-EINVAL); + } + + ret = pqos_mon_poll(&g_rdt->pgroups[0], (unsigned)g_rdt->num_groups); + if (ret != PQOS_RETVAL_OK) { + ERROR(RDT_PLUGIN ": Failed to poll monitoring data."); + return (-1); + } + +#if COLLECT_DEBUG + rdt_dump_data(); +#endif /* COLLECT_DEBUG */ + + for (int i = 0; i < g_rdt->num_groups; i++) { + enum pqos_mon_event mbm_events = + (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW | + PQOS_MON_EVENT_RMEM_BW); + + const struct pqos_event_values *pv = &g_rdt->pgroups[i]->values; + + /* Submit only monitored events data */ + + if (g_rdt->cgroups[i].events & PQOS_MON_EVENT_L3_OCCUP) + rdt_submit_gauge(g_rdt->cgroups[i].desc, "bytes", "llc", pv->llc); + + if (g_rdt->cgroups[i].events & PQOS_PERF_EVENT_IPC) + rdt_submit_gauge(g_rdt->cgroups[i].desc, "ipc", NULL, pv->ipc); + + if (g_rdt->cgroups[i].events & mbm_events) { + rdt_submit_derive(g_rdt->cgroups[i].desc, "memory_bandwidth", + "local", pv->mbm_local_delta); + rdt_submit_derive(g_rdt->cgroups[i].desc, "memory_bandwidth", + "remote", pv->mbm_remote_delta); + } + } + + return (0); +} + +static int rdt_init(void) { + int ret; + + ret = rdt_preinit(); + if (ret != 0) + return ret; + + /* Start monitoring */ + for (int i = 0; i < g_rdt->num_groups; i++) { + rdt_core_group_t *cg = 
&g_rdt->cgroups[i]; + + ret = pqos_mon_start(cg->num_cores, cg->cores, cg->events, (void *)cg->desc, + g_rdt->pgroups[i]); + + if (ret != PQOS_RETVAL_OK) + ERROR(RDT_PLUGIN ": Error starting monitoring group %s (pqos status=%d)", + cg->desc, ret); + } + + return (0); +} + +static int rdt_shutdown(void) { + int ret; + + DEBUG(RDT_PLUGIN ": rdt_shutdown."); + + if (g_rdt == NULL) + return (0); + + /* Stop monitoring */ + for (int i = 0; i < g_rdt->num_groups; i++) { + pqos_mon_stop(g_rdt->pgroups[i]); + } + + ret = pqos_fini(); + if (ret != PQOS_RETVAL_OK) + ERROR(RDT_PLUGIN ": Error shutting down PQoS library."); + + rdt_free_cgroups(); + sfree(g_rdt); + + return (0); +} + +void module_register(void) { + plugin_register_init(RDT_PLUGIN, rdt_init); + plugin_register_complex_config(RDT_PLUGIN, rdt_config); + plugin_register_complex_read(NULL, RDT_PLUGIN, rdt_read, 0, NULL); + plugin_register_shutdown(RDT_PLUGIN, rdt_shutdown); +} diff --git a/src/rdtmon.c b/src/rdtmon.c deleted file mode 100644 index 35c1c3f5..00000000 --- a/src/rdtmon.c +++ /dev/null @@ -1,678 +0,0 @@ -/** - * collectd - src/rdtmon.c - * - * Copyright(c) 2016 Intel Corporation. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished to do - * so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Serhiy Pshyk - **/ - -#include - -#include "common.h" - -#define RDTMON_PLUGIN "rdtmon" - -#define RDTMON_MAX_SOCKETS 8 -#define RDTMON_MAX_SOCKET_CORES 64 -#define RDTMON_MAX_CORES (RDTMON_MAX_SOCKET_CORES * RDTMON_MAX_SOCKETS) - -struct rdtmon_core_group_s { - char *desc; - size_t num_cores; - unsigned *cores; - enum pqos_mon_event events; -}; -typedef struct rdtmon_core_group_s rdtmon_core_group_t; - -struct rdtmon_ctx_s { - rdtmon_core_group_t cgroups[RDTMON_MAX_CORES]; - struct pqos_mon_data *pgroups[RDTMON_MAX_CORES]; - size_t num_groups; - const struct pqos_cpuinfo *pqos_cpu; - const struct pqos_cap *pqos_cap; - const struct pqos_capability *cap_mon; -}; -typedef struct rdtmon_ctx_s rdtmon_ctx_t; - -static rdtmon_ctx_t *g_rdtmon = NULL; - -static int isdup(const uint64_t *nums, size_t size, uint64_t val) { - for (size_t i = 0; i < size; i++) - if (nums[i] == val) - return 1; - return 0; -} - -static int strtouint64(const char *s, uint64_t *n) { - char *endptr = NULL; - - assert(s != NULL); - assert(n != NULL); - - *n = strtoull(s, &endptr, 0); - - if (!(*s != '\0' && *endptr == '\0')) { - DEBUG(RDTMON_PLUGIN ": Error converting '%s' to unsigned number.", s); - return (-EINVAL); - } - - return (0); -} - -/* - * NAME - * strlisttonums - * - * DESCRIPTION - * Converts string of characters representing list of numbers into array of - * numbers. Allowed formats are: - * 0,1,2,3 - * 0-10,20-18 - * 1,3,5-8,10,0x10-12 - * - * Numbers can be in decimal or hexadecimal format. - * - * PARAMETERS - * `s' String representing list of unsigned numbers. - * `nums' Array to put converted numeric values into. - * `max' Maximum number of elements that nums can accommodate. 
- * - * RETURN VALUE - * Number of elements placed into nums. - */ -static size_t strlisttonums(char *s, uint64_t *nums, size_t max) { - int ret; - size_t index = 0; - char *saveptr = NULL; - - if (s == NULL || nums == NULL || max == 0) - return index; - - for (;;) { - char *p = NULL; - char *token = NULL; - - token = strtok_r(s, ",", &saveptr); - if (token == NULL) - break; - - s = NULL; - - while (isspace(*token)) - token++; - if (*token == '\0') - continue; - - p = strchr(token, '-'); - if (p != NULL) { - uint64_t n, start, end; - *p = '\0'; - ret = strtouint64(token, &start); - if (ret < 0) - return (0); - ret = strtouint64(p + 1, &end); - if (ret < 0) - return (0); - if (start > end) { - return (0); - } - for (n = start; n <= end; n++) { - if (!(isdup(nums, index, n))) { - nums[index] = n; - index++; - } - if (index >= max) - return index; - } - } else { - uint64_t val; - - ret = strtouint64(token, &val); - if (ret < 0) - return (0); - - if (!(isdup(nums, index, val))) { - nums[index] = val; - index++; - } - if (index >= max) - return index; - } - } - - return index; -} - -/* - * NAME - * cgroup_cmp - * - * DESCRIPTION - * Function to compare cores in 2 core groups. - * - * PARAMETERS - * `cg_a' Pointer to core group a. - * `cg_b' Pointer to core group b. 
- * - * RETURN VALUE - * 1 if both groups contain the same cores - * 0 if none of their cores match - * -1 if some but not all cores match - */ -static int cgroup_cmp(const rdtmon_core_group_t *cg_a, - const rdtmon_core_group_t *cg_b) { - int found = 0; - - assert(cg_a != NULL); - assert(cg_b != NULL); - - const int sz_a = cg_a->num_cores; - const int sz_b = cg_b->num_cores; - const unsigned *tab_a = cg_a->cores; - const unsigned *tab_b = cg_b->cores; - - for (int i = 0; i < sz_a; i++) { - for (int j = 0; j < sz_b; j++) - if (tab_a[i] == tab_b[j]) - found++; - } - /* if no cores are the same */ - if (!found) - return 0; - /* if group contains same cores */ - if (sz_a == sz_b && sz_b == found) - return 1; - /* if not all cores are the same */ - return -1; -} - -static int cgroup_set(rdtmon_core_group_t *cg, char *desc, uint64_t *cores, - size_t num_cores) { - assert(cg != NULL); - assert(desc != NULL); - assert(cores != NULL); - assert(num_cores > 0); - - cg->cores = calloc(num_cores, sizeof(unsigned)); - if (cg->cores == NULL) { - ERROR(RDTMON_PLUGIN ": Error allocating core group table"); - return (-ENOMEM); - } - cg->num_cores = num_cores; - cg->desc = strdup(desc); - if (cg->desc == NULL) { - ERROR(RDTMON_PLUGIN ": Error allocating core group description"); - sfree(cg->cores); - return (-ENOMEM); - } - - for (size_t i = 0; i < num_cores; i++) - cg->cores[i] = (unsigned)cores[i]; - - return 0; -} - -/* - * NAME - * oconfig_to_cgroups - * - * DESCRIPTION - * Function to set the descriptions and cores for each core group. - * Takes a config option containing list of strings that are used to set - * core group values. - * - * PARAMETERS - * `item' Config option containing core groups. - * `groups' Table of core groups to set values in. - * `max_groups' Maximum number of core groups allowed. - * `max_core' Maximum allowed core value. - * - * RETURN VALUE - * On success, the number of core groups set up. On error, appropriate - * negative error value. 
- */ -static int oconfig_to_cgroups(oconfig_item_t *item, rdtmon_core_group_t *groups, - size_t max_groups, uint64_t max_core) { - int index = 0; - - assert(groups != NULL); - assert(max_groups > 0); - assert(item != NULL); - - for (int j = 0; j < item->values_num; j++) { - int ret; - size_t n; - uint64_t cores[RDTMON_MAX_CORES] = {0}; - char value[DATA_MAX_NAME_LEN]; - - if ((item->values[j].value.string == NULL) || (strlen(item->values[j].value.string) == 0)) - continue; - - sstrncpy(value, item->values[j].value.string, sizeof(value)); - - n = strlisttonums(value, cores, STATIC_ARRAY_SIZE(cores)); - if (n == 0) { - ERROR(RDTMON_PLUGIN ": Error parsing core group (%s)", - item->values[j].value.string); - return (-EINVAL); - } - - for (int i = 0; i < n; i++) { - if (cores[i] > max_core) { - ERROR(RDTMON_PLUGIN ": Core group (%s) contains invalid core id (%d)", - item->values[j].value.string, (int)cores[i]); - return (-EINVAL); - } - } - - /* set core group info */ - ret = cgroup_set(&groups[index], item->values[j].value.string, cores, n); - if (ret < 0) - return ret; - - index++; - - if (index >= max_groups) { - WARNING(RDTMON_PLUGIN ": Too many core groups configured"); - return index; - } - } - - return index; -} - -#if COLLECT_DEBUG -static void rdtmon_dump_cgroups(void) { - char cores[RDTMON_MAX_CORES * 4]; - - if (g_rdtmon == NULL) - return; - - DEBUG(RDTMON_PLUGIN ": Core Groups Dump"); - DEBUG(RDTMON_PLUGIN ": groups count: %zu", g_rdtmon->num_groups); - - for (int i = 0; i < g_rdtmon->num_groups; i++) { - - memset(cores, 0, sizeof(cores)); - for (int j = 0; j < g_rdtmon->cgroups[i].num_cores; j++) { - snprintf(cores + strlen(cores), sizeof(cores) - strlen(cores) - 1, " %d", - g_rdtmon->cgroups[i].cores[j]); - } - - DEBUG(RDTMON_PLUGIN ": group[%d]:", i); - DEBUG(RDTMON_PLUGIN ": description: %s", g_rdtmon->cgroups[i].desc); - DEBUG(RDTMON_PLUGIN ": cores: %s", cores); - DEBUG(RDTMON_PLUGIN ": events: 0x%X", g_rdtmon->cgroups[i].events); - } - - return; -} - 
-static inline double bytes_to_kb(const double bytes) { return bytes / 1024.0; } - -static inline double bytes_to_mb(const double bytes) { - return bytes / (1024.0 * 1024.0); -} - -static void rdtmon_dump_data(void) { - /* - * CORE - monitored group of cores - * RMID - Resource Monitoring ID associated with the monitored group - * LLC - last level cache occupancy - * MBL - local memory bandwidth - * MBR - remote memory bandwidth - */ - DEBUG(" CORE RMID LLC[KB] MBL[MB] MBR[MB]"); - for (int i = 0; i < g_rdtmon->num_groups; i++) { - - const struct pqos_event_values *pv = &g_rdtmon->pgroups[i]->values; - - double llc = bytes_to_kb(pv->llc); - double mbr = bytes_to_mb(pv->mbm_remote_delta); - double mbl = bytes_to_mb(pv->mbm_local_delta); - - DEBUG(" [%s] %8u %10.1f %10.1f %10.1f", g_rdtmon->cgroups[i].desc, - g_rdtmon->pgroups[i]->poll_ctx[0].rmid, llc, mbl, mbr); - } -} -#endif /* COLLECT_DEBUG */ - -static void rdtmon_free_cgroups(void) { - for (int i = 0; i < RDTMON_MAX_CORES; i++) { - sfree(g_rdtmon->cgroups[i].desc); - - sfree(g_rdtmon->cgroups[i].cores); - g_rdtmon->cgroups[i].num_cores = 0; - - sfree(g_rdtmon->pgroups[i]); - } -} - -static int rdtmon_default_cgroups(void) { - int ret; - - /* configure each core in separate group */ - for (unsigned i = 0; i < g_rdtmon->pqos_cpu->num_cores; i++) { - char desc[DATA_MAX_NAME_LEN]; - uint64_t core = i; - - ssnprintf(desc, sizeof(desc), "%d", g_rdtmon->pqos_cpu->cores[i].lcore); - - /* set core group info */ - ret = cgroup_set(&g_rdtmon->cgroups[i], desc, &core, 1); - if (ret < 0) - return ret; - } - - return g_rdtmon->pqos_cpu->num_cores; -} - -static int rdtmon_config_cgroups(oconfig_item_t *item) { - int n = 0; - enum pqos_mon_event events = 0; - - if (item == NULL) { - DEBUG(RDTMON_PLUGIN ": cgroups_config: Invalid argument."); - return (-EINVAL); - } - - DEBUG(RDTMON_PLUGIN ": Core groups [%d]:", item->values_num); - for (int j = 0; j < item->values_num; j++) { - if (item->values[j].type != OCONFIG_TYPE_STRING) 
{ - ERROR(RDTMON_PLUGIN ": given core group value is not a string [idx=%d]", - j); - return (-EINVAL); - } - DEBUG(RDTMON_PLUGIN ": [%d]: %s", j, item->values[j].value.string); - } - - n = oconfig_to_cgroups(item, g_rdtmon->cgroups, RDTMON_MAX_CORES, - g_rdtmon->pqos_cpu->num_cores-1); - if (n < 0) { - rdtmon_free_cgroups(); - ERROR(RDTMON_PLUGIN ": Error parsing core groups configuration."); - return (-EINVAL); - } - - if (n == 0) { - /* create default core groups if "Cores" config option is empty */ - n = rdtmon_default_cgroups(); - if (n < 0) { - rdtmon_free_cgroups(); - ERROR(RDTMON_PLUGIN - ": Error creating default core groups configuration."); - return n; - } - INFO(RDTMON_PLUGIN - ": No core groups configured. Default core groups created."); - } - - /* Get all available events on this platform */ - for (int i = 0; i < g_rdtmon->cap_mon->u.mon->num_events; i++) - events |= g_rdtmon->cap_mon->u.mon->events[i].type; - - events &= ~(PQOS_PERF_EVENT_LLC_MISS); - - DEBUG(RDTMON_PLUGIN ": Number of cores in the system: %u", - g_rdtmon->pqos_cpu->num_cores); - DEBUG(RDTMON_PLUGIN ": Available events to monitor: %#x", events); - - g_rdtmon->num_groups = n; - for (int i = 0; i < n; i++) { - for (int j = 0; j < i; j++) { - int found = 0; - found = cgroup_cmp(&g_rdtmon->cgroups[j], &g_rdtmon->cgroups[i]); - if (found != 0) { - rdtmon_free_cgroups(); - ERROR(RDTMON_PLUGIN ": Cannot monitor same cores in different groups."); - return (-EINVAL); - } - } - - g_rdtmon->cgroups[i].events = events; - g_rdtmon->pgroups[i] = calloc(1, sizeof(*g_rdtmon->pgroups[i])); - if (g_rdtmon->pgroups[i] == NULL) { - rdtmon_free_cgroups(); - ERROR(RDTMON_PLUGIN ": Failed to allocate memory for monitoring data."); - return (-ENOMEM); - } - } - - return (0); -} - -static int rdtmon_preinit(void) { - int ret; - - if (g_rdtmon != NULL) { - /* already initialized if config callback was called before init callback */ - return (0); - } - - g_rdtmon = calloc(1, sizeof(*g_rdtmon)); - if (g_rdtmon 
== NULL) { - ERROR(RDTMON_PLUGIN ": Failed to allocate memory for rdtmon context."); - return (-ENOMEM); - } - - /* In case previous instance of the application was not closed properly - * call fini and ignore return code. */ - pqos_fini(); - - /* TODO: - * stdout should not be used here. Will be reworked when support of log - * callback is added to PQoS library. - */ - ret = pqos_init(&(struct pqos_config){.fd_log = STDOUT_FILENO}); - if (ret != PQOS_RETVAL_OK) { - ERROR(RDTMON_PLUGIN ": Error initializing PQoS library!"); - goto rdtmon_preinit_error1; - } - - ret = pqos_cap_get(&g_rdtmon->pqos_cap, &g_rdtmon->pqos_cpu); - if (ret != PQOS_RETVAL_OK) { - ERROR(RDTMON_PLUGIN ": Error retrieving PQoS capabilities."); - goto rdtmon_preinit_error2; - } - - ret = pqos_cap_get_type(g_rdtmon->pqos_cap, PQOS_CAP_TYPE_MON, - &g_rdtmon->cap_mon); - if (ret == PQOS_RETVAL_PARAM) { - ERROR(RDTMON_PLUGIN ": Error retrieving monitoring capabilities."); - goto rdtmon_preinit_error2; - } - - if (g_rdtmon->cap_mon == NULL) { - ERROR( - RDTMON_PLUGIN - ": Monitoring capability not detected. 
Nothing to do for the plugin."); - goto rdtmon_preinit_error2; - } - - return (0); - -rdtmon_preinit_error2: - pqos_fini(); - -rdtmon_preinit_error1: - - sfree(g_rdtmon); - - return (-1); -} - -static int rdtmon_config(oconfig_item_t *ci) { - int ret = 0; - - ret = rdtmon_preinit(); - if (ret != 0) - return ret; - - for (int i = 0; i < ci->children_num; i++) { - oconfig_item_t *child = ci->children + i; - - if (strcasecmp("Cores", child->key) == 0) { - - ret = rdtmon_config_cgroups(child); - if (ret != 0) - return ret; - -#if COLLECT_DEBUG - rdtmon_dump_cgroups(); -#endif /* COLLECT_DEBUG */ - - } else { - ERROR(RDTMON_PLUGIN ": Unknown configuration parameter \"%s\".", - child->key); - } - } - - return (0); -} - -static void rdtmon_submit_derive(char *cgroup, char *type, char *type_instance, - derive_t value) { - value_list_t vl = VALUE_LIST_INIT; - - vl.values = &(value_t) { .derive = value }; - vl.values_len = 1; - - sstrncpy(vl.plugin, RDTMON_PLUGIN, sizeof(vl.plugin)); - snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup); - sstrncpy(vl.type, type, sizeof(vl.type)); - if (type_instance) - sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance)); - - plugin_dispatch_values(&vl); -} - -static void rdtmon_submit_gauge(char *cgroup, char *type, char *type_instance, - gauge_t value) { - value_list_t vl = VALUE_LIST_INIT; - - vl.values = &(value_t) { .gauge = value }; - vl.values_len = 1; - - sstrncpy(vl.plugin, RDTMON_PLUGIN, sizeof(vl.plugin)); - snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s", cgroup); - sstrncpy(vl.type, type, sizeof(vl.type)); - if (type_instance) - sstrncpy(vl.type_instance, type_instance, sizeof(vl.type_instance)); - - plugin_dispatch_values(&vl); -} - -static int rdtmon_read(__attribute__((unused)) user_data_t *ud) { - int ret; - - if (g_rdtmon == NULL) { - ERROR(RDTMON_PLUGIN ": rdtmon_read: plugin not initialized."); - return (-EINVAL); - } - - ret = pqos_mon_poll(&g_rdtmon->pgroups[0], 
(unsigned)g_rdtmon->num_groups); - if (ret != PQOS_RETVAL_OK) { - ERROR(RDTMON_PLUGIN ": Failed to poll monitoring data."); - return (-1); - } - -#if COLLECT_DEBUG - rdtmon_dump_data(); -#endif /* COLLECT_DEBUG */ - - for (int i = 0; i < g_rdtmon->num_groups; i++) { - enum pqos_mon_event mbm_events = - (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_TMEM_BW | - PQOS_MON_EVENT_RMEM_BW); - - const struct pqos_event_values *pv = &g_rdtmon->pgroups[i]->values; - - /* Submit only monitored events data */ - - if (g_rdtmon->cgroups[i].events & PQOS_MON_EVENT_L3_OCCUP) - rdtmon_submit_gauge(g_rdtmon->cgroups[i].desc, "bytes", "llc", pv->llc); - - if (g_rdtmon->cgroups[i].events & PQOS_PERF_EVENT_IPC) - rdtmon_submit_gauge(g_rdtmon->cgroups[i].desc, "ipc", NULL, pv->ipc); - - if (g_rdtmon->cgroups[i].events & mbm_events) { - rdtmon_submit_derive(g_rdtmon->cgroups[i].desc, "memory_bandwidth", - "local", pv->mbm_local_delta); - rdtmon_submit_derive(g_rdtmon->cgroups[i].desc, "memory_bandwidth", - "remote", pv->mbm_remote_delta); - } - } - - return (0); -} - -static int rdtmon_init(void) { - int ret; - - ret = rdtmon_preinit(); - if (ret != 0) - return ret; - - /* Start monitoring */ - for (int i = 0; i < g_rdtmon->num_groups; i++) { - rdtmon_core_group_t *cg = &g_rdtmon->cgroups[i]; - - ret = pqos_mon_start(cg->num_cores, cg->cores, cg->events, (void *)cg->desc, - g_rdtmon->pgroups[i]); - - if (ret != PQOS_RETVAL_OK) - ERROR(RDTMON_PLUGIN ": Error starting monitoring group %s (pqos status=%d)", - cg->desc, ret); - } - - return (0); -} - -static int rdtmon_shutdown(void) { - int ret; - - DEBUG(RDTMON_PLUGIN ": rdtmon_shutdown."); - - if (g_rdtmon == NULL) - return (0); - - /* Stop monitoring */ - for (int i = 0; i < g_rdtmon->num_groups; i++) { - pqos_mon_stop(g_rdtmon->pgroups[i]); - } - - ret = pqos_fini(); - if (ret != PQOS_RETVAL_OK) - ERROR(RDTMON_PLUGIN ": Error shutting down PQoS library."); - - rdtmon_free_cgroups(); - sfree(g_rdtmon); - - return (0); -} - -void 
module_register(void) { - plugin_register_init(RDTMON_PLUGIN, rdtmon_init); - plugin_register_complex_config(RDTMON_PLUGIN, rdtmon_config); - plugin_register_complex_read(NULL, RDTMON_PLUGIN, rdtmon_read, 0, NULL); - plugin_register_shutdown(RDTMON_PLUGIN, rdtmon_shutdown); -}