1 /**
2 * collectd - src/mic.c
3 * Copyright (C) 2013 Battelle Memorial Institute
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; only version 2 of the License is applicable.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * Authors:
19 * Evan Felix <evan.felix at pnnl.gov>
20 **/
22 #include "collectd.h"
24 #include "common.h"
25 #include "plugin.h"
26 #include "utils_ignorelist.h"
28 #include <MicAccessApi.h>
29 #include <MicAccessErrorTypes.h>
30 #include <MicAccessTypes.h>
31 #include <MicPowerManagerAPI.h>
32 #include <MicThermalAPI.h>
/* Capacity of the `mics` device table. */
#define MAX_MICS 32
/* Capacity of the per-core jiffies buffer used by mic_read_cpu(). */
#define MAX_CORES 256

/* Device table filled in by MicInitAPI() in mic_init(). */
static MicDeviceOnSystem mics[MAX_MICS];
/* Number of valid entries in `mics`; set by mic_init(). */
static U32 num_mics = 0;
/* MicAccess API handle; NULL until mic_init() succeeds and again after
 * mic_shutdown(). */
static HANDLE mic_handle = NULL;

/* Thermal sensor identifiers passed to MicGetTemperature(). This array and
 * `therm_names` are parallel: entry j of one belongs to entry j of the
 * other, so both must be kept the same length and order. */
static int const therm_ids[] = {
    eMicThermalDie,  eMicThermalDevMem, eMicThermalFin, eMicThermalFout,
    eMicThermalVccp, eMicThermalVddg,   eMicThermalVddq};
/* Type-instance names reported for the sensors in `therm_ids`; these are
 * also the names matched against the temperature ignorelist. */
static char const *const therm_names[] = {"die",  "devmem", "fin", "fout",
                                          "vccp", "vddg",   "vddq"};

/* Configuration keys accepted by mic_config(). */
static const char *config_keys[] = {
    "ShowCPU",          "ShowCPUCores", "ShowMemory",
    "ShowTemperatures", "Temperature",  "IgnoreSelectedTemperature",
    "ShowPower",        "Power",        "IgnoreSelectedPower"};
static int config_keys_num = STATIC_ARRAY_SIZE(config_keys);

/* Feature switches; everything is reported unless disabled in the
 * configuration. */
static _Bool show_cpu = 1;       /* aggregated CPU counters per MIC */
static _Bool show_cpu_cores = 1; /* per-core CPU counters */
static _Bool show_memory = 1;    /* memory utilization */
static _Bool show_temps = 1;     /* temperature sensors */
/* Sensor names selected via "Temperature"; created lazily in mic_config(). */
static ignorelist_t *temp_ignore = NULL;
static _Bool show_power = 1; /* power, current and voltage readings */
/* Power rail names selected via "Power"; created lazily in mic_config(). */
static ignorelist_t *power_ignore = NULL;
61 static int mic_init(void) {
62 U32 ret;
63 U32 mic_count;
65 if (mic_handle)
66 return (0);
68 mic_count = (U32)STATIC_ARRAY_SIZE(mics);
69 ret = MicInitAPI(&mic_handle, eTARGET_SCIF_DRIVER, mics, &mic_count);
70 if (ret != MIC_ACCESS_API_SUCCESS) {
71 ERROR("mic plugin: Problem initializing MicAccessAPI: %s",
72 MicGetErrorString(ret));
73 }
74 DEBUG("mic plugin: found: %" PRIu32 " MIC(s)", mic_count);
76 if (mic_count < 0 || mic_count >= MAX_MICS) {
77 ERROR("mic plugin: No Intel MICs in system");
78 return (1);
79 } else {
80 num_mics = mic_count;
81 return (0);
82 }
83 }
85 static int mic_config(const char *key, const char *value) {
86 if (temp_ignore == NULL)
87 temp_ignore = ignorelist_create(1);
88 if (power_ignore == NULL)
89 power_ignore = ignorelist_create(1);
90 if (temp_ignore == NULL || power_ignore == NULL)
91 return (1);
93 if (strcasecmp("ShowCPU", key) == 0) {
94 show_cpu = IS_TRUE(value);
95 } else if (strcasecmp("ShowCPUCores", key) == 0) {
96 show_cpu_cores = IS_TRUE(value);
97 } else if (strcasecmp("ShowTemperatures", key) == 0) {
98 show_temps = IS_TRUE(value);
99 } else if (strcasecmp("ShowMemory", key) == 0) {
100 show_memory = IS_TRUE(value);
101 } else if (strcasecmp("ShowPower", key) == 0) {
102 show_power = IS_TRUE(value);
103 } else if (strcasecmp("Temperature", key) == 0) {
104 ignorelist_add(temp_ignore, value);
105 } else if (strcasecmp("IgnoreSelectedTemperature", key) == 0) {
106 int invert = 1;
107 if (IS_TRUE(value))
108 invert = 0;
109 ignorelist_set_invert(temp_ignore, invert);
110 } else if (strcasecmp("Power", key) == 0) {
111 ignorelist_add(power_ignore, value);
112 } else if (strcasecmp("IgnoreSelectedPower", key) == 0) {
113 int invert = 1;
114 if (IS_TRUE(value))
115 invert = 0;
116 ignorelist_set_invert(power_ignore, invert);
117 } else {
118 return (-1);
119 }
120 return (0);
121 }
123 static void mic_submit_memory_use(int micnumber, const char *type_instance,
124 U32 value) {
125 value_list_t vl = VALUE_LIST_INIT;
127 /* MicAccessAPI reports KB's of memory, adjust for this */
128 DEBUG("mic plugin: Memory Value Report; %u %lf", value,
129 ((gauge_t)value) * 1024.0);
131 vl.values = &(value_t){.gauge = ((gauge_t)value) * 1024.0};
132 vl.values_len = 1;
134 strncpy(vl.plugin, "mic", sizeof(vl.plugin));
135 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", micnumber);
136 strncpy(vl.type, "memory", sizeof(vl.type));
137 strncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
139 plugin_dispatch_values(&vl);
140 }
142 /* Gather memory Utilization */
143 static int mic_read_memory(int mic) {
144 U32 ret;
145 U32 mem_total, mem_free, mem_bufs;
147 ret = MicGetMemoryUtilization(mic_handle, &mem_total, &mem_free, &mem_bufs);
148 if (ret != MIC_ACCESS_API_SUCCESS) {
149 ERROR("mic plugin: Problem getting Memory Utilization: %s",
150 MicGetErrorString(ret));
151 return (1);
152 }
153 mic_submit_memory_use(mic, "free", mem_free);
154 mic_submit_memory_use(mic, "used", mem_total - mem_free - mem_bufs);
155 mic_submit_memory_use(mic, "buffered", mem_bufs);
156 DEBUG("mic plugin: Memory Read: %u %u %u", mem_total, mem_free, mem_bufs);
157 return (0);
158 }
160 static void mic_submit_temp(int micnumber, const char *type, gauge_t value) {
161 value_list_t vl = VALUE_LIST_INIT;
163 vl.values = &(value_t){.gauge = value};
164 vl.values_len = 1;
166 strncpy(vl.host, hostname_g, sizeof(vl.host));
167 strncpy(vl.plugin, "mic", sizeof(vl.plugin));
168 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", micnumber);
169 strncpy(vl.type, "temperature", sizeof(vl.type));
170 strncpy(vl.type_instance, type, sizeof(vl.type_instance));
172 plugin_dispatch_values(&vl);
173 }
175 /* Gather Temperature Information */
176 static int mic_read_temps(int mic) {
177 size_t num_therms = STATIC_ARRAY_SIZE(therm_ids);
179 for (size_t j = 0; j < num_therms; j++) {
180 U32 status;
181 U32 temp_buffer;
182 U32 buffer_size = (U32)sizeof(temp_buffer);
183 char const *name = therm_names[j];
185 if (ignorelist_match(temp_ignore, name) != 0)
186 continue;
188 status =
189 MicGetTemperature(mic_handle, therm_ids[j], &temp_buffer, &buffer_size);
190 if (status != MIC_ACCESS_API_SUCCESS) {
191 ERROR("mic plugin: Error reading temperature \"%s\": "
192 "%s",
193 name, MicGetErrorString(status));
194 return (1);
195 }
196 mic_submit_temp(mic, name, temp_buffer);
197 }
198 return (0);
199 }
201 static void mic_submit_cpu(int micnumber, const char *type_instance, int core,
202 derive_t value) {
203 value_list_t vl = VALUE_LIST_INIT;
205 vl.values = &(value_t){.derive = value};
206 vl.values_len = 1;
208 strncpy(vl.host, hostname_g, sizeof(vl.host));
209 strncpy(vl.plugin, "mic", sizeof(vl.plugin));
210 if (core < 0) /* global aggregation */
211 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", micnumber);
212 else /* per-core statistics */
213 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i-cpu-%i",
214 micnumber, core);
215 strncpy(vl.type, "cpu", sizeof(vl.type));
216 strncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
218 plugin_dispatch_values(&vl);
219 }
221 /*Gather CPU Utilization Information */
222 static int mic_read_cpu(int mic) {
223 MicCoreUtil core_util;
224 MicCoreJiff core_jiffs[MAX_CORES];
225 U32 core_jiffs_size;
226 U32 status;
228 core_jiffs_size = MAX_CORES * sizeof(MicCoreJiff);
229 status = MicGetCoreUtilization(mic_handle, &core_util, core_jiffs,
230 &core_jiffs_size);
231 if (status != MIC_ACCESS_API_SUCCESS) {
232 ERROR("mic plugin: Problem getting CPU utilization: %s",
233 MicGetErrorString(status));
234 return (-1);
235 }
237 if (show_cpu) {
238 mic_submit_cpu(mic, "user", -1, core_util.sum.user);
239 mic_submit_cpu(mic, "sys", -1, core_util.sum.sys);
240 mic_submit_cpu(mic, "nice", -1, core_util.sum.nice);
241 mic_submit_cpu(mic, "idle", -1, core_util.sum.idle);
242 }
244 if (show_cpu_cores) {
245 for (int j = 0; j < core_util.core; j++) {
246 mic_submit_cpu(mic, "user", j, core_jiffs[j].user);
247 mic_submit_cpu(mic, "sys", j, core_jiffs[j].sys);
248 mic_submit_cpu(mic, "nice", j, core_jiffs[j].nice);
249 mic_submit_cpu(mic, "idle", j, core_jiffs[j].idle);
250 }
251 }
252 return (0);
253 }
255 static void mic_submit_power(int micnumber, const char *type,
256 const char *type_instance, gauge_t value) {
257 value_list_t vl = VALUE_LIST_INIT;
259 vl.values = &(value_t){.gauge = value};
260 vl.values_len = 1;
262 strncpy(vl.host, hostname_g, sizeof(vl.host));
263 strncpy(vl.plugin, "mic", sizeof(vl.plugin));
264 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", micnumber);
265 strncpy(vl.type, type, sizeof(vl.type));
266 strncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
268 plugin_dispatch_values(&vl);
269 }
271 /* Gather Power Information */
272 static int mic_read_power(int mic) {
273 U32 ret;
274 MicPwrUsage power_use;
276 ret = MicGetPowerUsage(mic_handle, &power_use);
277 if (ret != MIC_ACCESS_API_SUCCESS) {
278 ERROR("mic plugin: Problem getting Power Usage: %s",
279 MicGetErrorString(ret));
280 return (1);
281 }
283 /* power is in uWatts, current in mA, voltage in uVolts.. convert to
284 * base unit */
285 #define SUB_POWER(name) \
286 do { \
287 if (ignorelist_match(power_ignore, #name) == 0) \
288 mic_submit_power(mic, "power", #name, \
289 (gauge_t)power_use.name.prr * 0.000001); \
290 } while (0)
291 #define SUB_VOLTS(name) \
292 do { \
293 if (ignorelist_match(power_ignore, #name) == 0) { \
294 mic_submit_power(mic, "power", #name, \
295 (gauge_t)(power_use.name.pwr * 0.000001)); \
296 mic_submit_power(mic, "current", #name, \
297 (gauge_t)(power_use.name.cur * 0.001)); \
298 mic_submit_power(mic, "voltage", #name, \
299 (gauge_t)(power_use.name.volt * 0.000001)); \
300 } \
301 } while (0)
303 SUB_POWER(total0);
304 SUB_POWER(total1);
305 SUB_POWER(inst);
306 SUB_POWER(imax);
307 SUB_POWER(pcie);
308 SUB_POWER(c2x3);
309 SUB_POWER(c2x4);
310 SUB_VOLTS(vccp);
311 SUB_VOLTS(vddg);
312 SUB_VOLTS(vddq);
314 return (0);
315 }
317 static int mic_read(void) {
318 U32 ret;
319 int error;
321 error = 0;
322 for (int i = 0; i < num_mics; i++) {
323 ret = MicInitAdapter(&mic_handle, &mics[i]);
324 if (ret != MIC_ACCESS_API_SUCCESS) {
325 ERROR("mic plugin: Problem initializing MicAdapter: %s",
326 MicGetErrorString(ret));
327 error = 1;
328 }
330 if (error == 0 && show_memory)
331 error = mic_read_memory(i);
333 if (error == 0 && show_temps)
334 error = mic_read_temps(i);
336 if (error == 0 && (show_cpu || show_cpu_cores))
337 error = mic_read_cpu(i);
339 if (error == 0 && (show_power))
340 error = mic_read_power(i);
342 ret = MicCloseAdapter(mic_handle);
343 if (ret != MIC_ACCESS_API_SUCCESS) {
344 ERROR("mic plugin: Problem closing MicAdapter: %s",
345 MicGetErrorString(ret));
346 error = 2;
347 break;
348 }
349 }
350 if (num_mics == 0)
351 error = 3;
352 return error;
353 }
355 static int mic_shutdown(void) {
356 if (mic_handle)
357 MicCloseAPI(&mic_handle);
358 mic_handle = NULL;
360 return (0);
361 }
363 void module_register(void) {
364 plugin_register_init("mic", mic_init);
365 plugin_register_shutdown("mic", mic_shutdown);
366 plugin_register_read("mic", mic_read);
367 plugin_register_config("mic", mic_config, config_keys, config_keys_num);
368 } /* void module_register */
370 /*
371 * vim: set shiftwidth=8 softtabstop=8 noet textwidth=78 :
372 */