1 /**
2 * collectd - src/mic.c
3 * Copyright (C) 2013 Battelle Memorial Institute
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; only version 2 of the License is applicable.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * Authors:
19 * Evan Felix <evan.felix at pnnl.gov>
20 **/
22 #include "collectd.h"
24 #include "common.h"
25 #include "plugin.h"
26 #include "utils_ignorelist.h"
28 #include <MicAccessApi.h>
29 #include <MicAccessErrorTypes.h>
30 #include <MicAccessTypes.h>
31 #include <MicPowerManagerAPI.h>
32 #include <MicThermalAPI.h>
34 #define MAX_MICS 32
35 #define MAX_CORES 256
37 static MicDeviceOnSystem mics[MAX_MICS];
38 static U32 num_mics = 0;
39 static HANDLE mic_handle = NULL;
41 static int const therm_ids[] = {
42 eMicThermalDie, eMicThermalDevMem, eMicThermalFin, eMicThermalFout,
43 eMicThermalVccp, eMicThermalVddg, eMicThermalVddq};
44 static char const *const therm_names[] = {"die", "devmem", "fin", "fout",
45 "vccp", "vddg", "vddq"};
47 static const char *config_keys[] = {
48 "ShowCPU", "ShowCPUCores", "ShowMemory",
49 "ShowTemperatures", "Temperature", "IgnoreSelectedTemperature",
50 "ShowPower", "Power", "IgnoreSelectedPower"};
51 static int config_keys_num = STATIC_ARRAY_SIZE(config_keys);
53 static _Bool show_cpu = 1;
54 static _Bool show_cpu_cores = 1;
55 static _Bool show_memory = 1;
56 static _Bool show_temps = 1;
57 static ignorelist_t *temp_ignore = NULL;
58 static _Bool show_power = 1;
59 static ignorelist_t *power_ignore = NULL;
61 static int mic_init(void) {
62 U32 ret;
63 U32 mic_count;
65 if (mic_handle)
66 return (0);
68 mic_count = (U32)STATIC_ARRAY_SIZE(mics);
69 ret = MicInitAPI(&mic_handle, eTARGET_SCIF_DRIVER, mics, &mic_count);
70 if (ret != MIC_ACCESS_API_SUCCESS) {
71 ERROR("mic plugin: Problem initializing MicAccessAPI: %s",
72 MicGetErrorString(ret));
73 }
74 DEBUG("mic plugin: found: %" PRIu32 " MIC(s)", mic_count);
76 if (mic_count < 0 || mic_count >= MAX_MICS) {
77 ERROR("mic plugin: No Intel MICs in system");
78 return (1);
79 } else {
80 num_mics = mic_count;
81 return (0);
82 }
83 }
85 static int mic_config(const char *key, const char *value) {
86 if (temp_ignore == NULL)
87 temp_ignore = ignorelist_create(1);
88 if (power_ignore == NULL)
89 power_ignore = ignorelist_create(1);
90 if (temp_ignore == NULL || power_ignore == NULL)
91 return (1);
93 if (strcasecmp("ShowCPU", key) == 0) {
94 show_cpu = IS_TRUE(value);
95 } else if (strcasecmp("ShowCPUCores", key) == 0) {
96 show_cpu_cores = IS_TRUE(value);
97 } else if (strcasecmp("ShowTemperatures", key) == 0) {
98 show_temps = IS_TRUE(value);
99 } else if (strcasecmp("ShowMemory", key) == 0) {
100 show_memory = IS_TRUE(value);
101 } else if (strcasecmp("ShowPower", key) == 0) {
102 show_power = IS_TRUE(value);
103 } else if (strcasecmp("Temperature", key) == 0) {
104 ignorelist_add(temp_ignore, value);
105 } else if (strcasecmp("IgnoreSelectedTemperature", key) == 0) {
106 int invert = 1;
107 if (IS_TRUE(value))
108 invert = 0;
109 ignorelist_set_invert(temp_ignore, invert);
110 } else if (strcasecmp("Power", key) == 0) {
111 ignorelist_add(power_ignore, value);
112 } else if (strcasecmp("IgnoreSelectedPower", key) == 0) {
113 int invert = 1;
114 if (IS_TRUE(value))
115 invert = 0;
116 ignorelist_set_invert(power_ignore, invert);
117 } else {
118 return (-1);
119 }
120 return (0);
121 }
123 static void mic_submit_memory_use(int micnumber, const char *type_instance,
124 U32 val) {
125 value_t values[1];
126 value_list_t vl = VALUE_LIST_INIT;
128 /* MicAccessAPI reports KB's of memory, adjust for this */
129 DEBUG("mic plugin: Memory Value Report; %u %lf", val,
130 ((gauge_t)val) * 1024.0);
131 values[0].gauge = ((gauge_t)val) * 1024.0;
133 vl.values = values;
134 vl.values_len = 1;
136 strncpy(vl.host, hostname_g, sizeof(vl.host));
137 strncpy(vl.plugin, "mic", sizeof(vl.plugin));
138 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", micnumber);
139 strncpy(vl.type, "memory", sizeof(vl.type));
140 strncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
142 plugin_dispatch_values(&vl);
143 }
145 /* Gather memory Utilization */
146 static int mic_read_memory(int mic) {
147 U32 ret;
148 U32 mem_total, mem_free, mem_bufs;
150 ret = MicGetMemoryUtilization(mic_handle, &mem_total, &mem_free, &mem_bufs);
151 if (ret != MIC_ACCESS_API_SUCCESS) {
152 ERROR("mic plugin: Problem getting Memory Utilization: %s",
153 MicGetErrorString(ret));
154 return (1);
155 }
156 mic_submit_memory_use(mic, "free", mem_free);
157 mic_submit_memory_use(mic, "used", mem_total - mem_free - mem_bufs);
158 mic_submit_memory_use(mic, "buffered", mem_bufs);
159 DEBUG("mic plugin: Memory Read: %u %u %u", mem_total, mem_free, mem_bufs);
160 return (0);
161 }
163 static void mic_submit_temp(int micnumber, const char *type, gauge_t val) {
164 value_t values[1];
165 value_list_t vl = VALUE_LIST_INIT;
167 values[0].gauge = val;
169 vl.values = values;
170 vl.values_len = 1;
172 strncpy(vl.host, hostname_g, sizeof(vl.host));
173 strncpy(vl.plugin, "mic", sizeof(vl.plugin));
174 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", micnumber);
175 strncpy(vl.type, "temperature", sizeof(vl.type));
176 strncpy(vl.type_instance, type, sizeof(vl.type_instance));
178 plugin_dispatch_values(&vl);
179 }
181 /* Gather Temperature Information */
182 static int mic_read_temps(int mic) {
183 size_t num_therms = STATIC_ARRAY_SIZE(therm_ids);
185 for (size_t j = 0; j < num_therms; j++) {
186 U32 status;
187 U32 temp_buffer;
188 U32 buffer_size = (U32)sizeof(temp_buffer);
189 char const *name = therm_names[j];
191 if (ignorelist_match(temp_ignore, name) != 0)
192 continue;
194 status =
195 MicGetTemperature(mic_handle, therm_ids[j], &temp_buffer, &buffer_size);
196 if (status != MIC_ACCESS_API_SUCCESS) {
197 ERROR("mic plugin: Error reading temperature \"%s\": "
198 "%s",
199 name, MicGetErrorString(status));
200 return (1);
201 }
202 mic_submit_temp(mic, name, temp_buffer);
203 }
204 return (0);
205 }
207 static void mic_submit_cpu(int micnumber, const char *type_instance, int core,
208 derive_t val) {
209 value_t values[1];
210 value_list_t vl = VALUE_LIST_INIT;
212 values[0].derive = val;
214 vl.values = values;
215 vl.values_len = 1;
217 strncpy(vl.host, hostname_g, sizeof(vl.host));
218 strncpy(vl.plugin, "mic", sizeof(vl.plugin));
219 if (core < 0) /* global aggregation */
220 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", micnumber);
221 else /* per-core statistics */
222 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i-cpu-%i",
223 micnumber, core);
224 strncpy(vl.type, "cpu", sizeof(vl.type));
225 strncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
227 plugin_dispatch_values(&vl);
228 }
230 /*Gather CPU Utilization Information */
231 static int mic_read_cpu(int mic) {
232 MicCoreUtil core_util;
233 MicCoreJiff core_jiffs[MAX_CORES];
234 U32 core_jiffs_size;
235 U32 status;
237 core_jiffs_size = MAX_CORES * sizeof(MicCoreJiff);
238 status = MicGetCoreUtilization(mic_handle, &core_util, core_jiffs,
239 &core_jiffs_size);
240 if (status != MIC_ACCESS_API_SUCCESS) {
241 ERROR("mic plugin: Problem getting CPU utilization: %s",
242 MicGetErrorString(status));
243 return (-1);
244 }
246 if (show_cpu) {
247 mic_submit_cpu(mic, "user", -1, core_util.sum.user);
248 mic_submit_cpu(mic, "sys", -1, core_util.sum.sys);
249 mic_submit_cpu(mic, "nice", -1, core_util.sum.nice);
250 mic_submit_cpu(mic, "idle", -1, core_util.sum.idle);
251 }
253 if (show_cpu_cores) {
254 for (int j = 0; j < core_util.core; j++) {
255 mic_submit_cpu(mic, "user", j, core_jiffs[j].user);
256 mic_submit_cpu(mic, "sys", j, core_jiffs[j].sys);
257 mic_submit_cpu(mic, "nice", j, core_jiffs[j].nice);
258 mic_submit_cpu(mic, "idle", j, core_jiffs[j].idle);
259 }
260 }
261 return (0);
262 }
264 static void mic_submit_power(int micnumber, const char *type,
265 const char *type_instance, gauge_t val) {
266 value_t values[1];
267 value_list_t vl = VALUE_LIST_INIT;
269 values[0].gauge = val;
271 vl.values = values;
272 vl.values_len = 1;
274 strncpy(vl.host, hostname_g, sizeof(vl.host));
275 strncpy(vl.plugin, "mic", sizeof(vl.plugin));
276 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%i", micnumber);
277 strncpy(vl.type, type, sizeof(vl.type));
278 strncpy(vl.type_instance, type_instance, sizeof(vl.type_instance));
280 plugin_dispatch_values(&vl);
281 }
283 /* Gather Power Information */
284 static int mic_read_power(int mic) {
285 U32 ret;
286 MicPwrUsage power_use;
288 ret = MicGetPowerUsage(mic_handle, &power_use);
289 if (ret != MIC_ACCESS_API_SUCCESS) {
290 ERROR("mic plugin: Problem getting Power Usage: %s",
291 MicGetErrorString(ret));
292 return (1);
293 }
295 /* power is in uWatts, current in mA, voltage in uVolts.. convert to
296 * base unit */
297 #define SUB_POWER(name) \
298 do { \
299 if (ignorelist_match(power_ignore, #name) == 0) \
300 mic_submit_power(mic, "power", #name, \
301 (gauge_t)power_use.name.prr * 0.000001); \
302 } while (0)
303 #define SUB_VOLTS(name) \
304 do { \
305 if (ignorelist_match(power_ignore, #name) == 0) { \
306 mic_submit_power(mic, "power", #name, \
307 (gauge_t)(power_use.name.pwr * 0.000001)); \
308 mic_submit_power(mic, "current", #name, \
309 (gauge_t)(power_use.name.cur * 0.001)); \
310 mic_submit_power(mic, "voltage", #name, \
311 (gauge_t)(power_use.name.volt * 0.000001)); \
312 } \
313 } while (0)
315 SUB_POWER(total0);
316 SUB_POWER(total1);
317 SUB_POWER(inst);
318 SUB_POWER(imax);
319 SUB_POWER(pcie);
320 SUB_POWER(c2x3);
321 SUB_POWER(c2x4);
322 SUB_VOLTS(vccp);
323 SUB_VOLTS(vddg);
324 SUB_VOLTS(vddq);
326 return (0);
327 }
329 static int mic_read(void) {
330 U32 ret;
331 int error;
333 error = 0;
334 for (int i = 0; i < num_mics; i++) {
335 ret = MicInitAdapter(&mic_handle, &mics[i]);
336 if (ret != MIC_ACCESS_API_SUCCESS) {
337 ERROR("mic plugin: Problem initializing MicAdapter: %s",
338 MicGetErrorString(ret));
339 error = 1;
340 }
342 if (error == 0 && show_memory)
343 error = mic_read_memory(i);
345 if (error == 0 && show_temps)
346 error = mic_read_temps(i);
348 if (error == 0 && (show_cpu || show_cpu_cores))
349 error = mic_read_cpu(i);
351 if (error == 0 && (show_power))
352 error = mic_read_power(i);
354 ret = MicCloseAdapter(mic_handle);
355 if (ret != MIC_ACCESS_API_SUCCESS) {
356 ERROR("mic plugin: Problem closing MicAdapter: %s",
357 MicGetErrorString(ret));
358 error = 2;
359 break;
360 }
361 }
362 if (num_mics == 0)
363 error = 3;
364 return error;
365 }
367 static int mic_shutdown(void) {
368 if (mic_handle)
369 MicCloseAPI(&mic_handle);
370 mic_handle = NULL;
372 return (0);
373 }
375 void module_register(void) {
376 plugin_register_init("mic", mic_init);
377 plugin_register_shutdown("mic", mic_shutdown);
378 plugin_register_read("mic", mic_read);
379 plugin_register_config("mic", mic_config, config_keys, config_keys_num);
380 } /* void module_register */
382 /*
383 * vim: set shiftwidth=8 softtabstop=8 noet textwidth=78 :
384 */