1 /**
2 * collectd - src/mic.c
3 * Copyright (C) 2013 Battelle Memorial Institute
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; only version 2 of the License is applicable.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * Authors:
19 * Evan Felix <evan.felix at pnnl.gov>
20 **/
22 #include "collectd.h"
24 #include "plugin.h"
25 #include "common.h"
26 #include "utils_ignorelist.h"
28 #include <MicAccessTypes.h>
29 #include <MicAccessErrorTypes.h>
30 #include <MicAccessApi.h>
31 #include <MicThermalAPI.h>
32 #include <MicPowerManagerAPI.h>
34 #define MAX_MICS 32
35 #define MAX_CORES 256
37 static MicDeviceOnSystem mics[MAX_MICS];
38 static U32 num_mics = 0;
39 static HANDLE mic_handle = NULL;
41 static int const therm_ids[] = {
42 eMicThermalDie, eMicThermalDevMem, eMicThermalFin, eMicThermalFout,
43 eMicThermalVccp, eMicThermalVddg, eMicThermalVddq };
44 static char const * const therm_names[] = {
45 "die", "devmem", "fin", "fout",
46 "vccp", "vddg", "vddq" };
48 static const char *config_keys[] =
49 {
50 "ShowCPU",
51 "ShowCPUCores",
52 "ShowMemory",
53 "ShowTemperatures",
54 "Temperature",
55 "IgnoreSelectedTemperature",
56 "ShowPower",
57 "Power",
58 "IgnoreSelectedPower"
59 };
60 static int config_keys_num = STATIC_ARRAY_SIZE (config_keys);
62 static _Bool show_cpu = 1;
63 static _Bool show_cpu_cores = 1;
64 static _Bool show_memory = 1;
65 static _Bool show_temps = 1;
66 static ignorelist_t *temp_ignore = NULL;
67 static _Bool show_power = 1;
68 static ignorelist_t *power_ignore = NULL;
70 static int mic_init (void)
71 {
72 U32 ret;
73 U32 mic_count;
75 if (mic_handle)
76 return (0);
78 mic_count = (U32) STATIC_ARRAY_SIZE(mics);
79 ret = MicInitAPI(&mic_handle, eTARGET_SCIF_DRIVER, mics, &mic_count);
80 if (ret != MIC_ACCESS_API_SUCCESS) {
81 ERROR("mic plugin: Problem initializing MicAccessAPI: %s",
82 MicGetErrorString(ret));
83 }
84 DEBUG("mic plugin: found: %"PRIu32" MIC(s)",mic_count);
86 if (mic_count<0 || mic_count>=MAX_MICS) {
87 ERROR("mic plugin: No Intel MICs in system");
88 return (1);
89 }
90 else {
91 num_mics = mic_count;
92 return (0);
93 }
94 }
96 static int mic_config (const char *key, const char *value) {
97 if (temp_ignore == NULL)
98 temp_ignore = ignorelist_create(1);
99 if (power_ignore == NULL)
100 power_ignore = ignorelist_create(1);
101 if (temp_ignore == NULL || power_ignore == NULL)
102 return (1);
104 if (strcasecmp("ShowCPU",key) == 0)
105 {
106 show_cpu = IS_TRUE(value);
107 }
108 else if (strcasecmp("ShowCPUCores",key) == 0)
109 {
110 show_cpu_cores = IS_TRUE(value);
111 }
112 else if (strcasecmp("ShowTemperatures",key) == 0)
113 {
114 show_temps = IS_TRUE(value);
115 }
116 else if (strcasecmp("ShowMemory",key) == 0)
117 {
118 show_memory = IS_TRUE(value);
119 }
120 else if (strcasecmp("ShowPower",key) == 0)
121 {
122 show_power = IS_TRUE(value);
123 }
124 else if (strcasecmp("Temperature",key) == 0)
125 {
126 ignorelist_add(temp_ignore,value);
127 }
128 else if (strcasecmp("IgnoreSelectedTemperature",key) == 0)
129 {
130 int invert = 1;
131 if (IS_TRUE(value))
132 invert = 0;
133 ignorelist_set_invert(temp_ignore,invert);
134 }
135 else if (strcasecmp("Power",key) == 0)
136 {
137 ignorelist_add(power_ignore,value);
138 }
139 else if (strcasecmp("IgnoreSelectedPower",key) == 0)
140 {
141 int invert = 1;
142 if (IS_TRUE(value))
143 invert = 0;
144 ignorelist_set_invert(power_ignore,invert);
145 }
146 else
147 {
148 return (-1);
149 }
150 return (0);
151 }
153 static void mic_submit_memory_use(int micnumber, const char *type_instance, U32 value)
154 {
155 value_list_t vl = VALUE_LIST_INIT;
157 /* MicAccessAPI reports KB's of memory, adjust for this */
158 DEBUG("mic plugin: Memory Value Report; %u %lf",value,((gauge_t)value)*1024.0);
160 vl.values = &(value_t) { .gauge = ((gauge_t)value) * 1024.0 };
161 vl.values_len = 1;
163 strncpy (vl.plugin, "mic", sizeof (vl.plugin));
164 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance), "%i", micnumber);
165 strncpy (vl.type, "memory", sizeof (vl.type));
166 strncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
168 plugin_dispatch_values (&vl);
169 }
171 /* Gather memory Utilization */
172 static int mic_read_memory(int mic)
173 {
174 U32 ret;
175 U32 mem_total,mem_free,mem_bufs;
177 ret = MicGetMemoryUtilization(mic_handle,&mem_total,&mem_free,&mem_bufs);
178 if (ret != MIC_ACCESS_API_SUCCESS) {
179 ERROR("mic plugin: Problem getting Memory Utilization: %s",
180 MicGetErrorString(ret));
181 return (1);
182 }
183 mic_submit_memory_use(mic,"free",mem_free);
184 mic_submit_memory_use(mic,"used",mem_total-mem_free-mem_bufs);
185 mic_submit_memory_use(mic,"buffered",mem_bufs);
186 DEBUG("mic plugin: Memory Read: %u %u %u",mem_total,mem_free,mem_bufs);
187 return (0);
188 }
190 static void mic_submit_temp(int micnumber, const char *type, gauge_t value)
191 {
192 value_list_t vl = VALUE_LIST_INIT;
194 vl.values = &(value_t) { .gauge = value };
195 vl.values_len = 1;
197 strncpy (vl.host, hostname_g, sizeof (vl.host));
198 strncpy (vl.plugin, "mic", sizeof (vl.plugin));
199 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance),
200 "%i", micnumber);
201 strncpy (vl.type, "temperature", sizeof (vl.type));
202 strncpy (vl.type_instance, type, sizeof (vl.type_instance));
204 plugin_dispatch_values (&vl);
205 }
207 /* Gather Temperature Information */
208 static int mic_read_temps(int mic)
209 {
210 size_t num_therms = STATIC_ARRAY_SIZE(therm_ids);
212 for (size_t j = 0; j < num_therms; j++) {
213 U32 status;
214 U32 temp_buffer;
215 U32 buffer_size = (U32)sizeof(temp_buffer);
216 char const *name = therm_names[j];
218 if (ignorelist_match(temp_ignore, name) != 0)
219 continue;
221 status = MicGetTemperature(mic_handle, therm_ids[j],
222 &temp_buffer, &buffer_size);
223 if (status != MIC_ACCESS_API_SUCCESS) {
224 ERROR("mic plugin: Error reading temperature \"%s\": "
225 "%s", name, MicGetErrorString(status));
226 return (1);
227 }
228 mic_submit_temp(mic, name, temp_buffer);
229 }
230 return (0);
231 }
233 static void mic_submit_cpu(int micnumber, const char *type_instance,
234 int core, derive_t value)
235 {
236 value_list_t vl = VALUE_LIST_INIT;
238 vl.values = &(value_t) { .derive = value };
239 vl.values_len = 1;
241 strncpy (vl.host, hostname_g, sizeof (vl.host));
242 strncpy (vl.plugin, "mic", sizeof (vl.plugin));
243 if (core < 0) /* global aggregation */
244 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance),
245 "%i", micnumber);
246 else /* per-core statistics */
247 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance),
248 "%i-cpu-%i", micnumber, core);
249 strncpy (vl.type, "cpu", sizeof (vl.type));
250 strncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
252 plugin_dispatch_values (&vl);
253 }
255 /*Gather CPU Utilization Information */
256 static int mic_read_cpu(int mic)
257 {
258 MicCoreUtil core_util;
259 MicCoreJiff core_jiffs[MAX_CORES];
260 U32 core_jiffs_size;
261 U32 status;
263 core_jiffs_size = MAX_CORES * sizeof(MicCoreJiff);
264 status = MicGetCoreUtilization(mic_handle, &core_util,
265 core_jiffs, &core_jiffs_size);
266 if (status != MIC_ACCESS_API_SUCCESS) {
267 ERROR("mic plugin: Problem getting CPU utilization: %s",
268 MicGetErrorString(status));
269 return(-1);
270 }
272 if (show_cpu) {
273 mic_submit_cpu(mic, "user", -1, core_util.sum.user);
274 mic_submit_cpu(mic, "sys", -1, core_util.sum.sys);
275 mic_submit_cpu(mic, "nice", -1, core_util.sum.nice);
276 mic_submit_cpu(mic, "idle", -1, core_util.sum.idle);
277 }
279 if (show_cpu_cores) {
280 for (int j = 0; j < core_util.core; j++) {
281 mic_submit_cpu(mic, "user", j, core_jiffs[j].user);
282 mic_submit_cpu(mic, "sys", j, core_jiffs[j].sys);
283 mic_submit_cpu(mic, "nice", j, core_jiffs[j].nice);
284 mic_submit_cpu(mic, "idle", j, core_jiffs[j].idle);
285 }
286 }
287 return (0);
288 }
290 static void mic_submit_power(int micnumber, const char *type, const char *type_instance, gauge_t value)
291 {
292 value_list_t vl = VALUE_LIST_INIT;
294 vl.values = &(value_t) { .gauge = value };
295 vl.values_len = 1;
297 strncpy (vl.host, hostname_g, sizeof (vl.host));
298 strncpy (vl.plugin, "mic", sizeof (vl.plugin));
299 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance), "%i", micnumber);
300 strncpy (vl.type, type, sizeof (vl.type));
301 strncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
303 plugin_dispatch_values (&vl);
304 }
306 /* Gather Power Information */
307 static int mic_read_power(int mic)
308 {
309 U32 ret;
310 MicPwrUsage power_use;
312 ret = MicGetPowerUsage(mic_handle,&power_use);
313 if (ret != MIC_ACCESS_API_SUCCESS) {
314 ERROR("mic plugin: Problem getting Power Usage: %s",
315 MicGetErrorString(ret));
316 return (1);
317 }
319 /* power is in uWatts, current in mA, voltage in uVolts.. convert to
320 * base unit */
321 #define SUB_POWER(name) do { if (ignorelist_match(power_ignore,#name)==0) \
322 mic_submit_power(mic,"power",#name,(gauge_t)power_use.name.prr*0.000001); \
323 } while(0)
324 #define SUB_VOLTS(name) do { if (ignorelist_match(power_ignore,#name)==0) {\
325 mic_submit_power(mic,"power",#name,(gauge_t)(power_use.name.pwr*0.000001)); \
326 mic_submit_power(mic,"current",#name,(gauge_t)(power_use.name.cur*0.001)); \
327 mic_submit_power(mic,"voltage",#name,(gauge_t)(power_use.name.volt*0.000001)); \
328 }} while(0)
330 SUB_POWER(total0);
331 SUB_POWER(total1);
332 SUB_POWER(inst);
333 SUB_POWER(imax);
334 SUB_POWER(pcie);
335 SUB_POWER(c2x3);
336 SUB_POWER(c2x4);
337 SUB_VOLTS(vccp);
338 SUB_VOLTS(vddg);
339 SUB_VOLTS(vddq);
341 return (0);
342 }
344 static int mic_read (void)
345 {
346 U32 ret;
347 int error;
349 error = 0;
350 for (int i = 0;i<num_mics;i++) {
351 ret = MicInitAdapter(&mic_handle,&mics[i]);
352 if (ret != MIC_ACCESS_API_SUCCESS) {
353 ERROR("mic plugin: Problem initializing MicAdapter: %s",
354 MicGetErrorString(ret));
355 error = 1;
356 }
358 if (error == 0 && show_memory)
359 error = mic_read_memory(i);
361 if (error == 0 && show_temps)
362 error = mic_read_temps(i);
364 if (error == 0 && (show_cpu || show_cpu_cores))
365 error = mic_read_cpu(i);
367 if (error == 0 && (show_power))
368 error = mic_read_power(i);
370 ret = MicCloseAdapter(mic_handle);
371 if (ret != MIC_ACCESS_API_SUCCESS) {
372 ERROR("mic plugin: Problem closing MicAdapter: %s",
373 MicGetErrorString(ret));
374 error = 2;
375 break;
376 }
377 }
378 if (num_mics==0)
379 error = 3;
380 return error;
381 }
384 static int mic_shutdown (void)
385 {
386 if (mic_handle)
387 MicCloseAPI(&mic_handle);
388 mic_handle = NULL;
390 return (0);
391 }
393 void module_register (void)
394 {
395 plugin_register_init ("mic", mic_init);
396 plugin_register_shutdown ("mic", mic_shutdown);
397 plugin_register_read ("mic", mic_read);
398 plugin_register_config ("mic",mic_config, config_keys, config_keys_num);
399 } /* void module_register */
401 /*
402 * vim: set shiftwidth=8 softtabstop=8 noet textwidth=78 :
403 */