1 /**
2 * collectd - src/mic.c
3 * Copyright (C) 2013 Battelle Memorial Institute
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; only version 2 of the License is applicable.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License along
15 * with this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *
18 * Authors:
19 * Evan Felix <evan.felix at pnnl.gov>
20 **/
22 #include "collectd.h"
23 #include "plugin.h"
24 #include "common.h"
25 #include "utils_ignorelist.h"
27 #include <MicAccessTypes.h>
28 #include <MicAccessErrorTypes.h>
29 #include <MicAccessApi.h>
30 #include <MicThermalAPI.h>
31 #include <MicPowerManagerAPI.h>
33 #define MAX_MICS 32
34 #define MAX_CORES 256
36 static MicDeviceOnSystem mics[MAX_MICS];
37 static U32 num_mics = 0;
38 static HANDLE mic_handle = NULL;
40 static int const therm_ids[] = {
41 eMicThermalDie, eMicThermalDevMem, eMicThermalFin, eMicThermalFout,
42 eMicThermalVccp, eMicThermalVddg, eMicThermalVddq };
43 static char const * const therm_names[] = {
44 "die", "devmem", "fin", "fout",
45 "vccp", "vddg", "vddq" };
47 static const char *config_keys[] =
48 {
49 "ShowCPU",
50 "ShowCPUCores",
51 "ShowMemory",
52 "ShowTemperatures",
53 "Temperature",
54 "IgnoreSelectedTemperature",
55 "ShowPower",
56 "Power",
57 "IgnoreSelectedPower"
58 };
59 static int config_keys_num = STATIC_ARRAY_SIZE (config_keys);
61 static _Bool show_cpu = 1;
62 static _Bool show_cpu_cores = 1;
63 static _Bool show_memory = 1;
64 static _Bool show_temps = 1;
65 static ignorelist_t *temp_ignore = NULL;
66 static _Bool show_power = 1;
67 static ignorelist_t *power_ignore = NULL;
69 static int mic_init (void)
70 {
71 U32 ret;
72 U32 mic_count;
74 if (mic_handle)
75 return (0);
77 mic_count = (U32) STATIC_ARRAY_SIZE(mics);
78 ret = MicInitAPI(&mic_handle, eTARGET_SCIF_DRIVER, mics, &mic_count);
79 if (ret != MIC_ACCESS_API_SUCCESS) {
80 ERROR("mic plugin: Problem initializing MicAccessAPI: %s",
81 MicGetErrorString(ret));
82 }
83 DEBUG("mic plugin: found: %"PRIu32" MIC(s)",mic_count);
85 if (mic_count<0 || mic_count>=MAX_MICS) {
86 ERROR("mic plugin: No Intel MICs in system");
87 return (1);
88 }
89 else {
90 num_mics = mic_count;
91 return (0);
92 }
93 }
95 static int mic_config (const char *key, const char *value) {
96 if (temp_ignore == NULL)
97 temp_ignore = ignorelist_create(1);
98 if (power_ignore == NULL)
99 power_ignore = ignorelist_create(1);
100 if (temp_ignore == NULL || power_ignore == NULL)
101 return (1);
103 if (strcasecmp("ShowCPU",key) == 0)
104 {
105 show_cpu = IS_TRUE(value);
106 }
107 else if (strcasecmp("ShowCPUCores",key) == 0)
108 {
109 show_cpu_cores = IS_TRUE(value);
110 }
111 else if (strcasecmp("ShowTemperatures",key) == 0)
112 {
113 show_temps = IS_TRUE(value);
114 }
115 else if (strcasecmp("ShowMemory",key) == 0)
116 {
117 show_memory = IS_TRUE(value);
118 }
119 else if (strcasecmp("ShowPower",key) == 0)
120 {
121 show_power = IS_TRUE(value);
122 }
123 else if (strcasecmp("Temperature",key) == 0)
124 {
125 ignorelist_add(temp_ignore,value);
126 }
127 else if (strcasecmp("IgnoreSelectedTemperature",key) == 0)
128 {
129 int invert = 1;
130 if (IS_TRUE(value))
131 invert = 0;
132 ignorelist_set_invert(temp_ignore,invert);
133 }
134 else if (strcasecmp("Power",key) == 0)
135 {
136 ignorelist_add(power_ignore,value);
137 }
138 else if (strcasecmp("IgnoreSelectedPower",key) == 0)
139 {
140 int invert = 1;
141 if (IS_TRUE(value))
142 invert = 0;
143 ignorelist_set_invert(power_ignore,invert);
144 }
145 else
146 {
147 return (-1);
148 }
149 return (0);
150 }
152 static void mic_submit_memory_use(int micnumber, const char *type_instance, U32 val)
153 {
154 value_t values[1];
155 value_list_t vl = VALUE_LIST_INIT;
157 /* MicAccessAPI reports KB's of memory, adjust for this */
158 DEBUG("mic plugin: Memory Value Report; %u %lf",val,((gauge_t)val)*1024.0);
159 values[0].gauge = ((gauge_t)val)*1024.0;
161 vl.values=values;
162 vl.values_len=1;
164 strncpy (vl.host, hostname_g, sizeof (vl.host));
165 strncpy (vl.plugin, "mic", sizeof (vl.plugin));
166 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance), "%i", micnumber);
167 strncpy (vl.type, "memory", sizeof (vl.type));
168 strncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
170 plugin_dispatch_values (&vl);
171 }
173 /* Gather memory Utilization */
174 static int mic_read_memory(int mic)
175 {
176 U32 ret;
177 U32 mem_total,mem_free,mem_bufs;
179 ret = MicGetMemoryUtilization(mic_handle,&mem_total,&mem_free,&mem_bufs);
180 if (ret != MIC_ACCESS_API_SUCCESS) {
181 ERROR("mic plugin: Problem getting Memory Utilization: %s",
182 MicGetErrorString(ret));
183 return (1);
184 }
185 mic_submit_memory_use(mic,"free",mem_free);
186 mic_submit_memory_use(mic,"used",mem_total-mem_free-mem_bufs);
187 mic_submit_memory_use(mic,"buffered",mem_bufs);
188 DEBUG("mic plugin: Memory Read: %u %u %u",mem_total,mem_free,mem_bufs);
189 return (0);
190 }
192 static void mic_submit_temp(int micnumber, const char *type, gauge_t val)
193 {
194 value_t values[1];
195 value_list_t vl = VALUE_LIST_INIT;
197 values[0].gauge = val;
199 vl.values=values;
200 vl.values_len=1;
202 strncpy (vl.host, hostname_g, sizeof (vl.host));
203 strncpy (vl.plugin, "mic", sizeof (vl.plugin));
204 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance),
205 "%i", micnumber);
206 strncpy (vl.type, "temperature", sizeof (vl.type));
207 strncpy (vl.type_instance, type, sizeof (vl.type_instance));
209 plugin_dispatch_values (&vl);
210 }
212 /* Gather Temperature Information */
213 static int mic_read_temps(int mic)
214 {
215 size_t num_therms = STATIC_ARRAY_SIZE(therm_ids);
216 size_t j;
218 for (j = 0; j < num_therms; j++) {
219 U32 status;
220 U32 temp_buffer;
221 U32 buffer_size = (U32)sizeof(temp_buffer);
222 char const *name = therm_names[j];
224 if (ignorelist_match(temp_ignore, name) != 0)
225 continue;
227 status = MicGetTemperature(mic_handle, therm_ids[j],
228 &temp_buffer, &buffer_size);
229 if (status != MIC_ACCESS_API_SUCCESS) {
230 ERROR("mic plugin: Error reading temperature \"%s\": "
231 "%s", name, MicGetErrorString(status));
232 return (1);
233 }
234 mic_submit_temp(mic, name, temp_buffer);
235 }
236 return (0);
237 }
239 static void mic_submit_cpu(int micnumber, const char *type_instance,
240 int core, derive_t val)
241 {
242 value_t values[1];
243 value_list_t vl = VALUE_LIST_INIT;
245 values[0].derive = val;
247 vl.values=values;
248 vl.values_len=1;
250 strncpy (vl.host, hostname_g, sizeof (vl.host));
251 strncpy (vl.plugin, "mic", sizeof (vl.plugin));
252 if (core < 0) /* global aggregation */
253 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance),
254 "%i", micnumber);
255 else /* per-core statistics */
256 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance),
257 "%i-cpu-%i", micnumber, core);
258 strncpy (vl.type, "cpu", sizeof (vl.type));
259 strncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
261 plugin_dispatch_values (&vl);
262 }
264 /*Gather CPU Utilization Information */
265 static int mic_read_cpu(int mic)
266 {
267 MicCoreUtil core_util;
268 MicCoreJiff core_jiffs[MAX_CORES];
269 U32 core_jiffs_size;
270 U32 status;
272 core_jiffs_size = MAX_CORES * sizeof(MicCoreJiff);
273 status = MicGetCoreUtilization(mic_handle, &core_util,
274 core_jiffs, &core_jiffs_size);
275 if (status != MIC_ACCESS_API_SUCCESS) {
276 ERROR("mic plugin: Problem getting CPU utilization: %s",
277 MicGetErrorString(status));
278 return(-1);
279 }
281 if (show_cpu) {
282 mic_submit_cpu(mic, "user", -1, core_util.sum.user);
283 mic_submit_cpu(mic, "sys", -1, core_util.sum.sys);
284 mic_submit_cpu(mic, "nice", -1, core_util.sum.nice);
285 mic_submit_cpu(mic, "idle", -1, core_util.sum.idle);
286 }
288 if (show_cpu_cores) {
289 int j;
290 for (j = 0; j < core_util.core; j++) {
291 mic_submit_cpu(mic, "user", j, core_jiffs[j].user);
292 mic_submit_cpu(mic, "sys", j, core_jiffs[j].sys);
293 mic_submit_cpu(mic, "nice", j, core_jiffs[j].nice);
294 mic_submit_cpu(mic, "idle", j, core_jiffs[j].idle);
295 }
296 }
297 return (0);
298 }
300 static void mic_submit_power(int micnumber, const char *type, const char *type_instance, gauge_t val)
301 {
302 value_t values[1];
303 value_list_t vl = VALUE_LIST_INIT;
305 values[0].gauge = val;
307 vl.values=values;
308 vl.values_len=1;
310 strncpy (vl.host, hostname_g, sizeof (vl.host));
311 strncpy (vl.plugin, "mic", sizeof (vl.plugin));
312 ssnprintf (vl.plugin_instance, sizeof (vl.plugin_instance), "%i", micnumber);
313 strncpy (vl.type, type, sizeof (vl.type));
314 strncpy (vl.type_instance, type_instance, sizeof (vl.type_instance));
316 plugin_dispatch_values (&vl);
317 }
319 /* Gather Power Information */
320 static int mic_read_power(int mic)
321 {
322 U32 ret;
323 MicPwrUsage power_use;
325 ret = MicGetPowerUsage(mic_handle,&power_use);
326 if (ret != MIC_ACCESS_API_SUCCESS) {
327 ERROR("mic plugin: Problem getting Power Usage: %s",
328 MicGetErrorString(ret));
329 return (1);
330 }
332 /* power is in uWatts, current in mA, voltage in uVolts.. convert to
333 * base unit */
334 #define SUB_POWER(name) do { if (ignorelist_match(power_ignore,#name)==0) \
335 mic_submit_power(mic,"power",#name,(gauge_t)power_use.name.prr*0.000001); \
336 } while(0)
337 #define SUB_VOLTS(name) do { if (ignorelist_match(power_ignore,#name)==0) {\
338 mic_submit_power(mic,"power",#name,(gauge_t)(power_use.name.pwr*0.000001)); \
339 mic_submit_power(mic,"current",#name,(gauge_t)(power_use.name.cur*0.001)); \
340 mic_submit_power(mic,"voltage",#name,(gauge_t)(power_use.name.volt*0.000001)); \
341 }} while(0)
343 SUB_POWER(total0);
344 SUB_POWER(total1);
345 SUB_POWER(inst);
346 SUB_POWER(imax);
347 SUB_POWER(pcie);
348 SUB_POWER(c2x3);
349 SUB_POWER(c2x4);
350 SUB_VOLTS(vccp);
351 SUB_VOLTS(vddg);
352 SUB_VOLTS(vddq);
354 return (0);
355 }
357 static int mic_read (void)
358 {
359 int i;
360 U32 ret;
361 int error;
363 error=0;
364 for (i=0;i<num_mics;i++) {
365 ret = MicInitAdapter(&mic_handle,&mics[i]);
366 if (ret != MIC_ACCESS_API_SUCCESS) {
367 ERROR("mic plugin: Problem initializing MicAdapter: %s",
368 MicGetErrorString(ret));
369 error=1;
370 }
372 if (error == 0 && show_memory)
373 error = mic_read_memory(i);
375 if (error == 0 && show_temps)
376 error = mic_read_temps(i);
378 if (error == 0 && (show_cpu || show_cpu_cores))
379 error = mic_read_cpu(i);
381 if (error == 0 && (show_power))
382 error = mic_read_power(i);
384 ret = MicCloseAdapter(mic_handle);
385 if (ret != MIC_ACCESS_API_SUCCESS) {
386 ERROR("mic plugin: Problem closing MicAdapter: %s",
387 MicGetErrorString(ret));
388 error=2;
389 break;
390 }
391 }
392 if (num_mics==0)
393 error=3;
394 return error;
395 }
398 static int mic_shutdown (void)
399 {
400 if (mic_handle)
401 MicCloseAPI(&mic_handle);
402 mic_handle = NULL;
404 return (0);
405 }
407 void module_register (void)
408 {
409 plugin_register_init ("mic", mic_init);
410 plugin_register_shutdown ("mic", mic_shutdown);
411 plugin_register_read ("mic", mic_read);
412 plugin_register_config ("mic",mic_config, config_keys, config_keys_num);
413 } /* void module_register */
415 /*
416 * vim: set shiftwidth=8 softtabstop=8 noet textwidth=78 :
417 */