865b0d128752a81ddd8d3a3bb878ee61305a60f2
1 /*-
2 * collectd - src/mcelog.c
3 * MIT License
4 *
5 * Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
25 * Authors:
26 * Maryam Tahhan <maryam.tahhan@intel.com>
27 * Volodymyr Mytnyk <volodymyrx.mytnyk@intel.com>
28 * Taras Chornyi <tarasx.chornyi@intel.com>
29 * Krzysztof Matczak <krzysztofx.matczak@intel.com>
30 */
32 #include "collectd.h"
34 #include "common.h"
35 #include "utils_llist.h"
37 #include <poll.h>
38 #include <sys/socket.h>
39 #include <sys/un.h>
40 #include <unistd.h>
/* Plugin name; also the prefix of the plugin's log messages. */
#define MCELOG_PLUGIN "mcelog"

/* Size of the scratch buffers passed to sstrerror(). */
#define MCELOG_BUFF_SIZE 1024

/* Used as the poll() timeout and as the delay between reconnect attempts. */
#define MCELOG_POLL_TIMEOUT 1000 /* ms */

/* Line prefixes looked for while parsing the reply of the mcelog server. */
#define MCELOG_SOCKET_STR "SOCKET"
#define MCELOG_DIMM_NAME "DMI_NAME"
#define MCELOG_CORRECTED_ERR "corrected memory errors"
#define MCELOG_UNCORRECTED_ERR "uncorrected memory errors"

/* Type instances of the dispatched values and notifications. */
#define MCELOG_CORRECTED_ERR_TYPE_INS "corrected_memory_errors"
#define MCELOG_UNCORRECTED_ERR_TYPE_INS "uncorrected_memory_errors"
52 typedef struct mcelog_config_s {
53 char logfile[PATH_MAX]; /* mcelog logfile */
54 pthread_t tid; /* poll thread id */
55 llist_t *dimms_list; /* DIMMs list */
56 /* lock for dimms cache */
57 _Bool persist;
58 pthread_mutex_t dimms_lock;
59 } mcelog_config_t;
61 typedef struct socket_adapter_s socket_adapter_t;
63 struct socket_adapter_s {
64 int sock_fd; /* mcelog server socket fd */
65 struct sockaddr_un unix_sock; /* mcelog client socket */
66 pthread_rwlock_t lock;
67 /* function pointers for socket operations */
68 int (*write)(socket_adapter_t *self, const char *msg, const size_t len);
69 int (*reinit)(socket_adapter_t *self);
70 int (*receive)(socket_adapter_t *self, FILE **p_file);
71 int (*close)(socket_adapter_t *self);
72 };
74 typedef struct mcelog_memory_rec_s {
75 int corrected_err_total; /* x total*/
76 int corrected_err_timed; /* x in 24h*/
77 char corrected_err_timed_period[DATA_MAX_NAME_LEN];
78 int uncorrected_err_total; /* x total*/
79 int uncorrected_err_timed; /* x in 24h*/
80 char uncorrected_err_timed_period[DATA_MAX_NAME_LEN];
81 char location[DATA_MAX_NAME_LEN]; /* SOCKET x CHANNEL x DIMM x*/
82 char dimm_name[DATA_MAX_NAME_LEN]; /* DMI_NAME "DIMM_F1" */
83 } mcelog_memory_rec_t;
85 static int socket_close(socket_adapter_t *self);
86 static int socket_write(socket_adapter_t *self, const char *msg,
87 const size_t len);
88 static int socket_reinit(socket_adapter_t *self);
89 static int socket_receive(socket_adapter_t *self, FILE **p_file);
91 static mcelog_config_t g_mcelog_config = {
92 .logfile = "/var/log/mcelog", .persist = 0,
93 };
95 static socket_adapter_t socket_adapter = {
96 .sock_fd = -1,
97 .unix_sock =
98 {
99 .sun_family = AF_UNIX, .sun_path = "/var/run/mcelog-client",
100 },
101 .lock = PTHREAD_RWLOCK_INITIALIZER,
102 .close = socket_close,
103 .write = socket_write,
104 .reinit = socket_reinit,
105 .receive = socket_receive,
106 };
/* Set by poll_worker() on entry and cleared by its cleanup handler;
 * mcelog_shutdown() checks it before cancelling the thread. */
static _Bool mcelog_thread_running = 0;
110 static void mcelog_free_dimms_list_records(llist_t *dimms_list) {
112 for (llentry_t *e = llist_head(dimms_list); e != NULL; e = e->next) {
113 sfree(e->key);
114 sfree(e->value);
115 }
116 }
118 /* Create or get dimm by dimm name/location */
119 static llentry_t *mcelog_dimm(const mcelog_memory_rec_t *rec,
120 llist_t *dimms_list) {
122 char dimm_name[DATA_MAX_NAME_LEN];
124 if (strlen(rec->dimm_name) > 0) {
125 ssnprintf(dimm_name, sizeof(dimm_name), "%s_%s", rec->location,
126 rec->dimm_name);
127 } else
128 sstrncpy(dimm_name, rec->location, sizeof(dimm_name));
130 llentry_t *dimm_le = llist_search(g_mcelog_config.dimms_list, dimm_name);
132 if (dimm_le == NULL) {
133 mcelog_memory_rec_t *dimm_mr = calloc(1, sizeof(*dimm_mr));
134 if (dimm_mr == NULL) {
135 ERROR(MCELOG_PLUGIN ": Error allocating dimm memory item");
136 return NULL;
137 }
138 char *p_name = strdup(dimm_name);
139 if (p_name == NULL) {
140 ERROR(MCELOG_PLUGIN ": strdup: error");
141 return NULL;
142 }
144 /* add new dimm */
145 dimm_le = llentry_create(p_name, dimm_mr);
146 if (dimm_le == NULL) {
147 ERROR(MCELOG_PLUGIN ": llentry_create(): error");
148 free(dimm_mr);
149 return NULL;
150 }
151 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
152 llist_append(g_mcelog_config.dimms_list, dimm_le);
153 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
154 }
156 return dimm_le;
157 }
159 static void mcelog_update_dimm_stats(llentry_t *dimm,
160 const mcelog_memory_rec_t *rec) {
161 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
162 memcpy(dimm->value, rec, sizeof(mcelog_memory_rec_t));
163 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
164 }
166 static int mcelog_config(oconfig_item_t *ci) {
167 int use_logfile = 0, use_memory = 0;
168 for (int i = 0; i < ci->children_num; i++) {
169 oconfig_item_t *child = ci->children + i;
170 if (strcasecmp("McelogLogfile", child->key) == 0) {
171 use_logfile = 1;
172 if (use_memory) {
173 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\", Memory "
174 "option is already configured.",
175 child->key);
176 return (-1);
177 }
178 if (cf_util_get_string_buffer(child, g_mcelog_config.logfile,
179 sizeof(g_mcelog_config.logfile)) < 0) {
180 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
181 child->key);
182 return (-1);
183 }
184 memset(socket_adapter.unix_sock.sun_path, 0,
185 sizeof(socket_adapter.unix_sock.sun_path));
186 } else if (strcasecmp("Memory", child->key) == 0) {
187 if (use_logfile) {
188 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\", Logfile "
189 "option is already configured.",
190 child->key);
191 return (-1);
192 }
193 use_memory = 1;
194 oconfig_item_t *mem_child = child->children;
195 for (int j = 0; j < child->children_num; j++) {
196 mem_child += j;
197 if (strcasecmp("McelogClientSocket", mem_child->key) == 0) {
198 if (cf_util_get_string_buffer(
199 mem_child, socket_adapter.unix_sock.sun_path,
200 sizeof(socket_adapter.unix_sock.sun_path)) < 0) {
201 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
202 mem_child->key);
203 return (-1);
204 }
205 } else if (strcasecmp("PersistentNotification", mem_child->key) == 0) {
206 if (cf_util_get_boolean(mem_child, &g_mcelog_config.persist) < 0) {
207 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
208 mem_child->key);
209 return (-1);
210 }
211 } else {
212 ERROR(MCELOG_PLUGIN ": Invalid Memory configuration option: \"%s\".",
213 mem_child->key);
214 return (-1);
215 }
216 }
217 memset(g_mcelog_config.logfile, 0, sizeof(g_mcelog_config.logfile));
218 } else {
219 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
220 child->key);
221 return (-1);
222 }
223 }
224 return (0);
225 }
227 static int socket_close(socket_adapter_t *self) {
228 int ret = 0;
229 pthread_rwlock_rdlock(&self->lock);
230 if (fcntl(self->sock_fd, F_GETFL) != -1) {
231 char errbuf[MCELOG_BUFF_SIZE];
232 if (shutdown(self->sock_fd, SHUT_RDWR) != 0) {
233 ERROR(MCELOG_PLUGIN ": Socket shutdown failed: %s",
234 sstrerror(errno, errbuf, sizeof(errbuf)));
235 ret = -1;
236 }
237 if (close(self->sock_fd) != 0) {
238 ERROR(MCELOG_PLUGIN ": Socket close failed: %s",
239 sstrerror(errno, errbuf, sizeof(errbuf)));
240 ret = -1;
241 }
242 }
243 pthread_rwlock_unlock(&self->lock);
244 return (ret);
245 }
247 static int socket_write(socket_adapter_t *self, const char *msg,
248 const size_t len) {
249 int ret = 0;
250 pthread_rwlock_rdlock(&self->lock);
251 if (swrite(self->sock_fd, msg, len) < 0)
252 ret = -1;
253 pthread_rwlock_unlock(&self->lock);
254 return (ret);
255 }
257 static void mcelog_dispatch_notification(notification_t *n) {
258 if (!n) {
259 ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__);
260 return;
261 }
263 sstrncpy(n->host, hostname_g, sizeof(n->host));
264 sstrncpy(n->type, "gauge", sizeof(n->type));
265 plugin_dispatch_notification(n);
266 if (n->meta)
267 plugin_notification_meta_free(n->meta);
268 }
270 static int socket_reinit(socket_adapter_t *self) {
271 char errbuff[MCELOG_BUFF_SIZE];
272 int ret = -1;
273 cdtime_t interval = plugin_get_interval();
274 struct timeval socket_timeout = CDTIME_T_TO_TIMEVAL(interval);
276 /* synchronization via write lock since sock_fd may be changed here */
277 pthread_rwlock_wrlock(&self->lock);
278 self->sock_fd =
279 socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
280 if (self->sock_fd < 0) {
281 ERROR(MCELOG_PLUGIN ": Could not create a socket. %s",
282 sstrerror(errno, errbuff, sizeof(errbuff)));
283 pthread_rwlock_unlock(&self->lock);
284 return (ret);
285 }
287 /* Set socket timeout option */
288 if (setsockopt(self->sock_fd, SOL_SOCKET, SO_SNDTIMEO, &socket_timeout,
289 sizeof(socket_timeout)) < 0)
290 ERROR(MCELOG_PLUGIN ": Failed to set the socket timeout option.");
292 /* downgrading to read lock due to possible recursive read locks
293 * in self->close(self) call */
294 pthread_rwlock_unlock(&self->lock);
295 pthread_rwlock_rdlock(&self->lock);
296 if (connect(self->sock_fd, (struct sockaddr *)&(self->unix_sock),
297 sizeof(self->unix_sock)) < 0) {
298 ERROR(MCELOG_PLUGIN ": Failed to connect to mcelog server. %s",
299 sstrerror(errno, errbuff, sizeof(errbuff)));
300 self->close(self);
301 ret = -1;
302 } else {
303 ret = 0;
304 mcelog_dispatch_notification(
305 &(notification_t){.severity = NOTIF_OKAY,
306 .time = cdtime(),
307 .message = "Connected to mcelog server",
308 .plugin = MCELOG_PLUGIN,
309 .type_instance = "mcelog_status"});
310 }
311 pthread_rwlock_unlock(&self->lock);
312 return (ret);
313 }
315 static int mcelog_dispatch_mem_notifications(const mcelog_memory_rec_t *mr) {
316 notification_t n = {.severity = NOTIF_WARNING,
317 .time = cdtime(),
318 .plugin = MCELOG_PLUGIN,
319 .type = "errors"};
321 int dispatch_corrected_notifs = 0, dispatch_uncorrected_notifs = 0;
323 if (mr == NULL)
324 return (-1);
326 llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list);
327 if (dimm == NULL) {
328 ERROR(MCELOG_PLUGIN
329 ": Error adding/getting dimm memory item to/from cache");
330 return (-1);
331 }
332 mcelog_memory_rec_t *mr_old = dimm->value;
333 if (!g_mcelog_config.persist) {
335 if (mr_old->corrected_err_total != mr->corrected_err_total ||
336 mr_old->corrected_err_timed != mr->corrected_err_timed)
337 dispatch_corrected_notifs = 1;
339 if (mr_old->uncorrected_err_total != mr->uncorrected_err_total ||
340 mr_old->uncorrected_err_timed != mr->uncorrected_err_timed)
341 dispatch_uncorrected_notifs = 1;
343 if (!dispatch_corrected_notifs && !dispatch_uncorrected_notifs) {
344 DEBUG("%s: No new notifications to dispatch", MCELOG_PLUGIN);
345 return (0);
346 }
347 } else {
348 dispatch_corrected_notifs = 1;
349 dispatch_uncorrected_notifs = 1;
350 }
352 sstrncpy(n.host, hostname_g, sizeof(n.host));
354 if (mr->dimm_name[0] != '\0')
355 ssnprintf(n.plugin_instance, sizeof(n.plugin_instance), "%s_%s",
356 mr->location, mr->dimm_name);
357 else
358 sstrncpy(n.plugin_instance, mr->location, sizeof(n.plugin_instance));
360 if (dispatch_corrected_notifs) {
361 /* Corrected Error Notifications */
362 if (mr->corrected_err_total > 0 || mr->corrected_err_timed > 0) {
363 if (plugin_notification_meta_add_signed_int(
364 &n, MCELOG_CORRECTED_ERR, mr->corrected_err_total) < 0) {
365 ERROR(MCELOG_PLUGIN ": add corrected errors meta data failed");
366 plugin_notification_meta_free(n.meta);
367 return (-1);
368 }
369 if (plugin_notification_meta_add_signed_int(
370 &n, "corrected memory timed errors", mr->corrected_err_timed) <
371 0) {
372 ERROR(MCELOG_PLUGIN ": add corrected timed errors meta data failed");
373 plugin_notification_meta_free(n.meta);
374 return (-1);
375 }
376 ssnprintf(n.message, sizeof(n.message), "Corrected Memory Errors");
377 sstrncpy(n.type_instance, MCELOG_CORRECTED_ERR_TYPE_INS,
378 sizeof(n.type_instance));
379 plugin_dispatch_notification(&n);
381 if (n.meta)
382 plugin_notification_meta_free(n.meta);
383 }
384 }
386 if (dispatch_uncorrected_notifs) {
387 /* Uncorrected Error Notifications */
388 if (mr->uncorrected_err_total > 0 || mr->uncorrected_err_timed > 0) {
389 if (plugin_notification_meta_add_signed_int(
390 &n, MCELOG_UNCORRECTED_ERR, mr->uncorrected_err_total) < 0) {
391 ERROR(MCELOG_PLUGIN ": add uncorrected errors meta data failed");
392 plugin_notification_meta_free(n.meta);
393 return (-1);
394 }
395 if (plugin_notification_meta_add_signed_int(
396 &n, "uncorrected memory timed errors",
397 mr->uncorrected_err_timed) < 0) {
398 ERROR(MCELOG_PLUGIN ": add uncorrected timed errors meta data failed");
399 plugin_notification_meta_free(n.meta);
400 return (-1);
401 }
402 ssnprintf(n.message, sizeof(n.message), "Uncorrected Memory Errors");
403 sstrncpy(n.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS,
404 sizeof(n.type_instance));
405 n.severity = NOTIF_FAILURE;
406 plugin_dispatch_notification(&n);
408 if (n.meta)
409 plugin_notification_meta_free(n.meta);
410 }
411 }
413 return (0);
414 }
416 static int mcelog_submit(const mcelog_memory_rec_t *mr) {
418 if (!mr) {
419 ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__);
420 return (-1);
421 }
423 llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list);
424 if (dimm == NULL) {
425 ERROR(MCELOG_PLUGIN
426 ": Error adding/getting dimm memory item to/from cache");
427 return (-1);
428 }
430 value_list_t vl = {
431 .values_len = 1,
432 .values = &(value_t){.derive = (derive_t)mr->corrected_err_total},
433 .time = cdtime(),
434 .plugin = MCELOG_PLUGIN,
435 .type = "errors",
436 .type_instance = MCELOG_CORRECTED_ERR_TYPE_INS};
438 mcelog_update_dimm_stats(dimm, mr);
440 if (mr->dimm_name[0] != '\0')
441 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s_%s",
442 mr->location, mr->dimm_name);
443 else
444 sstrncpy(vl.plugin_instance, mr->location, sizeof(vl.plugin_instance));
446 plugin_dispatch_values(&vl);
448 ssnprintf(vl.type_instance, sizeof(vl.type_instance),
449 "corrected_memory_errors_in_%s", mr->corrected_err_timed_period);
450 vl.values = &(value_t){.derive = (derive_t)mr->corrected_err_timed};
451 plugin_dispatch_values(&vl);
453 sstrncpy(vl.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS,
454 sizeof(vl.type_instance));
455 vl.values = &(value_t){.derive = (derive_t)mr->uncorrected_err_total};
456 plugin_dispatch_values(&vl);
458 ssnprintf(vl.type_instance, sizeof(vl.type_instance),
459 "uncorrected_memory_errors_in_%s",
460 mr->uncorrected_err_timed_period);
461 vl.values = &(value_t){.derive = (derive_t)mr->uncorrected_err_timed};
462 plugin_dispatch_values(&vl);
464 return (0);
465 }
467 static int parse_memory_info(FILE *p_file, mcelog_memory_rec_t *memory_record) {
468 char buf[DATA_MAX_NAME_LEN] = {0};
469 while (fgets(buf, sizeof(buf), p_file)) {
470 /* Got empty line or "done" */
471 if ((!strncmp("\n", buf, strlen(buf))) ||
472 (!strncmp(buf, "done\n", strlen(buf))))
473 return (1);
474 if (strlen(buf) < 5)
475 continue;
476 if (!strncmp(buf, MCELOG_SOCKET_STR, strlen(MCELOG_SOCKET_STR))) {
477 sstrncpy(memory_record->location, buf, strlen(buf));
478 /* replace spaces with '_' */
479 for (size_t i = 0; i < strlen(memory_record->location); i++)
480 if (memory_record->location[i] == ' ')
481 memory_record->location[i] = '_';
482 DEBUG(MCELOG_PLUGIN ": Got SOCKET INFO %s", memory_record->location);
483 }
484 if (!strncmp(buf, MCELOG_DIMM_NAME, strlen(MCELOG_DIMM_NAME))) {
485 char *name = NULL;
486 char *saveptr = NULL;
487 name = strtok_r(buf, "\"", &saveptr);
488 if (name != NULL && saveptr != NULL) {
489 name = strtok_r(NULL, "\"", &saveptr);
490 if (name != NULL) {
491 sstrncpy(memory_record->dimm_name, name,
492 sizeof(memory_record->dimm_name));
493 DEBUG(MCELOG_PLUGIN ": Got DIMM NAME %s", memory_record->dimm_name);
494 }
495 }
496 }
497 if (!strncmp(buf, MCELOG_CORRECTED_ERR, strlen(MCELOG_CORRECTED_ERR))) {
498 /* Get next line*/
499 if (fgets(buf, sizeof(buf), p_file) != NULL) {
500 sscanf(buf, "\t%d total", &(memory_record->corrected_err_total));
501 DEBUG(MCELOG_PLUGIN ": Got corrected error total %d",
502 memory_record->corrected_err_total);
503 }
504 if (fgets(buf, sizeof(buf), p_file) != NULL) {
505 sscanf(buf, "\t%d in %s", &(memory_record->corrected_err_timed),
506 memory_record->corrected_err_timed_period);
507 DEBUG(MCELOG_PLUGIN ": Got timed corrected errors %d in %s",
508 memory_record->corrected_err_total,
509 memory_record->corrected_err_timed_period);
510 }
511 }
512 if (!strncmp(buf, MCELOG_UNCORRECTED_ERR, strlen(MCELOG_UNCORRECTED_ERR))) {
513 if (fgets(buf, sizeof(buf), p_file) != NULL) {
514 sscanf(buf, "\t%d total", &(memory_record->uncorrected_err_total));
515 DEBUG(MCELOG_PLUGIN ": Got uncorrected error total %d",
516 memory_record->uncorrected_err_total);
517 }
518 if (fgets(buf, sizeof(buf), p_file) != NULL) {
519 sscanf(buf, "\t%d in %s", &(memory_record->uncorrected_err_timed),
520 memory_record->uncorrected_err_timed_period);
521 DEBUG(MCELOG_PLUGIN ": Got timed uncorrected errors %d in %s",
522 memory_record->uncorrected_err_total,
523 memory_record->uncorrected_err_timed_period);
524 }
525 }
526 memset(buf, 0, sizeof(buf));
527 }
528 /* parsing definitely finished */
529 return (0);
530 }
532 static void poll_worker_cleanup(void *arg) {
533 mcelog_thread_running = 0;
534 FILE *p_file = *((FILE **)arg);
535 if (p_file != NULL)
536 fclose(p_file);
537 free(arg);
538 }
540 static int socket_receive(socket_adapter_t *self, FILE **pp_file) {
541 int res = -1;
542 pthread_rwlock_rdlock(&self->lock);
543 struct pollfd poll_fd = {
544 .fd = self->sock_fd, .events = POLLIN | POLLPRI,
545 };
547 if ((res = poll(&poll_fd, 1, MCELOG_POLL_TIMEOUT)) <= 0) {
548 if (res != 0 && errno != EINTR) {
549 char errbuf[MCELOG_BUFF_SIZE];
550 ERROR("mcelog: poll failed: %s",
551 sstrerror(errno, errbuf, sizeof(errbuf)));
552 }
553 pthread_rwlock_unlock(&self->lock);
554 return (res);
555 }
557 if (poll_fd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
558 /* connection is broken */
559 ERROR(MCELOG_PLUGIN ": Connection to socket is broken");
560 if (poll_fd.revents & (POLLERR | POLLHUP)) {
561 mcelog_dispatch_notification(
562 &(notification_t){.severity = NOTIF_FAILURE,
563 .time = cdtime(),
564 .message = "Connection to mcelog socket is broken.",
565 .plugin = MCELOG_PLUGIN,
566 .type_instance = "mcelog_status"});
567 }
568 pthread_rwlock_unlock(&self->lock);
569 return (-1);
570 }
572 if (!(poll_fd.revents & (POLLIN | POLLPRI))) {
573 INFO(MCELOG_PLUGIN ": No data to read");
574 pthread_rwlock_unlock(&self->lock);
575 return (0);
576 }
578 if ((*pp_file = fdopen(dup(self->sock_fd), "r")) == NULL)
579 res = -1;
581 pthread_rwlock_unlock(&self->lock);
582 return (res);
583 }
585 static void *poll_worker(__attribute__((unused)) void *arg) {
586 char errbuf[MCELOG_BUFF_SIZE];
587 mcelog_thread_running = 1;
588 FILE **pp_file = calloc(1, sizeof(*pp_file));
589 if (pp_file == NULL) {
590 ERROR("mcelog: memory allocation failed: %s",
591 sstrerror(errno, errbuf, sizeof(errbuf)));
592 pthread_exit((void *)1);
593 }
595 pthread_cleanup_push(poll_worker_cleanup, pp_file);
597 while (1) {
598 /* blocking call */
599 int res = socket_adapter.receive(&socket_adapter, pp_file);
600 if (res < 0) {
601 socket_adapter.close(&socket_adapter);
602 while (socket_adapter.reinit(&socket_adapter) != 0) {
603 nanosleep(&CDTIME_T_TO_TIMESPEC(MS_TO_CDTIME_T(MCELOG_POLL_TIMEOUT)),
604 NULL);
605 }
606 continue;
607 }
608 /* timeout or no data to read */
609 else if (res == 0)
610 continue;
612 if (*pp_file == NULL)
613 continue;
615 mcelog_memory_rec_t memory_record = {0};
616 while (parse_memory_info(*pp_file, &memory_record)) {
617 /* Check if location was successfully parsed */
618 if (memory_record.location[0] == '\0') {
619 memset(&memory_record, 0, sizeof(memory_record));
620 continue;
621 }
623 if (mcelog_dispatch_mem_notifications(&memory_record) != 0)
624 ERROR(MCELOG_PLUGIN ": Failed to submit memory errors notification");
625 if (mcelog_submit(&memory_record) != 0)
626 ERROR(MCELOG_PLUGIN ": Failed to submit memory errors");
627 memset(&memory_record, 0, sizeof(memory_record));
628 }
630 fclose(*pp_file);
631 *pp_file = NULL;
632 }
634 mcelog_thread_running = 0;
635 pthread_cleanup_pop(1);
636 return (NULL);
637 }
639 static int mcelog_init(void) {
640 if (g_mcelog_config.logfile != NULL &&
641 socket_adapter.unix_sock.sun_path != NULL) {
642 INFO(MCELOG_PLUGIN
643 ": No configuration selected defaulting to memory errors.");
644 memset(g_mcelog_config.logfile, 0, sizeof(g_mcelog_config.logfile));
645 }
646 g_mcelog_config.dimms_list = llist_create();
647 int err = pthread_mutex_init(&g_mcelog_config.dimms_lock, NULL);
648 if (err < 0) {
649 ERROR(MCELOG_PLUGIN ": plugin: failed to initialize cache lock");
650 return (-1);
651 }
653 if (socket_adapter.reinit(&socket_adapter) != 0) {
654 ERROR(MCELOG_PLUGIN ": Cannot connect to client socket");
655 return (-1);
656 }
658 if (socket_adapter.unix_sock.sun_path != NULL) {
659 if (plugin_thread_create(&g_mcelog_config.tid, NULL, poll_worker, NULL,
660 NULL) != 0) {
661 ERROR(MCELOG_PLUGIN ": Error creating poll thread.");
662 return (-1);
663 }
664 }
665 return (0);
666 }
668 static int get_memory_machine_checks(void) {
669 static const char dump[] = "dump all bios\n";
670 int ret = socket_adapter.write(&socket_adapter, dump, sizeof(dump));
671 if (ret != 0)
672 ERROR(MCELOG_PLUGIN ": SENT DUMP REQUEST FAILED");
673 else
674 DEBUG(MCELOG_PLUGIN ": SENT DUMP REQUEST OK");
675 return (ret);
676 }
678 static int mcelog_read(__attribute__((unused)) user_data_t *ud) {
679 DEBUG(MCELOG_PLUGIN ": %s", __FUNCTION__);
681 if (get_memory_machine_checks() != 0)
682 ERROR(MCELOG_PLUGIN ": MACHINE CHECK INFO NOT AVAILABLE");
684 return (0);
685 }
687 static int mcelog_shutdown(void) {
688 int ret = 0;
689 if (mcelog_thread_running) {
690 pthread_cancel(g_mcelog_config.tid);
691 if (pthread_join(g_mcelog_config.tid, NULL) != 0) {
692 ERROR(MCELOG_PLUGIN ": Stopping thread failed.");
693 ret = -1;
694 }
695 }
696 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
697 mcelog_free_dimms_list_records(g_mcelog_config.dimms_list);
698 llist_destroy(g_mcelog_config.dimms_list);
699 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
700 pthread_mutex_destroy(&g_mcelog_config.dimms_lock);
701 g_mcelog_config.dimms_list = NULL;
702 ret = socket_adapter.close(&socket_adapter) || ret;
703 pthread_rwlock_destroy(&(socket_adapter.lock));
704 return (-ret);
705 }
707 void module_register(void) {
708 plugin_register_complex_config(MCELOG_PLUGIN, mcelog_config);
709 plugin_register_init(MCELOG_PLUGIN, mcelog_init);
710 plugin_register_complex_read(NULL, MCELOG_PLUGIN, mcelog_read, 0, NULL);
711 plugin_register_shutdown(MCELOG_PLUGIN, mcelog_shutdown);
712 }