1 /*-
2 * collectd - src/mcelog.c
3 * MIT License
4 *
5 * Copyright(c) 2016 Intel Corporation. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
25 * Authors:
26 * Maryam Tahhan <maryam.tahhan@intel.com>
27 * Volodymyr Mytnyk <volodymyrx.mytnyk@intel.com>
28 * Taras Chornyi <tarasx.chornyi@intel.com>
29 * Krzysztof Matczak <krzysztofx.matczak@intel.com>
30 */
32 #include "collectd.h"
34 #include "common.h"
35 #include "utils_llist.h"
37 #include <poll.h>
38 #include <sys/socket.h>
39 #include <sys/un.h>
40 #include <unistd.h>
/* Plugin name: used for callback registration and as the log-message prefix. */
#define MCELOG_PLUGIN "mcelog"
/* Size of the scratch buffers handed to sstrerror(). */
#define MCELOG_BUFF_SIZE 1024
/* poll() timeout; also the delay between reconnect attempts in poll_worker().
 */
#define MCELOG_POLL_TIMEOUT 1000 /* ms */
/* Line prefixes recognised by parse_memory_info(). The two "... memory errors"
 * strings are additionally used as notification meta-data keys. */
#define MCELOG_SOCKET_STR "SOCKET"
#define MCELOG_DIMM_NAME "DMI_NAME"
#define MCELOG_CORRECTED_ERR "corrected memory errors"
#define MCELOG_UNCORRECTED_ERR "uncorrected memory errors"
/* Type instances attached to the dispatched values and notifications. */
#define MCELOG_CORRECTED_ERR_TYPE_INS "corrected_memory_errors"
#define MCELOG_UNCORRECTED_ERR_TYPE_INS "uncorrected_memory_errors"
/* Plugin-wide state: the configured logfile path, the poll thread handle and
 * the cache of per-DIMM records. */
typedef struct mcelog_config_s {
  char logfile[PATH_MAX]; /* mcelog logfile (set via "McelogLogfile") */
  pthread_t tid;          /* poll thread id */
  llist_t *dimms_list;    /* DIMMs list: key = DIMM name, value = last record */
  /* lock for dimms cache */
  pthread_mutex_t dimms_lock;
} mcelog_config_t;
typedef struct socket_adapter_s socket_adapter_t;

/* Connection to the mcelog client socket bundled with the operations that act
 * on it. Every operation receives the adapter itself as its first argument. */
struct socket_adapter_s {
  int sock_fd;                  /* mcelog server socket fd */
  struct sockaddr_un unix_sock; /* mcelog client socket */
  /* Protects sock_fd: write-locked in socket_reinit() while the descriptor is
   * replaced, read-locked by the other operations. */
  pthread_rwlock_t lock;
  /* function pointers for socket operations */
  /* Sends len bytes of msg; returns 0 on success, -1 on failure. */
  int (*write)(socket_adapter_t *self, const char *msg, const size_t len);
  /* Creates a new socket and connects it; returns 0 on success, -1 on failure.
   */
  int (*reinit)(socket_adapter_t *self);
  /* Polls the socket; on readable data stores a stream in *p_file. */
  int (*receive)(socket_adapter_t *self, FILE **p_file);
  /* Shuts down and closes the socket; returns 0 on success, -1 on failure. */
  int (*close)(socket_adapter_t *self);
};
/* One memory error record as parsed from the mcelog dump for a single DIMM. */
typedef struct mcelog_memory_rec_s {
  int corrected_err_total; /* corrected errors: "x total" */
  int corrected_err_timed; /* corrected errors: "x in 24h" */
  /* period text that follows "in" on the timed corrected-errors line */
  char corrected_err_timed_period[DATA_MAX_NAME_LEN];
  int uncorrected_err_total; /* uncorrected errors: "x total" */
  int uncorrected_err_timed; /* uncorrected errors: "x in 24h" */
  /* period text that follows "in" on the timed uncorrected-errors line */
  char uncorrected_err_timed_period[DATA_MAX_NAME_LEN];
  char location[DATA_MAX_NAME_LEN];  /* SOCKET x CHANNEL x DIMM x */
  char dimm_name[DATA_MAX_NAME_LEN]; /* DMI_NAME "DIMM_F1" */
} mcelog_memory_rec_t;
/* Socket operations, declared ahead of their definitions so that the
 * socket_adapter initializer can reference them. */
static int socket_close(socket_adapter_t *self);
static int socket_write(socket_adapter_t *self, const char *msg,
                        const size_t len);
static int socket_reinit(socket_adapter_t *self);
static int socket_receive(socket_adapter_t *self, FILE **p_file);
/* Global plugin state. The logfile path below is the default; it is
 * overwritten by the "McelogLogfile" option in mcelog_config(). */
static mcelog_config_t g_mcelog_config = {
    .logfile = "/var/log/mcelog",
};
/* The single adapter instance used by the plugin. It starts without a socket
 * (sock_fd == -1); the socket path below is the default and is overwritten by
 * the "McelogClientSocket" option in mcelog_config(). */
static socket_adapter_t socket_adapter = {
    .sock_fd = -1,
    .unix_sock =
        {
            .sun_family = AF_UNIX, .sun_path = "/var/run/mcelog-client",
        },
    .lock = PTHREAD_RWLOCK_INITIALIZER,
    .close = socket_close,
    .write = socket_write,
    .reinit = socket_reinit,
    .receive = socket_receive,
};
/* Set to 1 when poll_worker() starts and cleared by its cleanup handler;
 * mcelog_shutdown() checks it to decide whether to cancel and join the thread.
 */
static _Bool mcelog_thread_running;
109 static void mcelog_free_dimms_list_records(llist_t *dimms_list) {
111 for (llentry_t *e = llist_head(dimms_list); e != NULL; e = e->next) {
112 sfree(e->key);
113 sfree(e->value);
114 }
116 }
118 static llentry_t *mcelog_get_dimm(const char *name, llist_t *dimms_list) {
119 if (dimms_list == NULL)
120 return NULL;
122 llentry_t *le = llist_search(g_mcelog_config.dimms_list, name);
123 if (le != NULL)
124 return le;
126 return NULL;
127 }
129 /* Create or get dimm by dimm name/location */
130 static llentry_t *mcelog_dimm(const mcelog_memory_rec_t *rec,
131 llist_t *dimms_list) {
133 char dimm_name[DATA_MAX_NAME_LEN];
135 if (strlen(rec->dimm_name) > 0) {
136 ssnprintf(dimm_name, sizeof(dimm_name), "%s_%s", rec->location,
137 rec->dimm_name);
138 } else
139 sstrncpy(dimm_name, rec->location, sizeof(dimm_name));
141 llentry_t *dimm_le = mcelog_get_dimm(dimm_name, dimms_list);
143 if (dimm_le == NULL) {
144 mcelog_memory_rec_t *dimm_mr = calloc(1, sizeof(*dimm_mr));
145 if (dimm_mr == NULL) {
146 ERROR(MCELOG_PLUGIN ": Error allocating dimm memory item");
147 return NULL;
148 }
149 char *p_name = strdup(dimm_name);
150 if (p_name == NULL) {
151 ERROR(MCELOG_PLUGIN ": strdup: error");
152 return NULL;
153 }
155 /* add new dimm */
156 dimm_le = llentry_create(p_name, dimm_mr);
157 if (dimm_le == NULL) {
158 ERROR(MCELOG_PLUGIN ": llentry_create(): error");
159 free(dimm_mr);
160 return NULL;
161 }
162 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
163 llist_append(g_mcelog_config.dimms_list, dimm_le);
164 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
165 }
167 return dimm_le;
168 }
170 static void mcelog_update_dimm_stats(llentry_t *dimm,
171 const mcelog_memory_rec_t *rec) {
172 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
173 memcpy(dimm->value, rec, sizeof(mcelog_memory_rec_t));
174 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
176 }
178 static int mcelog_config(oconfig_item_t *ci) {
179 for (int i = 0; i < ci->children_num; i++) {
180 oconfig_item_t *child = ci->children + i;
181 if (strcasecmp("McelogClientSocket", child->key) == 0) {
182 if (cf_util_get_string_buffer(child, socket_adapter.unix_sock.sun_path,
183 sizeof(socket_adapter.unix_sock.sun_path)) <
184 0) {
185 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
186 child->key);
187 return (-1);
188 }
189 } else if (strcasecmp("McelogLogfile", child->key) == 0) {
190 if (cf_util_get_string_buffer(child, g_mcelog_config.logfile,
191 sizeof(g_mcelog_config.logfile)) < 0) {
192 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
193 child->key);
194 return (-1);
195 }
196 } else {
197 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
198 child->key);
199 return (-1);
200 }
201 }
202 return (0);
203 }
205 static int socket_close(socket_adapter_t *self) {
206 int ret = 0;
207 pthread_rwlock_rdlock(&self->lock);
208 if (fcntl(self->sock_fd, F_GETFL) != -1) {
209 char errbuf[MCELOG_BUFF_SIZE];
210 if (shutdown(self->sock_fd, SHUT_RDWR) != 0) {
211 ERROR(MCELOG_PLUGIN ": Socket shutdown failed: %s",
212 sstrerror(errno, errbuf, sizeof(errbuf)));
213 ret = -1;
214 }
215 if (close(self->sock_fd) != 0) {
216 ERROR(MCELOG_PLUGIN ": Socket close failed: %s",
217 sstrerror(errno, errbuf, sizeof(errbuf)));
218 ret = -1;
219 }
220 }
221 pthread_rwlock_unlock(&self->lock);
222 return (ret);
223 }
225 static int socket_write(socket_adapter_t *self, const char *msg,
226 const size_t len) {
227 int ret = 0;
228 pthread_rwlock_rdlock(&self->lock);
229 if (swrite(self->sock_fd, msg, len) < 0)
230 ret = -1;
231 pthread_rwlock_unlock(&self->lock);
232 return (ret);
233 }
235 static void mcelog_dispatch_notification(notification_t *n) {
236 if (!n) {
237 ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__);
238 return;
239 }
241 sstrncpy(n->host, hostname_g, sizeof(n->host));
242 sstrncpy(n->type, "gauge", sizeof(n->type));
243 plugin_dispatch_notification(n);
244 if (n->meta)
245 plugin_notification_meta_free(n->meta);
246 }
248 static int socket_reinit(socket_adapter_t *self) {
249 char errbuff[MCELOG_BUFF_SIZE];
250 int ret = -1;
251 cdtime_t interval = plugin_get_interval();
252 struct timeval socket_timeout = CDTIME_T_TO_TIMEVAL(interval);
254 /* synchronization via write lock since sock_fd may be changed here */
255 pthread_rwlock_wrlock(&self->lock);
256 self->sock_fd =
257 socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
258 if (self->sock_fd < 0) {
259 ERROR(MCELOG_PLUGIN ": Could not create a socket. %s",
260 sstrerror(errno, errbuff, sizeof(errbuff)));
261 pthread_rwlock_unlock(&self->lock);
262 return (ret);
263 }
265 /* Set socket timeout option */
266 if (setsockopt(self->sock_fd, SOL_SOCKET, SO_SNDTIMEO, &socket_timeout,
267 sizeof(socket_timeout)) < 0)
268 ERROR(MCELOG_PLUGIN ": Failed to set the socket timeout option.");
270 /* downgrading to read lock due to possible recursive read locks
271 * in self->close(self) call */
272 pthread_rwlock_unlock(&self->lock);
273 pthread_rwlock_rdlock(&self->lock);
274 if (connect(self->sock_fd, (struct sockaddr *)&(self->unix_sock),
275 sizeof(self->unix_sock)) < 0) {
276 ERROR(MCELOG_PLUGIN ": Failed to connect to mcelog server. %s",
277 sstrerror(errno, errbuff, sizeof(errbuff)));
278 self->close(self);
279 ret = -1;
280 } else {
281 ret = 0;
282 mcelog_dispatch_notification(
283 &(notification_t){.severity = NOTIF_OKAY,
284 .time = cdtime(),
285 .message = "Connected to mcelog server",
286 .plugin = MCELOG_PLUGIN,
287 .type_instance = "mcelog_status"});
288 }
289 pthread_rwlock_unlock(&self->lock);
290 return (ret);
291 }
293 static int mcelog_dispatch_mem_notifications(const mcelog_memory_rec_t *mr) {
294 notification_t n = {.severity = NOTIF_WARNING,
295 .time = cdtime(),
296 .plugin = MCELOG_PLUGIN,
297 .type = "errors"};
299 int dispatch_corrected_notifs = 0, dispatch_uncorrected_notifs = 0;
301 if (mr == NULL)
302 return (-1);
304 llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list);
305 if (dimm == NULL) {
306 ERROR(MCELOG_PLUGIN ": Error adding/getting dimm memory item to/from cache");
307 return -1;
308 }
310 mcelog_memory_rec_t *mr_old = dimm->value;
312 if (mr_old->corrected_err_total != mr->corrected_err_total ||
313 mr_old->corrected_err_timed != mr->corrected_err_timed)
314 dispatch_corrected_notifs = 1;
316 if (mr_old->uncorrected_err_total != mr->uncorrected_err_total ||
317 mr_old->uncorrected_err_timed != mr->uncorrected_err_timed)
318 dispatch_uncorrected_notifs = 1;
320 if (!dispatch_corrected_notifs && !dispatch_uncorrected_notifs) {
321 DEBUG("%s: No new notifications to dispatch", MCELOG_PLUGIN);
322 return (0);
323 }
325 sstrncpy(n.host, hostname_g, sizeof(n.host));
327 if (mr->dimm_name[0] != '\0')
328 ssnprintf(n.plugin_instance, sizeof(n.plugin_instance), "%s_%s",
329 mr->location, mr->dimm_name);
330 else
331 sstrncpy(n.plugin_instance, mr->location, sizeof(n.plugin_instance));
333 if (dispatch_corrected_notifs) {
334 /* Corrected Error Notifications */
335 if (mr->corrected_err_total > 0 || mr->corrected_err_timed > 0) {
336 if (plugin_notification_meta_add_signed_int(
337 &n, MCELOG_CORRECTED_ERR, mr->corrected_err_total) < 0) {
338 ERROR(MCELOG_PLUGIN ": add corrected errors meta data failed");
339 plugin_notification_meta_free(n.meta);
340 return (-1);
341 }
342 if (plugin_notification_meta_add_signed_int(
343 &n, "corrected memory timed errors", mr->corrected_err_timed) <
344 0) {
345 ERROR(MCELOG_PLUGIN ": add corrected timed errors meta data failed");
346 plugin_notification_meta_free(n.meta);
347 return (-1);
348 }
349 ssnprintf(n.message, sizeof(n.message), "Corrected Memory Errors");
350 sstrncpy(n.type_instance, MCELOG_CORRECTED_ERR_TYPE_INS,
351 sizeof(n.type_instance));
352 plugin_dispatch_notification(&n);
354 if (n.meta)
355 plugin_notification_meta_free(n.meta);
356 }
357 }
359 if (dispatch_uncorrected_notifs) {
360 /* Uncorrected Error Notifications */
361 if (mr->uncorrected_err_total > 0 || mr->uncorrected_err_timed > 0) {
362 if (plugin_notification_meta_add_signed_int(
363 &n, MCELOG_UNCORRECTED_ERR, mr->uncorrected_err_total) < 0) {
364 ERROR(MCELOG_PLUGIN ": add uncorrected errors meta data failed");
365 plugin_notification_meta_free(n.meta);
366 return (-1);
367 }
368 if (plugin_notification_meta_add_signed_int(
369 &n, "uncorrected memory timed errors",
370 mr->uncorrected_err_timed) < 0) {
371 ERROR(MCELOG_PLUGIN ": add uncorrected timed errors meta data failed");
372 plugin_notification_meta_free(n.meta);
373 return (-1);
374 }
375 ssnprintf(n.message, sizeof(n.message), "Uncorrected Memory Errors");
376 sstrncpy(n.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS,
377 sizeof(n.type_instance));
378 n.severity = NOTIF_FAILURE;
379 plugin_dispatch_notification(&n);
381 if (n.meta)
382 plugin_notification_meta_free(n.meta);
383 }
384 }
386 return (0);
387 }
389 static int mcelog_submit(const mcelog_memory_rec_t *mr) {
391 if (!mr) {
392 ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__);
393 return (-1);
394 }
396 llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list);
397 if (dimm == NULL) {
398 ERROR(MCELOG_PLUGIN ": Error adding/getting dimm memory item to/from cache");
399 return -1;
400 }
402 value_list_t vl = {
403 .values_len = 1,
404 .values = &(value_t){.derive = (derive_t)mr->corrected_err_total},
405 .time = cdtime(),
406 .plugin = MCELOG_PLUGIN,
407 .type = "errors",
408 .type_instance = MCELOG_CORRECTED_ERR_TYPE_INS};
410 mcelog_update_dimm_stats(dimm, mr);
412 if (mr->dimm_name[0] != '\0')
413 ssnprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s_%s",
414 mr->location, mr->dimm_name);
415 else
416 sstrncpy(vl.plugin_instance, mr->location, sizeof(vl.plugin_instance));
418 plugin_dispatch_values(&vl);
420 ssnprintf(vl.type_instance, sizeof(vl.type_instance),
421 "corrected_memory_errors_in_%s", mr->corrected_err_timed_period);
422 vl.values = &(value_t){.derive = (derive_t)mr->corrected_err_timed};
423 plugin_dispatch_values(&vl);
425 sstrncpy(vl.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS,
426 sizeof(vl.type_instance));
427 vl.values = &(value_t){.derive = (derive_t)mr->uncorrected_err_total};
428 plugin_dispatch_values(&vl);
430 ssnprintf(vl.type_instance, sizeof(vl.type_instance),
431 "uncorrected_memory_errors_in_%s",
432 mr->uncorrected_err_timed_period);
433 vl.values = &(value_t){.derive = (derive_t)mr->uncorrected_err_timed};
434 plugin_dispatch_values(&vl);
436 return (0);
437 }
439 static int parse_memory_info(FILE *p_file, mcelog_memory_rec_t *memory_record) {
440 char buf[DATA_MAX_NAME_LEN] = {0};
441 while (fgets(buf, sizeof(buf), p_file)) {
442 /* Got empty line or "done" */
443 if ((!strncmp("\n", buf, strlen(buf))) ||
444 (!strncmp(buf, "done\n", strlen(buf))))
445 return (1);
446 if (strlen(buf) < 5)
447 continue;
448 if (!strncmp(buf, MCELOG_SOCKET_STR, strlen(MCELOG_SOCKET_STR))) {
449 sstrncpy(memory_record->location, buf, strlen(buf));
450 /* replace spaces with '_' */
451 for (size_t i = 0; i < strlen(memory_record->location); i++)
452 if (memory_record->location[i] == ' ')
453 memory_record->location[i] = '_';
454 DEBUG(MCELOG_PLUGIN ": Got SOCKET INFO %s", memory_record->location);
455 }
456 if (!strncmp(buf, MCELOG_DIMM_NAME, strlen(MCELOG_DIMM_NAME))) {
457 char *name = NULL;
458 char *saveptr = NULL;
459 name = strtok_r(buf, "\"", &saveptr);
460 if (name != NULL && saveptr != NULL) {
461 name = strtok_r(NULL, "\"", &saveptr);
462 if (name != NULL) {
463 sstrncpy(memory_record->dimm_name, name,
464 sizeof(memory_record->dimm_name));
465 DEBUG(MCELOG_PLUGIN ": Got DIMM NAME %s", memory_record->dimm_name);
466 }
467 }
468 }
469 if (!strncmp(buf, MCELOG_CORRECTED_ERR, strlen(MCELOG_CORRECTED_ERR))) {
470 /* Get next line*/
471 if (fgets(buf, sizeof(buf), p_file) != NULL) {
472 sscanf(buf, "\t%d total", &(memory_record->corrected_err_total));
473 DEBUG(MCELOG_PLUGIN ": Got corrected error total %d",
474 memory_record->corrected_err_total);
475 }
476 if (fgets(buf, sizeof(buf), p_file) != NULL) {
477 sscanf(buf, "\t%d in %s", &(memory_record->corrected_err_timed),
478 memory_record->corrected_err_timed_period);
479 DEBUG(MCELOG_PLUGIN ": Got timed corrected errors %d in %s",
480 memory_record->corrected_err_total,
481 memory_record->corrected_err_timed_period);
482 }
483 }
484 if (!strncmp(buf, MCELOG_UNCORRECTED_ERR, strlen(MCELOG_UNCORRECTED_ERR))) {
485 if (fgets(buf, sizeof(buf), p_file) != NULL) {
486 sscanf(buf, "\t%d total", &(memory_record->uncorrected_err_total));
487 DEBUG(MCELOG_PLUGIN ": Got uncorrected error total %d",
488 memory_record->uncorrected_err_total);
489 }
490 if (fgets(buf, sizeof(buf), p_file) != NULL) {
491 sscanf(buf, "\t%d in %s", &(memory_record->uncorrected_err_timed),
492 memory_record->uncorrected_err_timed_period);
493 DEBUG(MCELOG_PLUGIN ": Got timed uncorrected errors %d in %s",
494 memory_record->uncorrected_err_total,
495 memory_record->uncorrected_err_timed_period);
496 }
497 }
498 memset(buf, 0, sizeof(buf));
499 }
500 /* parsing definitely finished */
501 return (0);
502 }
504 static void poll_worker_cleanup(void *arg) {
505 mcelog_thread_running = 0;
506 FILE *p_file = *((FILE **)arg);
507 if (p_file != NULL)
508 fclose(p_file);
509 free(arg);
510 }
512 static int socket_receive(socket_adapter_t *self, FILE **pp_file) {
513 int res = -1;
514 pthread_rwlock_rdlock(&self->lock);
515 struct pollfd poll_fd = {
516 .fd = self->sock_fd, .events = POLLIN | POLLPRI,
517 };
519 if ((res = poll(&poll_fd, 1, MCELOG_POLL_TIMEOUT)) <= 0) {
520 if (res != 0 && errno != EINTR) {
521 char errbuf[MCELOG_BUFF_SIZE];
522 ERROR("mcelog: poll failed: %s",
523 sstrerror(errno, errbuf, sizeof(errbuf)));
524 }
525 pthread_rwlock_unlock(&self->lock);
526 return (res);
527 }
529 if (poll_fd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
530 /* connection is broken */
531 ERROR(MCELOG_PLUGIN ": Connection to socket is broken");
532 if (poll_fd.revents & (POLLERR | POLLHUP)) {
533 mcelog_dispatch_notification(
534 &(notification_t){.severity = NOTIF_FAILURE,
535 .time = cdtime(),
536 .message = "Connection to mcelog socket is broken.",
537 .plugin = MCELOG_PLUGIN,
538 .type_instance = "mcelog_status"});
539 }
540 pthread_rwlock_unlock(&self->lock);
541 return (-1);
542 }
544 if (!(poll_fd.revents & (POLLIN | POLLPRI))) {
545 INFO(MCELOG_PLUGIN ": No data to read");
546 pthread_rwlock_unlock(&self->lock);
547 return (0);
548 }
550 if ((*pp_file = fdopen(dup(self->sock_fd), "r")) == NULL)
551 res = -1;
553 pthread_rwlock_unlock(&self->lock);
554 return (res);
555 }
557 static void *poll_worker(__attribute__((unused)) void *arg) {
558 char errbuf[MCELOG_BUFF_SIZE];
559 mcelog_thread_running = 1;
560 FILE **pp_file = calloc(1, sizeof(*pp_file));
561 if (pp_file == NULL) {
562 ERROR("mcelog: memory allocation failed: %s",
563 sstrerror(errno, errbuf, sizeof(errbuf)));
564 pthread_exit((void *)1);
565 }
567 pthread_cleanup_push(poll_worker_cleanup, pp_file);
569 while (1) {
570 /* blocking call */
571 int res = socket_adapter.receive(&socket_adapter, pp_file);
572 if (res < 0) {
573 socket_adapter.close(&socket_adapter);
574 while (socket_adapter.reinit(&socket_adapter) != 0) {
575 nanosleep(&CDTIME_T_TO_TIMESPEC(MS_TO_CDTIME_T(MCELOG_POLL_TIMEOUT)),
576 NULL);
577 }
578 continue;
579 }
580 /* timeout or no data to read */
581 else if (res == 0)
582 continue;
584 if (*pp_file == NULL)
585 continue;
587 mcelog_memory_rec_t memory_record = {0};
588 while (parse_memory_info(*pp_file, &memory_record)) {
589 /* Check if location was successfully parsed */
590 if (memory_record.location[0] == '\0') {
591 memset(&memory_record, 0, sizeof(memory_record));
592 continue;
593 }
595 if (mcelog_dispatch_mem_notifications(&memory_record) != 0)
596 ERROR(MCELOG_PLUGIN ": Failed to submit memory errors notification");
597 if (mcelog_submit(&memory_record) != 0)
598 ERROR(MCELOG_PLUGIN ": Failed to submit memory errors");
599 memset(&memory_record, 0, sizeof(memory_record));
600 }
602 fclose(*pp_file);
603 *pp_file = NULL;
604 }
606 mcelog_thread_running = 0;
607 pthread_cleanup_pop(1);
608 return (NULL);
609 }
611 static int mcelog_init(void) {
612 g_mcelog_config.dimms_list = llist_create();
613 int err = pthread_mutex_init(&g_mcelog_config.dimms_lock, NULL);
614 if (err < 0) {
615 ERROR(MCELOG_PLUGIN ": plugin: failed to initialize cache lock");
616 return (-1);
617 }
619 if (socket_adapter.reinit(&socket_adapter) != 0) {
620 ERROR(MCELOG_PLUGIN ": Cannot connect to client socket");
621 return (-1);
622 }
624 if (plugin_thread_create(&g_mcelog_config.tid, NULL, poll_worker, NULL,
625 NULL) != 0) {
626 ERROR(MCELOG_PLUGIN ": Error creating poll thread.");
627 return (-1);
628 }
629 return (0);
630 }
632 static int get_memory_machine_checks(void) {
633 static const char dump[] = "dump all bios\n";
634 int ret = socket_adapter.write(&socket_adapter, dump, sizeof(dump));
635 if (ret != 0)
636 ERROR(MCELOG_PLUGIN ": SENT DUMP REQUEST FAILED");
637 else
638 DEBUG(MCELOG_PLUGIN ": SENT DUMP REQUEST OK");
639 return (ret);
640 }
642 static int mcelog_read(__attribute__((unused)) user_data_t *ud) {
643 DEBUG(MCELOG_PLUGIN ": %s", __FUNCTION__);
645 if (get_memory_machine_checks() != 0)
646 ERROR(MCELOG_PLUGIN ": MACHINE CHECK INFO NOT AVAILABLE");
648 return (0);
649 }
651 static int mcelog_shutdown(void) {
652 int ret = 0;
653 if (mcelog_thread_running) {
654 pthread_cancel(g_mcelog_config.tid);
655 if (pthread_join(g_mcelog_config.tid, NULL) != 0) {
656 ERROR(MCELOG_PLUGIN ": Stopping thread failed.");
657 ret = -1;
658 }
659 }
660 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
661 mcelog_free_dimms_list_records(g_mcelog_config.dimms_list);
662 llist_destroy(g_mcelog_config.dimms_list);
663 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
664 pthread_mutex_destroy(&g_mcelog_config.dimms_lock);
665 g_mcelog_config.dimms_list = NULL;
666 ret = socket_adapter.close(&socket_adapter) || ret;
667 pthread_rwlock_destroy(&(socket_adapter.lock));
668 return (-ret);
669 }
/* Plugin entry point: registers the config, init, read and shutdown callbacks
 * under the plugin name. */
void module_register(void) {
  plugin_register_complex_config(MCELOG_PLUGIN, mcelog_config);
  plugin_register_init(MCELOG_PLUGIN, mcelog_init);
  plugin_register_complex_read(NULL, MCELOG_PLUGIN, mcelog_read, 0, NULL);
  plugin_register_shutdown(MCELOG_PLUGIN, mcelog_shutdown);
}