1 /*-
2 * collectd - src/mcelog.c
3 * MIT License
4 *
5 * Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
25 * Authors:
26 * Maryam Tahhan <maryam.tahhan@intel.com>
27 * Volodymyr Mytnyk <volodymyrx.mytnyk@intel.com>
28 * Taras Chornyi <tarasx.chornyi@intel.com>
29 * Krzysztof Matczak <krzysztofx.matczak@intel.com>
30 */
32 #include "collectd.h"
34 #include "common.h"
35 #include "utils_llist.h"
37 #include <poll.h>
38 #include <sys/socket.h>
39 #include <sys/un.h>
40 #include <unistd.h>
/* Plugin name; also used as the prefix of every log message. */
#define MCELOG_PLUGIN "mcelog"

/* Size of the scratch buffers used for error strings. */
#define MCELOG_BUFF_SIZE 1024

/* Timeout handed to poll() and reused as the reconnect back-off. */
#define MCELOG_POLL_TIMEOUT 1000 /* ms */

/* Line prefixes recognised while parsing the mcelog reply. */
#define MCELOG_SOCKET_STR "SOCKET"
#define MCELOG_DIMM_NAME "DMI_NAME"

/* Section headers in the mcelog reply; also used as notification message
 * and notification meta-data key. */
#define MCELOG_CORRECTED_ERR "corrected memory errors"
#define MCELOG_UNCORRECTED_ERR "uncorrected memory errors"

/* Notification meta-data keys of the timed error counters. */
#define MCELOG_CORRECTED_ERR_TIMED "corrected memory timed errors"
#define MCELOG_UNCORRECTED_ERR_TIMED "uncorrected memory timed errors"

/* type_instance of the dispatched values and notifications. */
#define MCELOG_CORRECTED_ERR_TYPE_INS "corrected_memory_errors"
#define MCELOG_UNCORRECTED_ERR_TYPE_INS "uncorrected_memory_errors"
54 typedef struct mcelog_config_s {
55 char logfile[PATH_MAX]; /* mcelog logfile */
56 pthread_t tid; /* poll thread id */
57 llist_t *dimms_list; /* DIMMs list */
58 pthread_mutex_t dimms_lock; /* lock for dimms cache */
59 _Bool persist;
60 } mcelog_config_t;
62 typedef struct socket_adapter_s socket_adapter_t;
64 struct socket_adapter_s {
65 int sock_fd; /* mcelog server socket fd */
66 struct sockaddr_un unix_sock; /* mcelog client socket */
67 pthread_rwlock_t lock;
68 /* function pointers for socket operations */
69 int (*write)(socket_adapter_t *self, const char *msg, const size_t len);
70 int (*reinit)(socket_adapter_t *self);
71 int (*receive)(socket_adapter_t *self, FILE **p_file);
72 int (*close)(socket_adapter_t *self);
73 };
75 typedef struct mcelog_memory_rec_s {
76 int corrected_err_total; /* x total*/
77 int corrected_err_timed; /* x in 24h*/
78 char corrected_err_timed_period[DATA_MAX_NAME_LEN/2];
79 int uncorrected_err_total; /* x total*/
80 int uncorrected_err_timed; /* x in 24h*/
81 char uncorrected_err_timed_period[DATA_MAX_NAME_LEN/2];
82 char location[DATA_MAX_NAME_LEN/2]; /* SOCKET x CHANNEL x DIMM x*/
83 char dimm_name[DATA_MAX_NAME_LEN/2]; /* DMI_NAME "DIMM_F1" */
84 } mcelog_memory_rec_t;
86 static int socket_close(socket_adapter_t *self);
87 static int socket_write(socket_adapter_t *self, const char *msg,
88 const size_t len);
89 static int socket_reinit(socket_adapter_t *self);
90 static int socket_receive(socket_adapter_t *self, FILE **p_file);
92 static mcelog_config_t g_mcelog_config = {
93 .logfile = "/var/log/mcelog", .persist = 0,
94 };
96 static socket_adapter_t socket_adapter = {
97 .sock_fd = -1,
98 .unix_sock =
99 {
100 .sun_family = AF_UNIX, .sun_path = "/var/run/mcelog-client",
101 },
102 .lock = PTHREAD_RWLOCK_INITIALIZER,
103 .close = socket_close,
104 .write = socket_write,
105 .reinit = socket_reinit,
106 .receive = socket_receive,
107 };
109 static _Bool mcelog_thread_running;
110 static _Bool mcelog_apply_defaults;
112 static void mcelog_free_dimms_list_records(llist_t *dimms_list) {
114 for (llentry_t *e = llist_head(dimms_list); e != NULL; e = e->next) {
115 sfree(e->key);
116 sfree(e->value);
117 }
118 }
120 /* Create or get dimm by dimm name/location */
121 static llentry_t *mcelog_dimm(const mcelog_memory_rec_t *rec,
122 llist_t *dimms_list) {
124 char dimm_name[DATA_MAX_NAME_LEN];
126 if (strlen(rec->dimm_name) > 0) {
127 snprintf(dimm_name, sizeof(dimm_name), "%s_%s", rec->location,
128 rec->dimm_name);
129 } else
130 sstrncpy(dimm_name, rec->location, sizeof(dimm_name));
132 llentry_t *dimm_le = llist_search(g_mcelog_config.dimms_list, dimm_name);
134 if (dimm_le != NULL)
135 return dimm_le;
137 /* allocate new linked list entry */
138 mcelog_memory_rec_t *dimm_mr = calloc(1, sizeof(*dimm_mr));
139 if (dimm_mr == NULL) {
140 ERROR(MCELOG_PLUGIN ": Error allocating dimm memory item");
141 return NULL;
142 }
143 char *p_name = strdup(dimm_name);
144 if (p_name == NULL) {
145 ERROR(MCELOG_PLUGIN ": strdup: error");
146 free(dimm_mr);
147 return NULL;
148 }
150 /* add new dimm */
151 dimm_le = llentry_create(p_name, dimm_mr);
152 if (dimm_le == NULL) {
153 ERROR(MCELOG_PLUGIN ": llentry_create(): error");
154 free(dimm_mr);
155 free(p_name);
156 return NULL;
157 }
158 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
159 llist_append(g_mcelog_config.dimms_list, dimm_le);
160 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
162 return dimm_le;
163 }
165 static void mcelog_update_dimm_stats(llentry_t *dimm,
166 const mcelog_memory_rec_t *rec) {
167 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
168 memcpy(dimm->value, rec, sizeof(mcelog_memory_rec_t));
169 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
170 }
172 static int mcelog_config(oconfig_item_t *ci) {
173 int use_logfile = 0, use_memory = 0;
174 for (int i = 0; i < ci->children_num; i++) {
175 oconfig_item_t *child = ci->children + i;
176 if (strcasecmp("McelogLogfile", child->key) == 0) {
177 use_logfile = 1;
178 if (use_memory) {
179 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\", Memory "
180 "option is already configured.",
181 child->key);
182 return -1;
183 }
184 if (cf_util_get_string_buffer(child, g_mcelog_config.logfile,
185 sizeof(g_mcelog_config.logfile)) < 0) {
186 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
187 child->key);
188 return -1;
189 }
190 memset(socket_adapter.unix_sock.sun_path, 0,
191 sizeof(socket_adapter.unix_sock.sun_path));
192 } else if (strcasecmp("Memory", child->key) == 0) {
193 if (use_logfile) {
194 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\", Logfile "
195 "option is already configured.",
196 child->key);
197 return -1;
198 }
199 use_memory = 1;
200 for (int j = 0; j < child->children_num; j++) {
201 oconfig_item_t *mem_child = child->children + j;
202 if (strcasecmp("McelogClientSocket", mem_child->key) == 0) {
203 if (cf_util_get_string_buffer(
204 mem_child, socket_adapter.unix_sock.sun_path,
205 sizeof(socket_adapter.unix_sock.sun_path)) < 0) {
206 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
207 mem_child->key);
208 return -1;
209 }
210 } else if (strcasecmp("PersistentNotification", mem_child->key) == 0) {
211 if (cf_util_get_boolean(mem_child, &g_mcelog_config.persist) < 0) {
212 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
213 mem_child->key);
214 return -1;
215 }
216 } else {
217 ERROR(MCELOG_PLUGIN ": Invalid Memory configuration option: \"%s\".",
218 mem_child->key);
219 return -1;
220 }
221 }
222 memset(g_mcelog_config.logfile, 0, sizeof(g_mcelog_config.logfile));
223 } else {
224 ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
225 child->key);
226 return -1;
227 }
228 }
230 if (!use_logfile && !use_memory)
231 mcelog_apply_defaults = 1;
233 return 0;
234 }
236 static int socket_close(socket_adapter_t *self) {
237 int ret = 0;
238 pthread_rwlock_rdlock(&self->lock);
239 if (fcntl(self->sock_fd, F_GETFL) != -1) {
240 char errbuf[MCELOG_BUFF_SIZE];
241 if (shutdown(self->sock_fd, SHUT_RDWR) != 0) {
242 ERROR(MCELOG_PLUGIN ": Socket shutdown failed: %s",
243 sstrerror(errno, errbuf, sizeof(errbuf)));
244 ret = -1;
245 }
246 if (close(self->sock_fd) != 0) {
247 ERROR(MCELOG_PLUGIN ": Socket close failed: %s",
248 sstrerror(errno, errbuf, sizeof(errbuf)));
249 ret = -1;
250 }
251 }
252 pthread_rwlock_unlock(&self->lock);
253 return ret;
254 }
256 static int socket_write(socket_adapter_t *self, const char *msg,
257 const size_t len) {
258 int ret = 0;
259 pthread_rwlock_rdlock(&self->lock);
260 if (swrite(self->sock_fd, msg, len) != 0)
261 ret = -1;
262 pthread_rwlock_unlock(&self->lock);
263 return ret;
264 }
266 static void mcelog_dispatch_notification(notification_t *n) {
267 if (!n) {
268 ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__);
269 return;
270 }
272 sstrncpy(n->host, hostname_g, sizeof(n->host));
273 sstrncpy(n->type, "gauge", sizeof(n->type));
274 plugin_dispatch_notification(n);
275 if (n->meta)
276 plugin_notification_meta_free(n->meta);
277 }
279 static int socket_reinit(socket_adapter_t *self) {
280 char errbuff[MCELOG_BUFF_SIZE];
281 int ret = -1;
282 cdtime_t interval = plugin_get_interval();
283 struct timeval socket_timeout = CDTIME_T_TO_TIMEVAL(interval);
285 /* synchronization via write lock since sock_fd may be changed here */
286 pthread_rwlock_wrlock(&self->lock);
287 self->sock_fd =
288 socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
289 if (self->sock_fd < 0) {
290 ERROR(MCELOG_PLUGIN ": Could not create a socket. %s",
291 sstrerror(errno, errbuff, sizeof(errbuff)));
292 pthread_rwlock_unlock(&self->lock);
293 return ret;
294 }
296 /* Set socket timeout option */
297 if (setsockopt(self->sock_fd, SOL_SOCKET, SO_SNDTIMEO, &socket_timeout,
298 sizeof(socket_timeout)) < 0)
299 ERROR(MCELOG_PLUGIN ": Failed to set the socket timeout option.");
301 /* downgrading to read lock due to possible recursive read locks
302 * in self->close(self) call */
303 pthread_rwlock_unlock(&self->lock);
304 pthread_rwlock_rdlock(&self->lock);
305 if (connect(self->sock_fd, (struct sockaddr *)&(self->unix_sock),
306 sizeof(self->unix_sock)) < 0) {
307 ERROR(MCELOG_PLUGIN ": Failed to connect to mcelog server. %s",
308 sstrerror(errno, errbuff, sizeof(errbuff)));
309 self->close(self);
310 ret = -1;
311 } else {
312 ret = 0;
313 mcelog_dispatch_notification(
314 &(notification_t){.severity = NOTIF_OKAY,
315 .time = cdtime(),
316 .message = "Connected to mcelog server",
317 .plugin = MCELOG_PLUGIN,
318 .type_instance = "mcelog_status"});
319 }
320 pthread_rwlock_unlock(&self->lock);
321 return ret;
322 }
324 static int mcelog_dispatch_mem_notifications(const mcelog_memory_rec_t *mr) {
325 notification_t n = {.severity = NOTIF_WARNING,
326 .time = cdtime(),
327 .plugin = MCELOG_PLUGIN,
328 .type = "errors"};
330 int dispatch_corrected_notifs = 0, dispatch_uncorrected_notifs = 0;
332 if (mr == NULL)
333 return -1;
335 llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list);
336 if (dimm == NULL) {
337 ERROR(MCELOG_PLUGIN
338 ": Error adding/getting dimm memory item to/from cache");
339 return -1;
340 }
341 mcelog_memory_rec_t *mr_old = dimm->value;
342 if (!g_mcelog_config.persist) {
344 if (mr_old->corrected_err_total != mr->corrected_err_total ||
345 mr_old->corrected_err_timed != mr->corrected_err_timed)
346 dispatch_corrected_notifs = 1;
348 if (mr_old->uncorrected_err_total != mr->uncorrected_err_total ||
349 mr_old->uncorrected_err_timed != mr->uncorrected_err_timed)
350 dispatch_uncorrected_notifs = 1;
352 if (!dispatch_corrected_notifs && !dispatch_uncorrected_notifs) {
353 DEBUG("%s: No new notifications to dispatch", MCELOG_PLUGIN);
354 return 0;
355 }
356 } else {
357 dispatch_corrected_notifs = 1;
358 dispatch_uncorrected_notifs = 1;
359 }
361 sstrncpy(n.host, hostname_g, sizeof(n.host));
363 if (mr->dimm_name[0] != '\0')
364 snprintf(n.plugin_instance, sizeof(n.plugin_instance), "%s_%s",
365 mr->location, mr->dimm_name);
366 else
367 sstrncpy(n.plugin_instance, mr->location, sizeof(n.plugin_instance));
369 if (dispatch_corrected_notifs &&
370 (mr->corrected_err_total > 0 || mr->corrected_err_timed > 0)) {
371 /* Corrected Error Notifications */
372 plugin_notification_meta_add_signed_int(&n, MCELOG_CORRECTED_ERR,
373 mr->corrected_err_total);
374 plugin_notification_meta_add_signed_int(&n, MCELOG_CORRECTED_ERR_TIMED,
375 mr->corrected_err_timed);
376 snprintf(n.message, sizeof(n.message), MCELOG_CORRECTED_ERR);
377 sstrncpy(n.type_instance, MCELOG_CORRECTED_ERR_TYPE_INS,
378 sizeof(n.type_instance));
379 plugin_dispatch_notification(&n);
380 if (n.meta)
381 plugin_notification_meta_free(n.meta);
382 n.meta = NULL;
383 }
385 if (dispatch_uncorrected_notifs &&
386 (mr->uncorrected_err_total > 0 || mr->uncorrected_err_timed > 0)) {
387 /* Uncorrected Error Notifications */
388 plugin_notification_meta_add_signed_int(&n, MCELOG_UNCORRECTED_ERR,
389 mr->uncorrected_err_total);
390 plugin_notification_meta_add_signed_int(&n, MCELOG_UNCORRECTED_ERR_TIMED,
391 mr->uncorrected_err_timed);
392 snprintf(n.message, sizeof(n.message), MCELOG_UNCORRECTED_ERR);
393 sstrncpy(n.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS,
394 sizeof(n.type_instance));
395 n.severity = NOTIF_FAILURE;
396 plugin_dispatch_notification(&n);
397 if (n.meta)
398 plugin_notification_meta_free(n.meta);
399 n.meta = NULL;
400 }
402 return 0;
403 }
405 static int mcelog_submit(const mcelog_memory_rec_t *mr) {
407 if (!mr) {
408 ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__);
409 return -1;
410 }
412 llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list);
413 if (dimm == NULL) {
414 ERROR(MCELOG_PLUGIN
415 ": Error adding/getting dimm memory item to/from cache");
416 return -1;
417 }
419 value_list_t vl = {
420 .values_len = 1,
421 .values = &(value_t){.derive = (derive_t)mr->corrected_err_total},
422 .time = cdtime(),
423 .plugin = MCELOG_PLUGIN,
424 .type = "errors",
425 .type_instance = MCELOG_CORRECTED_ERR_TYPE_INS};
427 mcelog_update_dimm_stats(dimm, mr);
429 if (mr->dimm_name[0] != '\0')
430 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s_%s",
431 mr->location, mr->dimm_name);
432 else
433 sstrncpy(vl.plugin_instance, mr->location, sizeof(vl.plugin_instance));
435 plugin_dispatch_values(&vl);
437 snprintf(vl.type_instance, sizeof(vl.type_instance),
438 "corrected_memory_errors_in_%s", mr->corrected_err_timed_period);
439 vl.values = &(value_t){.derive = (derive_t)mr->corrected_err_timed};
440 plugin_dispatch_values(&vl);
442 sstrncpy(vl.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS,
443 sizeof(vl.type_instance));
444 vl.values = &(value_t){.derive = (derive_t)mr->uncorrected_err_total};
445 plugin_dispatch_values(&vl);
447 snprintf(vl.type_instance, sizeof(vl.type_instance),
448 "uncorrected_memory_errors_in_%s", mr->uncorrected_err_timed_period);
449 vl.values = &(value_t){.derive = (derive_t)mr->uncorrected_err_timed};
450 plugin_dispatch_values(&vl);
452 return 0;
453 }
455 static int parse_memory_info(FILE *p_file, mcelog_memory_rec_t *memory_record) {
456 char buf[DATA_MAX_NAME_LEN] = {0};
457 while (fgets(buf, sizeof(buf), p_file)) {
458 /* Got empty line or "done" */
459 if ((!strncmp("\n", buf, strlen(buf))) ||
460 (!strncmp(buf, "done\n", strlen(buf))))
461 return 1;
462 if (strlen(buf) < 5)
463 continue;
464 if (!strncmp(buf, MCELOG_SOCKET_STR, strlen(MCELOG_SOCKET_STR))) {
465 sstrncpy(memory_record->location, buf, strlen(buf));
466 /* replace spaces with '_' */
467 for (size_t i = 0; i < strlen(memory_record->location); i++)
468 if (memory_record->location[i] == ' ')
469 memory_record->location[i] = '_';
470 DEBUG(MCELOG_PLUGIN ": Got SOCKET INFO %s", memory_record->location);
471 }
472 if (!strncmp(buf, MCELOG_DIMM_NAME, strlen(MCELOG_DIMM_NAME))) {
473 char *name = NULL;
474 char *saveptr = NULL;
475 name = strtok_r(buf, "\"", &saveptr);
476 if (name != NULL && saveptr != NULL) {
477 name = strtok_r(NULL, "\"", &saveptr);
478 if (name != NULL) {
479 sstrncpy(memory_record->dimm_name, name,
480 sizeof(memory_record->dimm_name));
481 DEBUG(MCELOG_PLUGIN ": Got DIMM NAME %s", memory_record->dimm_name);
482 }
483 }
484 }
485 if (!strncmp(buf, MCELOG_CORRECTED_ERR, strlen(MCELOG_CORRECTED_ERR))) {
486 /* Get next line*/
487 if (fgets(buf, sizeof(buf), p_file) != NULL) {
488 sscanf(buf, "\t%d total", &(memory_record->corrected_err_total));
489 DEBUG(MCELOG_PLUGIN ": Got corrected error total %d",
490 memory_record->corrected_err_total);
491 }
492 if (fgets(buf, sizeof(buf), p_file) != NULL) {
493 sscanf(buf, "\t%d in %s", &(memory_record->corrected_err_timed),
494 memory_record->corrected_err_timed_period);
495 DEBUG(MCELOG_PLUGIN ": Got timed corrected errors %d in %s",
496 memory_record->corrected_err_total,
497 memory_record->corrected_err_timed_period);
498 }
499 }
500 if (!strncmp(buf, MCELOG_UNCORRECTED_ERR, strlen(MCELOG_UNCORRECTED_ERR))) {
501 if (fgets(buf, sizeof(buf), p_file) != NULL) {
502 sscanf(buf, "\t%d total", &(memory_record->uncorrected_err_total));
503 DEBUG(MCELOG_PLUGIN ": Got uncorrected error total %d",
504 memory_record->uncorrected_err_total);
505 }
506 if (fgets(buf, sizeof(buf), p_file) != NULL) {
507 sscanf(buf, "\t%d in %s", &(memory_record->uncorrected_err_timed),
508 memory_record->uncorrected_err_timed_period);
509 DEBUG(MCELOG_PLUGIN ": Got timed uncorrected errors %d in %s",
510 memory_record->uncorrected_err_total,
511 memory_record->uncorrected_err_timed_period);
512 }
513 }
514 memset(buf, 0, sizeof(buf));
515 }
516 /* parsing definitely finished */
517 return 0;
518 }
520 static void poll_worker_cleanup(void *arg) {
521 mcelog_thread_running = 0;
522 FILE *p_file = *((FILE **)arg);
523 if (p_file != NULL)
524 fclose(p_file);
525 free(arg);
526 }
528 static int socket_receive(socket_adapter_t *self, FILE **pp_file) {
529 int res = -1;
530 pthread_rwlock_rdlock(&self->lock);
531 struct pollfd poll_fd = {
532 .fd = self->sock_fd, .events = POLLIN | POLLPRI,
533 };
535 if ((res = poll(&poll_fd, 1, MCELOG_POLL_TIMEOUT)) <= 0) {
536 if (res != 0 && errno != EINTR) {
537 char errbuf[MCELOG_BUFF_SIZE];
538 ERROR("mcelog: poll failed: %s",
539 sstrerror(errno, errbuf, sizeof(errbuf)));
540 }
541 pthread_rwlock_unlock(&self->lock);
542 return res;
543 }
545 if (poll_fd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
546 /* connection is broken */
547 ERROR(MCELOG_PLUGIN ": Connection to socket is broken");
548 if (poll_fd.revents & (POLLERR | POLLHUP)) {
549 mcelog_dispatch_notification(
550 &(notification_t){.severity = NOTIF_FAILURE,
551 .time = cdtime(),
552 .message = "Connection to mcelog socket is broken.",
553 .plugin = MCELOG_PLUGIN,
554 .type_instance = "mcelog_status"});
555 }
556 pthread_rwlock_unlock(&self->lock);
557 return -1;
558 }
560 if (!(poll_fd.revents & (POLLIN | POLLPRI))) {
561 INFO(MCELOG_PLUGIN ": No data to read");
562 pthread_rwlock_unlock(&self->lock);
563 return 0;
564 }
566 if ((*pp_file = fdopen(dup(self->sock_fd), "r")) == NULL)
567 res = -1;
569 pthread_rwlock_unlock(&self->lock);
570 return res;
571 }
573 static void *poll_worker(__attribute__((unused)) void *arg) {
574 char errbuf[MCELOG_BUFF_SIZE];
575 mcelog_thread_running = 1;
576 FILE **pp_file = calloc(1, sizeof(*pp_file));
577 if (pp_file == NULL) {
578 ERROR("mcelog: memory allocation failed: %s",
579 sstrerror(errno, errbuf, sizeof(errbuf)));
580 pthread_exit((void *)1);
581 }
583 pthread_cleanup_push(poll_worker_cleanup, pp_file);
585 while (1) {
586 /* blocking call */
587 int res = socket_adapter.receive(&socket_adapter, pp_file);
588 if (res < 0) {
589 socket_adapter.close(&socket_adapter);
590 while (socket_adapter.reinit(&socket_adapter) != 0) {
591 nanosleep(&CDTIME_T_TO_TIMESPEC(MS_TO_CDTIME_T(MCELOG_POLL_TIMEOUT)),
592 NULL);
593 }
594 continue;
595 }
596 /* timeout or no data to read */
597 else if (res == 0)
598 continue;
600 if (*pp_file == NULL)
601 continue;
603 mcelog_memory_rec_t memory_record = {0};
604 while (parse_memory_info(*pp_file, &memory_record)) {
605 /* Check if location was successfully parsed */
606 if (memory_record.location[0] == '\0') {
607 memset(&memory_record, 0, sizeof(memory_record));
608 continue;
609 }
611 if (mcelog_dispatch_mem_notifications(&memory_record) != 0)
612 ERROR(MCELOG_PLUGIN ": Failed to submit memory errors notification");
613 if (mcelog_submit(&memory_record) != 0)
614 ERROR(MCELOG_PLUGIN ": Failed to submit memory errors");
615 memset(&memory_record, 0, sizeof(memory_record));
616 }
618 fclose(*pp_file);
619 *pp_file = NULL;
620 }
622 mcelog_thread_running = 0;
623 pthread_cleanup_pop(1);
624 return NULL;
625 }
627 static int mcelog_init(void) {
628 if (mcelog_apply_defaults) {
629 INFO(MCELOG_PLUGIN
630 ": No configuration selected defaulting to memory errors.");
631 memset(g_mcelog_config.logfile, 0, sizeof(g_mcelog_config.logfile));
632 }
633 g_mcelog_config.dimms_list = llist_create();
634 int err = pthread_mutex_init(&g_mcelog_config.dimms_lock, NULL);
635 if (err < 0) {
636 ERROR(MCELOG_PLUGIN ": plugin: failed to initialize cache lock");
637 return -1;
638 }
640 if (socket_adapter.reinit(&socket_adapter) != 0) {
641 ERROR(MCELOG_PLUGIN ": Cannot connect to client socket");
642 return -1;
643 }
645 if (strlen(socket_adapter.unix_sock.sun_path)) {
646 if (plugin_thread_create(&g_mcelog_config.tid, NULL, poll_worker, NULL,
647 NULL) != 0) {
648 ERROR(MCELOG_PLUGIN ": Error creating poll thread.");
649 return -1;
650 }
651 }
652 return 0;
653 }
655 static int get_memory_machine_checks(void) {
656 static const char dump[] = "dump all bios\n";
657 int ret = socket_adapter.write(&socket_adapter, dump, sizeof(dump));
658 if (ret != 0)
659 ERROR(MCELOG_PLUGIN ": SENT DUMP REQUEST FAILED");
660 else
661 DEBUG(MCELOG_PLUGIN ": SENT DUMP REQUEST OK");
662 return ret;
663 }
665 static int mcelog_read(__attribute__((unused)) user_data_t *ud) {
666 DEBUG(MCELOG_PLUGIN ": %s", __FUNCTION__);
668 if (get_memory_machine_checks() != 0)
669 ERROR(MCELOG_PLUGIN ": MACHINE CHECK INFO NOT AVAILABLE");
671 return 0;
672 }
674 static int mcelog_shutdown(void) {
675 int ret = 0;
676 if (mcelog_thread_running) {
677 pthread_cancel(g_mcelog_config.tid);
678 if (pthread_join(g_mcelog_config.tid, NULL) != 0) {
679 ERROR(MCELOG_PLUGIN ": Stopping thread failed.");
680 ret = -1;
681 }
682 }
683 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
684 mcelog_free_dimms_list_records(g_mcelog_config.dimms_list);
685 llist_destroy(g_mcelog_config.dimms_list);
686 g_mcelog_config.dimms_list = NULL;
687 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
688 pthread_mutex_destroy(&g_mcelog_config.dimms_lock);
689 ret = socket_adapter.close(&socket_adapter) || ret;
690 pthread_rwlock_destroy(&(socket_adapter.lock));
691 return -ret;
692 }
694 void module_register(void) {
695 plugin_register_complex_config(MCELOG_PLUGIN, mcelog_config);
696 plugin_register_init(MCELOG_PLUGIN, mcelog_init);
697 plugin_register_complex_read(NULL, MCELOG_PLUGIN, mcelog_read, 0, NULL);
698 plugin_register_shutdown(MCELOG_PLUGIN, mcelog_shutdown);
699 }