1 /**
2 * collectd - src/processes.c
3 * Copyright (C) 2005 Lyonel Vincent
4 * Copyright (C) 2006 Florian Forster (Mach code)
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2 of the License, or (at your
9 * option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Authors:
21 * Lyonel Vincent <lyonel at ezix.org>
22 * Florian octo Forster <octo at verplant.org>
23 **/
25 #include "collectd.h"
26 #include "common.h"
27 #include "plugin.h"
28 #include "utils_debug.h"
29 #include "configfile.h"
31 /* Include header files for the mach system, if they exist.. */
32 #if HAVE_THREAD_INFO
33 # if HAVE_MACH_MACH_INIT_H
34 # include <mach/mach_init.h>
35 # endif
36 # if HAVE_MACH_HOST_PRIV_H
37 # include <mach/host_priv.h>
38 # endif
39 # if HAVE_MACH_MACH_ERROR_H
40 # include <mach/mach_error.h>
41 # endif
42 # if HAVE_MACH_MACH_HOST_H
43 # include <mach/mach_host.h>
44 # endif
45 # if HAVE_MACH_MACH_PORT_H
46 # include <mach/mach_port.h>
47 # endif
48 # if HAVE_MACH_MACH_TYPES_H
49 # include <mach/mach_types.h>
50 # endif
51 # if HAVE_MACH_MESSAGE_H
52 # include <mach/message.h>
53 # endif
54 # if HAVE_MACH_PROCESSOR_SET_H
55 # include <mach/processor_set.h>
56 # endif
57 # if HAVE_MACH_TASK_H
58 # include <mach/task.h>
59 # endif
60 # if HAVE_MACH_THREAD_ACT_H
61 # include <mach/thread_act.h>
62 # endif
63 # if HAVE_MACH_VM_REGION_H
64 # include <mach/vm_region.h>
65 # endif
66 # if HAVE_MACH_VM_MAP_H
67 # include <mach/vm_map.h>
68 # endif
69 # if HAVE_MACH_VM_PROT_H
70 # include <mach/vm_prot.h>
71 # endif
72 /* #endif HAVE_THREAD_INFO */
74 #elif KERNEL_LINUX
75 # if HAVE_LINUX_CONFIG_H
76 # include <linux/config.h>
77 # endif
78 # ifndef CONFIG_HZ
79 # define CONFIG_HZ 100
80 # endif
81 #endif /* KERNEL_LINUX */
/* Name under which this plugin registers itself and submits its values. */
#define MODULE_NAME "processes"

/* Size of the buffer used to format the process-state counters. */
#define BUFSIZE 256

/* A read callback is only implemented for the Mach (thread_info) and the
 * Linux code paths. */
#if HAVE_THREAD_INFO || KERNEL_LINUX
# define PROCESSES_HAVE_READ 1
#else
# define PROCESSES_HAVE_READ 0
#endif
93 static char *processes_file = "processes.rrd";
94 static char *processes_ds_def[] =
95 {
96 "DS:running:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
97 "DS:sleeping:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
98 "DS:zombies:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
99 "DS:stopped:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
100 "DS:paging:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
101 "DS:blocked:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
102 NULL
103 };
104 static int processes_ds_num = 6;
106 static char *ps_rss_file = "processes/ps_rss-%s.rrd";
107 static char *ps_rss_ds_def[] =
108 {
109 /* max = 2^63 - 1 */
110 "DS:byte:GAUGE:"COLLECTD_HEARTBEAT":0:9223372036854775807",
111 NULL
112 };
113 static int ps_rss_ds_num = 1;
115 static char *ps_cputime_file = "processes/ps_cputime-%s.rrd";
116 static char *ps_cputime_ds_def[] =
117 {
118 /* 1 second in user-mode per second ought to be enough.. */
119 "DS:user:COUNTER:"COLLECTD_HEARTBEAT":0:1000000",
120 "DS:syst:COUNTER:"COLLECTD_HEARTBEAT":0:1000000",
121 NULL
122 };
123 static int ps_cputime_ds_num = 2;
/* Configuration keys handled by ps_config (), terminated by NULL. */
static char *config_keys[] =
{
	"CollectName",
	NULL
};
/* Number of keys, i.e. all array elements but the NULL sentinel. */
static int config_keys_num =
	(int) (sizeof (config_keys) / sizeof (config_keys[0])) - 1;
/* Size of the `name' buffer of a procstat_t, including the terminating NUL. */
#define PROCSTAT_NAME_LEN 256

/*
 * Statistics of one process or, when used as an element of the configured
 * list, the sum over all processes sharing the same name.
 */
typedef struct procstat
{
	char               name[PROCSTAT_NAME_LEN]; /* process name */

	unsigned int       num_proc;    /* number of processes */
	unsigned int       num_lwp;     /* number of light-weight processes */

	unsigned long      vmem_rss;    /* resident set size */
	unsigned long      vmem_minflt; /* minor page faults */
	unsigned long      vmem_majflt; /* major page faults */

	unsigned long long cpu_user;    /* CPU time spent in user mode */
	unsigned long long cpu_system;  /* CPU time spent in system mode */

	struct procstat   *next;        /* next list element or NULL */
} procstat_t;
146 static procstat_t *list_head_g = NULL;
148 #if HAVE_THREAD_INFO
149 static mach_port_t port_host_self;
150 static mach_port_t port_task_self;
152 static processor_set_name_array_t pset_list;
153 static mach_msg_type_number_t pset_list_len;
154 /* #endif HAVE_THREAD_INFO */
156 #elif KERNEL_LINUX
157 static long pagesize_g;
158 #endif /* KERNEL_LINUX */
160 static procstat_t *ps_list_append (procstat_t *list, const char *name)
161 {
162 procstat_t *new;
163 procstat_t *ptr;
165 if ((new = (procstat_t *) malloc (sizeof (procstat_t))) == NULL)
166 return (NULL);
167 memset (new, 0, sizeof (procstat_t));
168 strncpy (new->name, name, PROCSTAT_NAME_LEN);
170 for (ptr = list; ptr != NULL; ptr = ptr->next)
171 if (ptr->next == NULL)
172 break;
174 if (ptr != NULL)
175 ptr->next = new;
177 return (new);
178 }
180 static void ps_list_add (procstat_t *list, procstat_t *entry)
181 {
182 procstat_t *ptr;
184 ptr = list;
185 while ((ptr != NULL) && (strcmp (ptr->name, entry->name) != 0))
186 ptr = ptr->next;
188 if (ptr == NULL)
189 return;
191 ptr->num_proc += entry->num_proc;
192 ptr->num_lwp += entry->num_lwp;
193 ptr->vmem_rss += entry->vmem_rss;
194 ptr->vmem_minflt += entry->vmem_minflt;
195 ptr->vmem_majflt += entry->vmem_majflt;
196 ptr->cpu_user += entry->cpu_user;
197 ptr->cpu_system += entry->cpu_system;
198 }
200 static void ps_list_reset (procstat_t *ps)
201 {
202 while (ps != NULL)
203 {
204 ps->num_proc = 0;
205 ps->num_lwp = 0;
206 ps->vmem_rss = 0;
207 ps->vmem_minflt = 0;
208 ps->vmem_majflt = 0;
209 ps->cpu_user = 0;
210 ps->cpu_system = 0;
211 ps = ps->next;
212 }
213 }
215 static int ps_config (char *key, char *value)
216 {
217 if (strcasecmp (key, "CollectName") == 0)
218 {
219 procstat_t *entry;
221 entry = ps_list_append (list_head_g, value);
222 if (entry == NULL)
223 {
224 syslog (LOG_ERR, "processes plugin: ps_list_append failed.");
225 return (1);
226 }
227 if (list_head_g == NULL)
228 list_head_g = entry;
229 }
230 else
231 {
232 return (-1);
233 }
235 return (0);
236 }
/*
 * Init callback. On Mach this looks up the host and task ports and
 * (re-)reads the list of processor sets, on Linux it caches the page size.
 */
static void ps_init (void)
{
#if HAVE_THREAD_INFO
	kern_return_t status;

	port_host_self = mach_host_self ();
	port_task_self = mach_task_self ();

	/* Release a list left over from a previous call. */
	if (pset_list != NULL)
	{
		vm_deallocate (port_task_self,
				(vm_address_t) pset_list,
				pset_list_len * sizeof (processor_set_t));
		pset_list = NULL;
		pset_list_len = 0;
	}

	status = host_processor_sets (port_host_self,
			&pset_list,
			&pset_list_len);
	if (status != KERN_SUCCESS)
	{
		syslog (LOG_ERR, "host_processor_sets failed: %s\n",
				mach_error_string (status));
		pset_list = NULL;
		pset_list_len = 0;
	}
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
	pagesize_g = sysconf(_SC_PAGESIZE);
	DBG ("pagesize_g = %li; CONFIG_HZ = %i;",
			pagesize_g, CONFIG_HZ);
#endif /* KERNEL_LINUX */
}
276 static void ps_write (char *host, char *inst, char *val)
277 {
278 rrd_update_file (host, processes_file, val,
279 processes_ds_def, processes_ds_num);
280 }
282 static void ps_rss_write (char *host, char *inst, char *val)
283 {
284 char filename[256];
285 int status;
287 status = snprintf (filename, 256, ps_rss_file, inst);
288 if ((status < 1) || (status >= 256))
289 return;
291 rrd_update_file (host, filename, val, ps_rss_ds_def, ps_rss_ds_num);
292 }
294 static void ps_cputime_write (char *host, char *inst, char *val)
295 {
296 char filename[256];
297 int status;
299 status = snprintf (filename, 256, ps_cputime_file, inst);
300 if ((status < 1) || (status >= 256))
301 return;
303 DBG ("host = %s; filename = %s; val = %s;",
304 host, filename, val);
305 rrd_update_file (host, filename, val,
306 ps_cputime_ds_def, ps_cputime_ds_num);
307 }
309 #if PROCESSES_HAVE_READ
310 static void ps_submit (int running,
311 int sleeping,
312 int zombies,
313 int stopped,
314 int paging,
315 int blocked)
316 {
317 char buf[BUFSIZE];
319 if (snprintf (buf, BUFSIZE, "%u:%i:%i:%i:%i:%i:%i",
320 (unsigned int) curtime,
321 running, sleeping, zombies, stopped, paging,
322 blocked) >= BUFSIZE)
323 return;
325 DBG ("running = %i; sleeping = %i; zombies = %i; stopped = %i; paging = %i; blocked = %i;",
326 running, sleeping, zombies, stopped, paging, blocked);
328 plugin_submit (MODULE_NAME, "-", buf);
329 }
331 static void ps_submit_proc (procstat_t *ps)
332 {
333 char buffer[64];
335 if (ps == NULL)
336 return;
338 snprintf (buffer, 64, "%u:%lu",
339 (unsigned int) curtime,
340 ps->vmem_rss);
341 buffer[63] = '\0';
342 plugin_submit ("ps_rss", ps->name, buffer);
344 snprintf (buffer, 64, "%u:%u:%u",
345 (unsigned int) curtime,
346 /* Make the counter overflow */
347 (unsigned int) (ps->cpu_user & 0xFFFFFFFF),
348 (unsigned int) (ps->cpu_system & 0xFFFFFFFF));
349 buffer[63] = '\0';
350 plugin_submit ("ps_cputime", ps->name, buffer);
352 DBG ("name = %s; num_proc = %i; num_lwp = %i; vmem_rss = %i; "
353 "vmem_minflt = %i; vmem_majflt = %i; "
354 "cpu_user = %i; cpu_system = %i;",
355 ps->name, ps->num_proc, ps->num_lwp, ps->vmem_rss,
356 ps->vmem_minflt, ps->vmem_majflt, ps->cpu_user,
357 ps->cpu_system);
359 }
361 #if KERNEL_LINUX
/*
 * Reads the numeric entries of `/proc/<pid>/task' and returns them as a
 * zero-terminated array of task IDs.
 *
 * Returns NULL if the directory cannot be opened, if it contains no numeric
 * entry or if no memory could be allocated. The caller has to free () the
 * returned array.
 */
static int *ps_read_tasks (int pid)
{
	int *list = NULL;
	int  list_size = 1; /* size of allocated space, in elements */
	int  list_len = 0;  /* number of currently used elements */

	char           dirname[64];
	DIR           *dh;
	struct dirent *ent;

	snprintf (dirname, sizeof (dirname), "/proc/%i/task", pid);
	dirname[sizeof (dirname) - 1] = '\0';

	if ((dh = opendir (dirname)) == NULL)
	{
		syslog (LOG_NOTICE, "processes plugin: Failed to open directory `%s'",
				dirname);
		return (NULL);
	}

	while ((ent = readdir (dh)) != NULL)
	{
		/* <ctype.h> functions require a value in the range of
		 * `unsigned char'. */
		if (!isdigit ((unsigned char) ent->d_name[0]))
			continue;

		/* Always keep one spare element for the terminating zero. */
		if ((list_len + 1) >= list_size)
		{
			int *new_ptr;
			int  new_size = 2 * list_size;
			/* Comes in sizes: 2, 4, 8, 16, ... */

			new_ptr = realloc (list, sizeof (int) * (size_t) new_size);
			if (new_ptr == NULL)
			{
				free (list);
				/* Don't leak the directory handle. */
				closedir (dh);
				syslog (LOG_ERR, "processes plugin: "
						"Failed to allocate more memory.");
				return (NULL);
			}

			list = new_ptr;
			list_size = new_size;

			memset (list + list_len, 0, sizeof (int) * (list_size - list_len));
		}

		/* An ID of zero is overwritten by the next entry. */
		list[list_len] = atoi (ent->d_name);
		if (list[list_len] != 0)
			list_len++;
	}

	closedir (dh);

	/* No numeric entry at all, e.g. because the process went away. */
	if (list == NULL)
		return (NULL);

	assert (list_len < list_size);
	assert (list[list_len] == 0);

	return (list);
}
422 int ps_read_process (int pid, procstat_t *ps, char *state)
423 {
424 char filename[64];
425 char buffer[1024];
426 FILE *fh;
428 char *fields[64];
429 char fields_len;
431 int *tasks;
432 int i;
434 int ppid;
435 int name_len;
437 memset (ps, 0, sizeof (procstat_t));
439 snprintf (filename, 64, "/proc/%i/stat", pid);
440 filename[63] = '\0';
442 if ((fh = fopen (filename, "r")) == NULL)
443 return (-1);
445 if (fgets (buffer, 1024, fh) == NULL)
446 {
447 fclose (fh);
448 return (-1);
449 }
451 fclose (fh);
453 fields_len = strsplit (buffer, fields, 64);
454 if (fields_len < 24)
455 {
456 DBG ("`%s' has only %i fields..",
457 filename, fields_len);
458 return (-1);
459 }
460 else if (fields_len != 41)
461 {
462 DBG ("WARNING: (fields_len = %i) != 41", fields_len);
463 }
465 /* copy the name, strip brackets in the process */
466 name_len = strlen (fields[1]) - 2;
467 if ((fields[1][0] != '(') || (fields[1][name_len + 1] != ')'))
468 {
469 DBG ("No brackets found in process name: `%s'", fields[1]);
470 return (-1);
471 }
472 fields[1] = fields[1] + 1;
473 fields[1][name_len] = '\0';
474 strncpy (ps->name, fields[1], PROCSTAT_NAME_LEN);
476 ppid = atoi (fields[3]);
478 if ((tasks = ps_read_tasks (pid)) == NULL)
479 {
480 DBG ("ps_read_tasks (%i) failed.", pid);
481 return (-1);
482 }
484 *state = '\0';
485 ps->num_lwp = 0;
486 ps->num_proc = 1;
487 for (i = 0; tasks[i] != 0; i++)
488 ps->num_lwp++;
490 free (tasks);
491 tasks = NULL;
493 /* Leave the rest at zero if this is only an LWP */
494 if (ps->num_proc == 0)
495 {
496 DBG ("This is only an LWP: pid = %i; name = %s;",
497 pid, ps->name);
498 return (0);
499 }
501 ps->vmem_minflt = atol (fields[9]);
502 ps->vmem_majflt = atol (fields[11]);
503 ps->cpu_user = atoll (fields[13]);
504 ps->cpu_system = atoll (fields[14]);
505 ps->vmem_rss = atol (fields[23]);
507 /* Convert jiffies to useconds */
508 ps->cpu_user = ps->cpu_user * 1000000 / CONFIG_HZ;
509 ps->cpu_system = ps->cpu_system * 1000000 / CONFIG_HZ;
510 ps->vmem_rss = ps->vmem_rss * pagesize_g;
512 *state = fields[2][0];
514 /* success */
515 return (0);
516 } /* int ps_read_process (...) */
517 #endif /* KERNEL_LINUX */
/*
 * Read callback. Counts the processes (Linux) or threads (Mach) per state
 * and submits the totals via ps_submit (). On Linux the statistics of all
 * processes whose name has been configured are summed up and submitted via
 * ps_submit_proc () as well.
 */
static void ps_read (void)
{
#if HAVE_THREAD_INFO
	kern_return_t status;

	int             pset;
	processor_set_t port_pset_priv;

	int                    task;
	task_array_t           task_list;
	mach_msg_type_number_t task_list_len;

	int                      thread;
	thread_act_array_t       thread_list;
	mach_msg_type_number_t   thread_list_len;
	thread_basic_info_data_t thread_data;
	mach_msg_type_number_t   thread_data_len;

	int running  = 0;
	int sleeping = 0;
	int zombies  = 0;
	int stopped  = 0;
	int blocked  = 0;

	/*
	 * The Mach-concept is a little different from the traditional UNIX
	 * concept: All the work is done in threads. Threads are contained in
	 * `tasks'. Therefore, `task status' doesn't make much sense, since
	 * it's actually a `thread status'.
	 * Tasks are assigned to sets of processors, so that's where you go to
	 * get a list.
	 */
	for (pset = 0; pset < pset_list_len; pset++)
	{
		if ((status = host_processor_set_priv (port_host_self,
						pset_list[pset],
						&port_pset_priv)) != KERN_SUCCESS)
		{
			syslog (LOG_ERR, "host_processor_set_priv failed: %s\n",
					mach_error_string (status));
			continue;
		}

		if ((status = processor_set_tasks (port_pset_priv,
						&task_list,
						&task_list_len)) != KERN_SUCCESS)
		{
			syslog (LOG_ERR, "processor_set_tasks failed: %s\n",
					mach_error_string (status));
			mach_port_deallocate (port_task_self, port_pset_priv);
			continue;
		}

		for (task = 0; task < task_list_len; task++)
		{
			status = task_threads (task_list[task], &thread_list,
					&thread_list_len);
			if (status != KERN_SUCCESS)
			{
				/* Apple's `top' treats this case as a zombie.
				 * It makes sense to some extent: A `zombie'
				 * thread is nonsense, since the task/process
				 * is dead. */
				zombies++;
				DBG ("task_threads failed: %s",
						mach_error_string (status));
				if (task_list[task] != port_task_self)
					mach_port_deallocate (port_task_self,
							task_list[task]);
				continue; /* with next task_list */
			}

			for (thread = 0; thread < thread_list_len; thread++)
			{
				thread_data_len = THREAD_BASIC_INFO_COUNT;
				status = thread_info (thread_list[thread],
						THREAD_BASIC_INFO,
						(thread_info_t) &thread_data,
						&thread_data_len);
				if (status != KERN_SUCCESS)
				{
					syslog (LOG_ERR, "thread_info failed: %s\n",
							mach_error_string (status));
					if (task_list[task] != port_task_self)
						mach_port_deallocate (port_task_self,
								thread_list[thread]);
					continue; /* with next thread_list */
				}

				switch (thread_data.run_state)
				{
					case TH_STATE_RUNNING:
						running++;
						break;
					case TH_STATE_STOPPED:
					/* What exactly is `halted'? */
					case TH_STATE_HALTED:
						stopped++;
						break;
					case TH_STATE_WAITING:
						sleeping++;
						break;
					case TH_STATE_UNINTERRUPTIBLE:
						blocked++;
						break;
					/* There is no `zombie' case here,
					 * since there are no zombie-threads.
					 * There's only zombie tasks, which are
					 * handled above. */
					default:
						/* `run_state' is an integer, so
						 * it must not be printed using
						 * `%s'. */
						syslog (LOG_WARNING,
								"Unknown thread status: %i",
								(int) thread_data.run_state);
						break;
				} /* switch (thread_data.run_state) */

				if (task_list[task] != port_task_self)
				{
					status = mach_port_deallocate (port_task_self,
							thread_list[thread]);
					if (status != KERN_SUCCESS)
						syslog (LOG_ERR, "mach_port_deallocate failed: %s",
								mach_error_string (status));
				}
			} /* for (thread_list) */

			if ((status = vm_deallocate (port_task_self,
							(vm_address_t) thread_list,
							thread_list_len * sizeof (thread_act_t)))
					!= KERN_SUCCESS)
			{
				syslog (LOG_ERR, "vm_deallocate failed: %s",
						mach_error_string (status));
			}
			thread_list = NULL;
			thread_list_len = 0;

			/* Only deallocate the task port, if it isn't our own.
			 * Don't know what would happen in that case, but this
			 * is what Apple's top does.. ;) */
			if (task_list[task] != port_task_self)
			{
				status = mach_port_deallocate (port_task_self,
						task_list[task]);
				if (status != KERN_SUCCESS)
					syslog (LOG_ERR, "mach_port_deallocate failed: %s",
							mach_error_string (status));
			}
		} /* for (task_list) */

		if ((status = vm_deallocate (port_task_self,
						(vm_address_t) task_list,
						task_list_len * sizeof (task_t))) != KERN_SUCCESS)
		{
			syslog (LOG_ERR, "vm_deallocate failed: %s",
					mach_error_string (status));
		}
		task_list = NULL;
		task_list_len = 0;

		if ((status = mach_port_deallocate (port_task_self, port_pset_priv))
				!= KERN_SUCCESS)
		{
			syslog (LOG_ERR, "mach_port_deallocate failed: %s",
					mach_error_string (status));
		}
	} /* for (pset_list) */

	/* The `paging' state is not determined here, hence the -1. */
	ps_submit (running, sleeping, zombies, stopped, -1, blocked);
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
	int running  = 0;
	int sleeping = 0;
	int zombies  = 0;
	int stopped  = 0;
	int paging   = 0;
	int blocked  = 0;

	struct dirent *ent;
	DIR           *proc;
	int            pid;

	int        status;
	procstat_t ps;
	char       state;

	procstat_t *ps_ptr;

	/* Start every iteration with empty per-name sums. */
	ps_list_reset (list_head_g);

	if ((proc = opendir ("/proc")) == NULL)
	{
		syslog (LOG_ERR, "Cannot open `/proc': %s", strerror (errno));
		return;
	}

	while ((ent = readdir (proc)) != NULL)
	{
		/* Only the numeric entries of `/proc' are processes.
		 * <ctype.h> functions require a value in the range of
		 * `unsigned char'. */
		if (!isdigit ((unsigned char) ent->d_name[0]))
			continue;

		if ((pid = atoi (ent->d_name)) < 1)
			continue;

		status = ps_read_process (pid, &ps, &state);
		if (status != 0)
		{
			DBG ("ps_read_process failed: %i", status);
			continue;
		}

		switch (state)
		{
			case 'R': running++;  break;
			case 'S': sleeping++; break;
			case 'D': blocked++;  break;
			case 'Z': zombies++;  break;
			case 'T': stopped++;  break;
			case 'W': paging++;   break;
		}

		if (list_head_g != NULL)
			ps_list_add (list_head_g, &ps);
	}

	closedir (proc);

	ps_submit (running, sleeping, zombies, stopped, paging, blocked);

	for (ps_ptr = list_head_g; ps_ptr != NULL; ps_ptr = ps_ptr->next)
		ps_submit_proc (ps_ptr);
#endif /* KERNEL_LINUX */
}
754 #else
755 # define ps_read NULL
756 #endif /* PROCESSES_HAVE_READ */
758 void module_register (void)
759 {
760 plugin_register (MODULE_NAME, ps_init, ps_read, ps_write);
761 plugin_register ("ps_rss", NULL, NULL, ps_rss_write);
762 plugin_register ("ps_cputime", NULL, NULL, ps_cputime_write);
763 cf_register (MODULE_NAME, ps_config, config_keys, config_keys_num);
764 }
766 #undef BUFSIZE
767 #undef MODULE_NAME