Code

16e6059f144430729f6ca267dbafc0078b022294
[collectd.git] / src / processes.c
1 /**
2  * collectd - src/processes.c
3  * Copyright (C) 2005  Lyonel Vincent
4  * Copyright (C) 2006  Florian Forster (Mach code)
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation; either version 2 of the License, or (at your
9  * option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with this program; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  *
20  * Authors:
21  *   Lyonel Vincent <lyonel at ezix.org>
22  *   Florian octo Forster <octo at verplant.org>
23  **/
25 #include "collectd.h"
26 #include "common.h"
27 #include "plugin.h"
28 #include "utils_debug.h"
29 #include "configfile.h"
31 /* Include header files for the mach system, if they exist.. */
32 #if HAVE_THREAD_INFO
33 #  if HAVE_MACH_MACH_INIT_H
34 #    include <mach/mach_init.h>
35 #  endif
36 #  if HAVE_MACH_HOST_PRIV_H
37 #    include <mach/host_priv.h>
38 #  endif
39 #  if HAVE_MACH_MACH_ERROR_H
40 #    include <mach/mach_error.h>
41 #  endif
42 #  if HAVE_MACH_MACH_HOST_H
43 #    include <mach/mach_host.h>
44 #  endif
45 #  if HAVE_MACH_MACH_PORT_H
46 #    include <mach/mach_port.h>
47 #  endif
48 #  if HAVE_MACH_MACH_TYPES_H
49 #    include <mach/mach_types.h>
50 #  endif
51 #  if HAVE_MACH_MESSAGE_H
52 #    include <mach/message.h>
53 #  endif
54 #  if HAVE_MACH_PROCESSOR_SET_H
55 #    include <mach/processor_set.h>
56 #  endif
57 #  if HAVE_MACH_TASK_H
58 #    include <mach/task.h>
59 #  endif
60 #  if HAVE_MACH_THREAD_ACT_H
61 #    include <mach/thread_act.h>
62 #  endif
63 #  if HAVE_MACH_VM_REGION_H
64 #    include <mach/vm_region.h>
65 #  endif
66 #  if HAVE_MACH_VM_MAP_H
67 #    include <mach/vm_map.h>
68 #  endif
69 #  if HAVE_MACH_VM_PROT_H
70 #    include <mach/vm_prot.h>
71 #  endif
72 /* #endif HAVE_THREAD_INFO */
74 #elif KERNEL_LINUX
75 #  if HAVE_LINUX_CONFIG_H
76 #    include <linux/config.h>
77 #  endif
78 #  ifndef CONFIG_HZ
79 #    define CONFIG_HZ 100
80 #  endif
81 #endif /* KERNEL_LINUX */
83 #define MODULE_NAME "processes"
85 #if HAVE_THREAD_INFO || KERNEL_LINUX
86 # define PROCESSES_HAVE_READ 1
87 #else
88 # define PROCESSES_HAVE_READ 0
89 #endif
91 #define BUFSIZE 256
93 static char *processes_file = "processes.rrd";
94 static char *processes_ds_def[] =
95 {
96         "DS:running:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
97         "DS:sleeping:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
98         "DS:zombies:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
99         "DS:stopped:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
100         "DS:paging:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
101         "DS:blocked:GAUGE:"COLLECTD_HEARTBEAT":0:65535",
102         NULL
103 };
104 static int processes_ds_num = 6;
106 static char *ps_rss_file = "processes/ps_rss-%s.rrd";
107 static char *ps_rss_ds_def[] =
109         /* max = 2^63 - 1 */
110         "DS:byte:GAUGE:"COLLECTD_HEARTBEAT":0:9223372036854775807",
111         NULL
112 };
113 static int ps_rss_ds_num = 1;
/* Configuration keys handled by ps_config (); the table is NULL-terminated. */
static char *config_keys[] =
{
	"CollectName",
	NULL
};

/* Number of keys in the table; the trailing NULL is not counted. */
static int config_keys_num =
	(int) (sizeof (config_keys) / sizeof (config_keys[0])) - 1;
/* Size of the name buffer in a procstat_t, including the terminating NUL. */
#define PROCSTAT_NAME_LEN 256

/*
 * Counters for one process name.  Entries form a singly linked list via
 * `next'; the list functions in this file match entries by `name' and sum
 * the counters of every process carrying that name.
 */
typedef struct procstat
{
	char               name[PROCSTAT_NAME_LEN]; /* process name (list key) */
	unsigned int       num_proc;    /* number of processes */
	unsigned int       num_lwp;     /* number of threads / tasks */
	unsigned long      vmem_rss;    /* resident set size */
	unsigned long      vmem_minflt; /* minor page faults */
	unsigned long      vmem_majflt; /* major page faults */
	unsigned long long cpu_user;    /* CPU time spent in user mode */
	unsigned long long cpu_system;  /* CPU time spent in kernel mode */
	struct procstat   *next;        /* next entry, NULL at the tail */
} procstat_t;
136 static procstat_t *list_head_g = NULL;
138 #if HAVE_THREAD_INFO
139 static mach_port_t port_host_self;
140 static mach_port_t port_task_self;
142 static processor_set_name_array_t pset_list;
143 static mach_msg_type_number_t     pset_list_len;
144 /* #endif HAVE_THREAD_INFO */
146 #elif KERNEL_LINUX
147 static long pagesize_g;
148 #endif /* KERNEL_LINUX */
150 static procstat_t *ps_list_append (procstat_t *list, const char *name)
152         procstat_t *new;
153         procstat_t *ptr;
155         if ((new = (procstat_t *) malloc (sizeof (procstat_t))) == NULL)
156                 return (NULL);
157         memset (new, 0, sizeof (procstat_t));
158         strncpy (new->name, name, PROCSTAT_NAME_LEN);
160         for (ptr = list; ptr != NULL; ptr = ptr->next)
161                 if (ptr->next == NULL)
162                         break;
164         if (ptr != NULL)
165                 ptr->next = new;
167         return (new);
170 static void ps_list_add (procstat_t *list, procstat_t *entry)
172         procstat_t *ptr;
174         ptr = list;
175         while ((ptr != NULL) && (strcmp (ptr->name, entry->name) != 0))
176                 ptr = ptr->next;
178         if (ptr == NULL)
179                 return;
181         ptr->num_proc    += entry->num_proc;
182         ptr->num_lwp     += entry->num_lwp;
183         ptr->vmem_rss    += entry->vmem_rss;
184         ptr->vmem_minflt += entry->vmem_minflt;
185         ptr->vmem_majflt += entry->vmem_majflt;
186         ptr->cpu_user    += entry->cpu_user;
187         ptr->cpu_system  += entry->cpu_system;
190 static void ps_list_reset (procstat_t *ps)
192         while (ps != NULL)
193         {
194                 ps->num_proc    = 0;
195                 ps->num_lwp     = 0;
196                 ps->vmem_rss    = 0;
197                 ps->vmem_minflt = 0;
198                 ps->vmem_majflt = 0;
199                 ps->cpu_user    = 0;
200                 ps->cpu_system  = 0;
201                 ps = ps->next;
202         }
205 static int ps_config (char *key, char *value)
207         if (strcasecmp (key, "CollectName") == 0)
208         {
209                 procstat_t *entry;
211                 entry = ps_list_append (list_head_g, value);
212                 if (entry == NULL)
213                 {
214                         syslog (LOG_ERR, "processes plugin: ps_list_append failed.");
215                         return (1);
216                 }
217                 if (list_head_g == NULL)
218                         list_head_g = entry;
219         }
220         else
221         {
222                 return (-1);
223         }
225         return (0);
/*
 * Init callback.  On Mach it fetches the host and task ports and (re)loads
 * the list of processor sets; on Linux it caches the page size.
 */
static void ps_init (void)
{
#if HAVE_THREAD_INFO
	kern_return_t status;

	port_host_self = mach_host_self ();
	port_task_self = mach_task_self ();

	/* Release a list left over from an earlier call before fetching a
	 * new one. */
	if (pset_list != NULL)
	{
		vm_deallocate (port_task_self,
				(vm_address_t) pset_list,
				pset_list_len * sizeof (processor_set_t));
		pset_list = NULL;
		pset_list_len = 0;
	}

	status = host_processor_sets (port_host_self,
			&pset_list,
			&pset_list_len);
	if (status != KERN_SUCCESS)
	{
		syslog (LOG_ERR, "host_processor_sets failed: %s\n",
				mach_error_string (status));
		pset_list = NULL;
		pset_list_len = 0;
	}
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
	pagesize_g = sysconf(_SC_PAGESIZE);
	DBG ("pagesize_g = %li; CONFIG_HZ = %i;",
			pagesize_g, CONFIG_HZ);
#endif /* KERNEL_LINUX */
}
266 static void ps_write (char *host, char *inst, char *val)
268         rrd_update_file (host, processes_file, val,
269                         processes_ds_def, processes_ds_num);
272 static void ps_rss_write (char *host, char *inst, char *val)
274         char filename[256];
275         int status;
277         status = snprintf (filename, 256, ps_rss_file, inst);
278         if ((status < 1) || (status >= 256))
279                 return;
281         rrd_update_file (host, filename, val, ps_rss_ds_def, ps_rss_ds_num);
284 #if PROCESSES_HAVE_READ
285 static void ps_submit (int running,
286                 int sleeping,
287                 int zombies,
288                 int stopped,
289                 int paging,
290                 int blocked)
292         char buf[BUFSIZE];
294         if (snprintf (buf, BUFSIZE, "%u:%i:%i:%i:%i:%i:%i",
295                                 (unsigned int) curtime,
296                                 running, sleeping, zombies, stopped, paging,
297                                 blocked) >= BUFSIZE)
298                 return;
300         DBG ("running = %i; sleeping = %i; zombies = %i; stopped = %i; paging = %i; blocked = %i;",
301                         running, sleeping, zombies, stopped, paging, blocked);
303         plugin_submit (MODULE_NAME, "-", buf);
306 static void ps_submit_proc (procstat_t *ps)
308         char buffer[64];
310         if (ps == NULL)
311                 return;
313         snprintf (buffer, 64, "%u:%lu",
314                         (unsigned int) curtime,
315                         ps->vmem_rss);
316         buffer[63] = '\0';
317         plugin_submit ("ps_rss", ps->name, buffer);
319         DBG ("name = %s; num_proc = %i; num_lwp = %i; vmem_rss = %i; "
320                         "vmem_minflt = %i; vmem_majflt = %i; "
321                         "cpu_user = %i; cpu_system = %i;",
322                         ps->name, ps->num_proc, ps->num_lwp, ps->vmem_rss,
323                         ps->vmem_minflt, ps->vmem_majflt, ps->cpu_user,
324                         ps->cpu_system);
328 #if KERNEL_LINUX
/*
 * Reads the numeric entries of `/proc/<pid>/task'.
 *
 * Returns a malloc'ed array of task ids terminated by a 0 element, which the
 * caller must free.  Returns NULL if the directory cannot be opened, if it
 * holds no task ids, or if memory runs out.
 */
static int *ps_read_tasks (int pid)
{
	int   *list = NULL;
	size_t list_size = 0; /* size of allocated space, in elements */
	size_t list_len = 0;  /* number of currently used elements */

	char           dirname[64];
	DIR           *dh;
	struct dirent *ent;

	snprintf (dirname, sizeof (dirname), "/proc/%i/task", pid);

	if ((dh = opendir (dirname)) == NULL)
	{
		syslog (LOG_NOTICE, "processes plugin: Failed to open directory `%s'",
				dirname);
		return (NULL);
	}

	while ((ent = readdir (dh)) != NULL)
	{
		int tid;

		/* <ctype.h> functions require an unsigned char value. */
		if (!isdigit ((unsigned char) ent->d_name[0]))
			continue;

		tid = atoi (ent->d_name);
		if (tid == 0)
			continue;

		/* Room is needed for this id plus the terminating 0. */
		if ((list_len + 2) > list_size)
		{
			/* Comes in sizes: 2, 4, 8, 16, ... */
			size_t new_size = (list_size == 0) ? 2 : (2 * list_size);
			int   *new_ptr;

			new_ptr = realloc (list, new_size * sizeof (*new_ptr));
			if (new_ptr == NULL)
			{
				free (list);
				closedir (dh);
				syslog (LOG_ERR, "processes plugin: "
						"Failed to allocate more memory.");
				return (NULL);
			}

			list = new_ptr;
			list_size = new_size;
		}

		list[list_len] = tid;
		list_len++;
		list[list_len] = 0;
	}

	closedir (dh);

	/* `list' is still NULL here if no task id was found. */
	return (list);
}
389 int ps_read_process (int pid, procstat_t *ps, char *state)
391         char  filename[64];
392         char  buffer[1024];
393         FILE *fh;
395         char *fields[64];
396         char  fields_len;
398         int  *tasks;
399         int   i;
401         int   ppid;
402         int   name_len;
404         memset (ps, 0, sizeof (procstat_t));
406         snprintf (filename, 64, "/proc/%i/stat", pid);
407         filename[63] = '\0';
409         if ((fh = fopen (filename, "r")) == NULL)
410                 return (-1);
412         if (fgets (buffer, 1024, fh) == NULL)
413         {
414                 fclose (fh);
415                 return (-1);
416         }
418         fclose (fh);
420         fields_len = strsplit (buffer, fields, 64);
421         if (fields_len < 24)
422         {
423                 DBG ("`%s' has only %i fields..",
424                                 filename, fields_len);
425                 return (-1);
426         }
427         else if (fields_len != 41)
428         {
429                 DBG ("WARNING: (fields_len = %i) != 41", fields_len);
430         }
432         /* copy the name, strip brackets in the process */
433         name_len = strlen (fields[1]) - 2;
434         if ((fields[1][0] != '(') || (fields[1][name_len + 1] != ')'))
435         {
436                 DBG ("No brackets found in process name: `%s'", fields[1]);
437                 return (-1);
438         }
439         fields[1] = fields[1] + 1;
440         fields[1][name_len] = '\0';
441         strncpy (ps->name, fields[1], PROCSTAT_NAME_LEN);
443         ppid = atoi (fields[3]);
445         if ((tasks = ps_read_tasks (pid)) == NULL)
446         {
447                 DBG ("ps_read_tasks (%i) failed.", pid);
448                 return (-1);
449         }
451         *state = '\0';
452         ps->num_lwp  = 0;
453         ps->num_proc = 1;
454         for (i = 0; tasks[i] != 0; i++)
455                 ps->num_lwp++;
457         free (tasks);
458         tasks = NULL;
460         /* Leave the rest at zero if this is only an LWP */
461         if (ps->num_proc == 0)
462         {
463                 DBG ("This is only an LWP: pid = %i; name = %s;",
464                                 pid, ps->name);
465                 return (0);
466         }
468         ps->vmem_minflt = atol  (fields[9]);
469         ps->vmem_majflt = atol  (fields[11]);
470         ps->cpu_user    = atoll (fields[13]);
471         ps->cpu_system  = atoll (fields[14]);
472         ps->vmem_rss    = atol  (fields[23]);
473         
474         /* Convert jiffies to useconds */
475         ps->cpu_user   = ps->cpu_user   * 1000000 / CONFIG_HZ;
476         ps->cpu_system = ps->cpu_system * 1000000 / CONFIG_HZ;
477         ps->vmem_rss   = ps->vmem_rss * pagesize_g;
479         *state = fields[2][0];
481         /* success */
482         return (0);
483 } /* int ps_read_process (...) */
484 #endif /* KERNEL_LINUX */
/*
 * Read callback.  Counts processes (Linux) or threads (Mach) per run state
 * and submits the totals.  On Linux the counters of all configured process
 * names are rebuilt as well and submitted one by one.
 */
static void ps_read (void)
{
#if HAVE_THREAD_INFO
	kern_return_t            status;

	int                      pset;
	processor_set_t          port_pset_priv;

	int                      task;
	task_array_t             task_list;
	mach_msg_type_number_t   task_list_len;

	int                      thread;
	thread_act_array_t       thread_list;
	mach_msg_type_number_t   thread_list_len;
	thread_basic_info_data_t thread_data;
	mach_msg_type_number_t   thread_data_len;

	int running  = 0;
	int sleeping = 0;
	int zombies  = 0;
	int stopped  = 0;
	int blocked  = 0;

	/*
	 * The Mach-concept is a little different from the traditional UNIX
	 * concept: All the work is done in threads. Threads are contained in
	 * `tasks'. Therefore, `task status' doesn't make much sense, since
	 * it's actually a `thread status'.
	 * Tasks are assigned to sets of processors, so that's where you go to
	 * get a list.
	 */
	for (pset = 0; pset < pset_list_len; pset++)
	{
		if ((status = host_processor_set_priv (port_host_self,
						pset_list[pset],
						&port_pset_priv)) != KERN_SUCCESS)
		{
			syslog (LOG_ERR, "host_processor_set_priv failed: %s\n",
					mach_error_string (status));
			continue;
		}

		if ((status = processor_set_tasks (port_pset_priv,
						&task_list,
						&task_list_len)) != KERN_SUCCESS)
		{
			syslog (LOG_ERR, "processor_set_tasks failed: %s\n",
					mach_error_string (status));
			mach_port_deallocate (port_task_self, port_pset_priv);
			continue;
		}

		for (task = 0; task < task_list_len; task++)
		{
			status = task_threads (task_list[task], &thread_list,
					&thread_list_len);
			if (status != KERN_SUCCESS)
			{
				/* Apple's `top' treats this case as a zombie.
				 * It makes sense to some extent: A `zombie'
				 * thread is nonsense, since the task/process
				 * is dead. */
				zombies++;
				DBG ("task_threads failed: %s",
						mach_error_string (status));
				if (task_list[task] != port_task_self)
					mach_port_deallocate (port_task_self,
							task_list[task]);
				continue; /* with next task_list */
			}

			for (thread = 0; thread < thread_list_len; thread++)
			{
				thread_data_len = THREAD_BASIC_INFO_COUNT;
				status = thread_info (thread_list[thread],
						THREAD_BASIC_INFO,
						(thread_info_t) &thread_data,
						&thread_data_len);
				if (status != KERN_SUCCESS)
				{
					syslog (LOG_ERR, "thread_info failed: %s\n",
							mach_error_string (status));
					if (task_list[task] != port_task_self)
						mach_port_deallocate (port_task_self,
								thread_list[thread]);
					continue; /* with next thread_list */
				}

				switch (thread_data.run_state)
				{
					case TH_STATE_RUNNING:
						running++;
						break;
					case TH_STATE_STOPPED:
					/* What exactly is `halted'? */
					case TH_STATE_HALTED:
						stopped++;
						break;
					case TH_STATE_WAITING:
						sleeping++;
						break;
					case TH_STATE_UNINTERRUPTIBLE:
						blocked++;
						break;
					/* There is no `zombie' case here,
					 * since there are no zombie-threads.
					 * There's only zombie tasks, which are
					 * handled above. */
					default:
						/* run_state is an integer, so
						 * it must not be printed with
						 * `%s'. */
						syslog (LOG_WARNING,
								"Unknown thread status: %i",
								(int) thread_data.run_state);
						break;
				} /* switch (thread_data.run_state) */

				if (task_list[task] != port_task_self)
				{
					status = mach_port_deallocate (port_task_self,
							thread_list[thread]);
					if (status != KERN_SUCCESS)
						syslog (LOG_ERR, "mach_port_deallocate failed: %s",
								mach_error_string (status));
				}
			} /* for (thread_list) */

			if ((status = vm_deallocate (port_task_self,
							(vm_address_t) thread_list,
							thread_list_len * sizeof (thread_act_t)))
					!= KERN_SUCCESS)
			{
				syslog (LOG_ERR, "vm_deallocate failed: %s",
						mach_error_string (status));
			}
			thread_list = NULL;
			thread_list_len = 0;

			/* Only deallocate the task port, if it isn't our own.
			 * Don't know what would happen in that case, but this
			 * is what Apple's top does.. ;) */
			if (task_list[task] != port_task_self)
			{
				status = mach_port_deallocate (port_task_self,
						task_list[task]);
				if (status != KERN_SUCCESS)
					syslog (LOG_ERR, "mach_port_deallocate failed: %s",
							mach_error_string (status));
			}
		} /* for (task_list) */

		if ((status = vm_deallocate (port_task_self,
				(vm_address_t) task_list,
				task_list_len * sizeof (task_t))) != KERN_SUCCESS)
		{
			syslog (LOG_ERR, "vm_deallocate failed: %s",
					mach_error_string (status));
		}
		task_list = NULL;
		task_list_len = 0;

		if ((status = mach_port_deallocate (port_task_self, port_pset_priv))
				!= KERN_SUCCESS)
		{
			syslog (LOG_ERR, "mach_port_deallocate failed: %s",
					mach_error_string (status));
		}
	} /* for (pset_list) */

	/* The paging count is not determined here; -1 is passed instead. */
	ps_submit (running, sleeping, zombies, stopped, -1, blocked);
/* #endif HAVE_THREAD_INFO */

#elif KERNEL_LINUX
	int running  = 0;
	int sleeping = 0;
	int zombies  = 0;
	int stopped  = 0;
	int paging   = 0;
	int blocked  = 0;

	struct dirent *ent;
	DIR           *proc;
	int            pid;

	int        status;
	procstat_t ps;
	char       state;

	procstat_t *ps_ptr;

	/* Start every interval with fresh per-name counters. */
	ps_list_reset (list_head_g);

	if ((proc = opendir ("/proc")) == NULL)
	{
		syslog (LOG_ERR, "Cannot open `/proc': %s", strerror (errno));
		return;
	}

	while ((ent = readdir (proc)) != NULL)
	{
		/* <ctype.h> functions require an unsigned char value. */
		if (!isdigit ((unsigned char) ent->d_name[0]))
			continue;

		if ((pid = atoi (ent->d_name)) < 1)
			continue;

		status = ps_read_process (pid, &ps, &state);
		if (status != 0)
		{
			DBG ("ps_read_process failed: %i", status);
			continue;
		}

		switch (state)
		{
			case 'R': running++;  break;
			case 'S': sleeping++; break;
			case 'D': blocked++;  break;
			case 'Z': zombies++;  break;
			case 'T': stopped++;  break;
			case 'W': paging++;   break;
			default:  /* other states are not counted */ break;
		}

		if (list_head_g != NULL)
			ps_list_add (list_head_g, &ps);
	}

	closedir (proc);

	ps_submit (running, sleeping, zombies, stopped, paging, blocked);

	for (ps_ptr = list_head_g; ps_ptr != NULL; ps_ptr = ps_ptr->next)
		ps_submit_proc (ps_ptr);
#endif /* KERNEL_LINUX */
}
721 #else
722 # define ps_read NULL
723 #endif /* PROCESSES_HAVE_READ */
725 void module_register (void)
727         plugin_register (MODULE_NAME, ps_init, ps_read, ps_write);
728         plugin_register ("ps_rss", NULL, NULL, ps_rss_write);
729         cf_register (MODULE_NAME, ps_config, config_keys, config_keys_num);
732 #undef BUFSIZE
733 #undef MODULE_NAME