X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=src%2Fprocesses.c;h=ebf13815a1b7a204db9fc35f33208f73132ae381;hb=634504760b46b852ec2b812a7b68277e9c005f1b;hp=403fb7fa43c3f1a7c0de5c87a109deb47f9d268f;hpb=01d76f96fe59d46258e13be4c1e4c354797121d2;p=collectd.git diff --git a/src/processes.c b/src/processes.c index 403fb7fa..ebf13815 100644 --- a/src/processes.c +++ b/src/processes.c @@ -69,6 +69,9 @@ # if HAVE_MACH_VM_PROT_H # include # endif +# if HAVE_SYS_SYSCTL_H +# include +# endif /* #endif HAVE_THREAD_INFO */ #elif KERNEL_LINUX @@ -122,25 +125,71 @@ static char *ps_cputime_ds_def[] = }; static int ps_cputime_ds_num = 2; +static char *ps_count_file = "processes/ps_count-%s.rrd"; +static char *ps_count_ds_def[] = +{ + "DS:processes:GAUGE:"COLLECTD_HEARTBEAT":0:65535", + "DS:threads:GAUGE:"COLLECTD_HEARTBEAT":0:65535", + NULL +}; +static int ps_count_ds_num = 2; + +static char *ps_pagefaults_file = "processes/ps_pagefaults-%s.rrd"; +static char *ps_pagefaults_ds_def[] = +{ + /* max = 2^63 - 1 */ + "DS:minflt:COUNTER:"COLLECTD_HEARTBEAT":0:9223372036854775807", + "DS:majflt:COUNTER:"COLLECTD_HEARTBEAT":0:9223372036854775807", + NULL +}; +static int ps_pagefaults_ds_num = 2; + static char *config_keys[] = { - "CollectName", + "Process", NULL }; static int config_keys_num = 1; -typedef struct procstat +typedef struct procstat_entry_s { + unsigned long id; + unsigned long age; + + unsigned long num_proc; + unsigned long num_lwp; + unsigned long vmem_rss; + + unsigned long vmem_minflt; + unsigned long vmem_majflt; + unsigned long vmem_minflt_counter; + unsigned long vmem_majflt_counter; + + unsigned long cpu_user; + unsigned long cpu_system; + unsigned long cpu_user_counter; + unsigned long cpu_system_counter; + + struct procstat_entry_s *next; +} procstat_entry_t; + #define PROCSTAT_NAME_LEN 256 - char name[PROCSTAT_NAME_LEN]; - unsigned int num_proc; - unsigned int num_lwp; - unsigned long vmem_rss; - unsigned long vmem_minflt; - unsigned long vmem_majflt; - unsigned long long cpu_user; - unsigned long long cpu_system; +typedef struct procstat +{ + char name[PROCSTAT_NAME_LEN]; + + unsigned long num_proc; + unsigned long num_lwp; + unsigned long vmem_rss; + + unsigned long vmem_minflt_counter; + unsigned long vmem_majflt_counter; + + unsigned long cpu_user_counter; + unsigned long cpu_system_counter; + struct procstat *next; + struct procstat_entry_s *instances; } procstat_t; static procstat_t *list_head_g = NULL; @@ -157,75 +206,210 @@ static mach_msg_type_number_t pset_list_len; static long pagesize_g; #endif /* KERNEL_LINUX */ -static procstat_t *ps_list_append (procstat_t *list, const char *name) +#if HAVE_THREAD_INFO | KERNEL_LINUX +static void ps_list_register (const char *name) { procstat_t *new; procstat_t *ptr; if ((new = (procstat_t *) malloc (sizeof (procstat_t))) == NULL) - return (NULL); + return; memset (new, 0, sizeof (procstat_t)); strncpy (new->name, name, PROCSTAT_NAME_LEN); - for (ptr = list; ptr != NULL; ptr = ptr->next) + for (ptr = list_head_g; ptr != NULL; ptr = ptr->next) + { + if (strcmp (ptr->name, name) == 0) + return; if (ptr->next == NULL) break; + } - if (ptr != NULL) + if (ptr == NULL) + list_head_g = new; + else ptr->next = new; - - return (new); } -static void ps_list_add (procstat_t *list, procstat_t *entry) +static procstat_t *ps_list_search (const char *name) { procstat_t *ptr; - ptr = list; - while ((ptr != NULL) && (strcmp (ptr->name, entry->name) != 0)) - ptr = ptr->next; + for (ptr = list_head_g; ptr != NULL; ptr = ptr->next) + if (strcmp (ptr->name, name) == 0) + break; - if (ptr == NULL) + return (ptr); +} + +static void ps_list_add (const char *name, procstat_entry_t *entry) +{ + procstat_t *ps; + procstat_entry_t *pse; + + if (entry->id == 0) return; - ptr->num_proc += entry->num_proc; - ptr->num_lwp += entry->num_lwp; - ptr->vmem_rss += entry->vmem_rss; - ptr->vmem_minflt += entry->vmem_minflt; - ptr->vmem_majflt += entry->vmem_majflt; - ptr->cpu_user += entry->cpu_user; - ptr->cpu_system += entry->cpu_system; + if ((ps = ps_list_search (name)) == NULL) + return; + + for (pse = ps->instances; pse != NULL; pse = pse->next) + if ((pse->id == entry->id) || (pse->next == NULL)) + break; + + if ((pse == NULL) || (pse->id != entry->id)) + { + procstat_entry_t *new; + + new = (procstat_entry_t *) malloc (sizeof (procstat_entry_t)); + if (new == NULL) + return; + memset (new, 0, sizeof (procstat_entry_t)); + new->id = entry->id; + + if (pse == NULL) + ps->instances = new; + else + pse->next = new; + + pse = new; + } + + pse->age = 0; + pse->num_proc = entry->num_proc; + pse->num_lwp = entry->num_lwp; + pse->vmem_rss = entry->vmem_rss; + + ps->num_proc += pse->num_proc; + ps->num_lwp += pse->num_lwp; + ps->vmem_rss += pse->vmem_rss; + + if ((entry->vmem_minflt_counter == 0) + && (entry->vmem_majflt_counter == 0)) + { + pse->vmem_minflt_counter += entry->vmem_minflt; + pse->vmem_minflt = entry->vmem_minflt; + + pse->vmem_majflt_counter += entry->vmem_majflt; + pse->vmem_majflt = entry->vmem_majflt; + } + else + { + if (entry->vmem_minflt_counter < pse->vmem_minflt_counter) + { + pse->vmem_minflt = entry->vmem_minflt_counter + + (ULONG_MAX - pse->vmem_minflt_counter); + } + else + { + pse->vmem_minflt = entry->vmem_minflt_counter - pse->vmem_minflt_counter; + } + pse->vmem_minflt_counter = entry->vmem_minflt_counter; + + if (entry->vmem_majflt_counter < pse->vmem_majflt_counter) + { + pse->vmem_majflt = entry->vmem_majflt_counter + + (ULONG_MAX - pse->vmem_majflt_counter); + } + else + { + pse->vmem_majflt = entry->vmem_majflt_counter - pse->vmem_majflt_counter; + } + pse->vmem_majflt_counter = entry->vmem_majflt_counter; + } + + ps->vmem_minflt_counter += pse->vmem_minflt; + ps->vmem_majflt_counter += pse->vmem_majflt; + + if ((entry->cpu_user_counter == 0) + && (entry->cpu_system_counter == 0)) + { + pse->cpu_user_counter += entry->cpu_user; + pse->cpu_user = entry->cpu_user; + + pse->cpu_system_counter += entry->cpu_system; + pse->cpu_system = entry->cpu_system; + } + else + { + if (entry->cpu_user_counter < pse->cpu_user_counter) + { + pse->cpu_user = entry->cpu_user_counter + + (ULONG_MAX - pse->cpu_user_counter); + } + else + { + pse->cpu_user = entry->cpu_user_counter - pse->cpu_user_counter; + } + pse->cpu_user_counter = entry->cpu_user_counter; + + if (entry->cpu_system_counter < pse->cpu_system_counter) + { + pse->cpu_system = entry->cpu_system_counter + + (ULONG_MAX - pse->cpu_system_counter); + } + else + { + pse->cpu_system = entry->cpu_system_counter - pse->cpu_system_counter; + } + pse->cpu_system_counter = entry->cpu_system_counter; + } + + ps->cpu_user_counter += pse->cpu_user; + ps->cpu_system_counter += pse->cpu_system; } -static void ps_list_reset (procstat_t *ps) +static void ps_list_reset (void) { - while (ps != NULL) + procstat_t *ps; + procstat_entry_t *pse; + procstat_entry_t *pse_prev; + + for (ps = list_head_g; ps != NULL; ps = ps->next) { ps->num_proc = 0; ps->num_lwp = 0; ps->vmem_rss = 0; - ps->vmem_minflt = 0; - ps->vmem_majflt = 0; - ps->cpu_user = 0; - ps->cpu_system = 0; - ps = ps->next; - } + + pse_prev = NULL; + pse = ps->instances; + while (pse != NULL) + { + if (pse->age > 10) + { + DBG ("Removing this procstat entry cause it's too old: " + "id = %lu; name = %s;", + pse->id, ps->name); + + if (pse_prev == NULL) + { + ps->instances = pse->next; + free (pse); + pse = ps->instances; + } + else + { + pse_prev->next = pse->next; + free (pse); + pse = pse_prev->next; + } + } + else + { + pse->age++; + pse_prev = pse; + pse = pse->next; + } + } /* while (pse != NULL) */ + } /* for (ps = list_head_g; ps != NULL; ps = ps->next) */ } +#endif /* HAVE_THREAD_INFO | KERNEL_LINUX */ static int ps_config (char *key, char *value) { - if (strcasecmp (key, "CollectName") == 0) + if (strcasecmp (key, "Process") == 0) { - procstat_t *entry; - - entry = ps_list_append (list_head_g, value); - if (entry == NULL) - { - syslog (LOG_ERR, "processes plugin: ps_list_append failed."); - return (1); - } - if (list_head_g == NULL) - list_head_g = entry; + ps_list_register (value); } else { @@ -306,6 +490,36 @@ static void ps_cputime_write (char *host, char *inst, char *val) ps_cputime_ds_def, ps_cputime_ds_num); } +static void ps_count_write (char *host, char *inst, char *val) +{ + char filename[256]; + int status; + + status = snprintf (filename, 256, ps_count_file, inst); + if ((status < 1) || (status >= 256)) + return; + + DBG ("host = %s; filename = %s; val = %s;", + host, filename, val); + rrd_update_file (host, filename, val, + ps_count_ds_def, ps_count_ds_num); +} + +static void ps_pagefaults_write (char *host, char *inst, char *val) +{ + char filename[256]; + int status; + + status = snprintf (filename, 256, ps_pagefaults_file, inst); + if ((status < 1) || (status >= 256)) + return; + + DBG ("host = %s; filename = %s; val = %s;", + host, filename, val); + rrd_update_file (host, filename, val, + ps_pagefaults_ds_def, ps_pagefaults_ds_num); +} + #if PROCESSES_HAVE_READ static void ps_submit (int running, int sleeping, @@ -328,7 +542,7 @@ static void ps_submit (int running, plugin_submit (MODULE_NAME, "-", buf); } -static void ps_submit_proc (procstat_t *ps) +static void ps_submit_proc_list (procstat_t *ps) { char buffer[64]; @@ -344,17 +558,29 @@ static void ps_submit_proc (procstat_t *ps) snprintf (buffer, 64, "%u:%u:%u", (unsigned int) curtime, /* Make the counter overflow */ - (unsigned int) (ps->cpu_user & 0xFFFFFFFF), - (unsigned int) (ps->cpu_system & 0xFFFFFFFF)); + (unsigned int) (ps->cpu_user_counter & 0xFFFFFFFF), + (unsigned int) (ps->cpu_system_counter & 0xFFFFFFFF)); buffer[63] = '\0'; plugin_submit ("ps_cputime", ps->name, buffer); - DBG ("name = %s; num_proc = %i; num_lwp = %i; vmem_rss = %i; " - "vmem_minflt = %i; vmem_majflt = %i; " - "cpu_user = %i; cpu_system = %i;", + snprintf (buffer, 64, "%u:%lu:%lu", + (unsigned int) curtime, + ps->num_proc, ps->num_lwp); + buffer[63] = '\0'; + plugin_submit ("ps_count", ps->name, buffer); + + snprintf (buffer, 64, "%u:%lu:%lu", + (unsigned int) curtime, + ps->vmem_minflt_counter, ps->vmem_majflt_counter); + buffer[63] = '\0'; + plugin_submit ("ps_pagefaults", ps->name, buffer); + + DBG ("name = %s; num_proc = %lu; num_lwp = %lu; vmem_rss = %lu; " + "vmem_minflt_counter = %lu; vmem_majflt_counter = %lu; " + "cpu_user_counter = %lu; cpu_system_counter = %lu;", ps->name, ps->num_proc, ps->num_lwp, ps->vmem_rss, - ps->vmem_minflt, ps->vmem_majflt, ps->cpu_user, - ps->cpu_system); + ps->vmem_minflt_counter, ps->vmem_majflt_counter, ps->cpu_user_counter, + ps->cpu_system_counter); } @@ -434,6 +660,10 @@ int ps_read_process (int pid, procstat_t *ps, char *state) int ppid; int name_len; + long long unsigned cpu_user_counter; + long long unsigned cpu_system_counter; + long long unsigned vmem_rss; + memset (ps, 0, sizeof (procstat_t)); snprintf (filename, 64, "/proc/%i/stat", pid); @@ -477,18 +707,23 @@ int ps_read_process (int pid, procstat_t *ps, char *state) if ((tasks = ps_read_tasks (pid)) == NULL) { + /* This happends for zombied, e.g. */ DBG ("ps_read_tasks (%i) failed.", pid); - return (-1); + *state = 'Z'; + ps->num_lwp = 0; + ps->num_proc = 0; + } + else + { + *state = '\0'; + ps->num_lwp = 0; + ps->num_proc = 1; + for (i = 0; tasks[i] != 0; i++) + ps->num_lwp++; + + free (tasks); + tasks = NULL; } - - *state = '\0'; - ps->num_lwp = 0; - ps->num_proc = 1; - for (i = 0; tasks[i] != 0; i++) - ps->num_lwp++; - - free (tasks); - tasks = NULL; /* Leave the rest at zero if this is only an LWP */ if (ps->num_proc == 0) @@ -498,16 +733,20 @@ int ps_read_process (int pid, procstat_t *ps, char *state) return (0); } - ps->vmem_minflt = atol (fields[9]); - ps->vmem_majflt = atol (fields[11]); - ps->cpu_user = atoll (fields[13]); - ps->cpu_system = atoll (fields[14]); - ps->vmem_rss = atol (fields[23]); + cpu_user_counter = atoll (fields[13]); + cpu_system_counter = atoll (fields[14]); + vmem_rss = atoll (fields[23]); + ps->vmem_minflt_counter = atol (fields[9]); + ps->vmem_majflt_counter = atol (fields[11]); /* Convert jiffies to useconds */ - ps->cpu_user = ps->cpu_user * 1000000 / CONFIG_HZ; - ps->cpu_system = ps->cpu_system * 1000000 / CONFIG_HZ; - ps->vmem_rss = ps->vmem_rss * pagesize_g; + cpu_user_counter = cpu_user_counter * 1000000 / CONFIG_HZ; + cpu_system_counter = cpu_system_counter * 1000000 / CONFIG_HZ; + vmem_rss = vmem_rss * pagesize_g; + + ps->cpu_user_counter = (unsigned long) cpu_user_counter; + ps->cpu_system_counter = (unsigned long) cpu_system_counter; + ps->vmem_rss = (unsigned long) vmem_rss; *state = fields[2][0]; @@ -516,6 +755,42 @@ int ps_read_process (int pid, procstat_t *ps, char *state) } /* int ps_read_process (...) */ #endif /* KERNEL_LINUX */ +#if HAVE_THREAD_INFO +static int mach_get_task_name (task_t t, int *pid, char *name, size_t name_max_len) +{ + int mib[4]; + + struct kinfo_proc kp; + size_t kp_size; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PID; + + if (pid_for_task (t, pid) != KERN_SUCCESS) + return (-1); + mib[3] = *pid; + + kp_size = sizeof (kp); + if (sysctl (mib, 4, &kp, &kp_size, NULL, 0) != 0) + return (-1); + + if (name_max_len > (MAXCOMLEN + 1)) + name_max_len = MAXCOMLEN + 1; + + strncpy (name, kp.kp_proc.p_comm, name_max_len - 1); + name[name_max_len - 1] = '\0'; + + DBG ("pid = %i; name = %s;", *pid, name); + + /* We don't do the special handling for `p_comm == "LaunchCFMApp"' as + * `top' does it, because it is a lot of work and only used when + * debugging. -octo */ + + return (0); +} +#endif /* HAVE_THREAD_INFO */ + static void ps_read (void) { #if HAVE_THREAD_INFO @@ -528,6 +803,9 @@ static void ps_read (void) task_array_t task_list; mach_msg_type_number_t task_list_len; + int task_pid; + char task_name[MAXCOMLEN + 1]; + int thread; thread_act_array_t thread_list; mach_msg_type_number_t thread_list_len; @@ -540,6 +818,11 @@ static void ps_read (void) int stopped = 0; int blocked = 0; + procstat_t *ps; + procstat_entry_t pse; + + ps_list_reset (); + /* * The Mach-concept is a little different from the traditional UNIX * concept: All the work is done in threads. Threads are contained in @@ -571,6 +854,71 @@ static void ps_read (void) for (task = 0; task < task_list_len; task++) { + ps = NULL; + if (mach_get_task_name (task_list[task], + &task_pid, + task_name, PROCSTAT_NAME_LEN) == 0) + ps = ps_list_search (task_name); + + /* Collect more detailed statistics for this process */ + if (ps != NULL) + { + task_basic_info_data_t task_basic_info; + mach_msg_type_number_t task_basic_info_len; + task_events_info_data_t task_events_info; + mach_msg_type_number_t task_events_info_len; + task_absolutetime_info_data_t task_absolutetime_info; + mach_msg_type_number_t task_absolutetime_info_len; + + memset (&pse, '\0', sizeof (pse)); + pse.id = task_pid; + + task_basic_info_len = TASK_BASIC_INFO_COUNT; + status = task_info (task_list[task], + TASK_BASIC_INFO, + (task_info_t) &task_basic_info, + &task_basic_info_len); + if (status != KERN_SUCCESS) + { + syslog (LOG_ERR, "task_info failed: %s", + mach_error_string (status)); + continue; /* with next thread_list */ + } + + task_events_info_len = TASK_EVENTS_INFO_COUNT; + status = task_info (task_list[task], + TASK_EVENTS_INFO, + (task_info_t) &task_events_info, + &task_events_info_len); + if (status != KERN_SUCCESS) + { + syslog (LOG_ERR, "task_info failed: %s", + mach_error_string (status)); + continue; /* with next thread_list */ + } + + task_absolutetime_info_len = TASK_ABSOLUTETIME_INFO_COUNT; + status = task_info (task_list[task], + TASK_ABSOLUTETIME_INFO, + (task_info_t) &task_absolutetime_info, + &task_absolutetime_info_len); + if (status != KERN_SUCCESS) + { + syslog (LOG_ERR, "task_info failed: %s", + mach_error_string (status)); + continue; /* with next thread_list */ + } + + pse.num_proc++; + pse.vmem_rss = task_basic_info.resident_size; + + pse.vmem_minflt_counter = task_events_info.cow_faults; + pse.vmem_majflt_counter = task_events_info.faults; + + pse.cpu_user_counter = task_absolutetime_info.total_user; + pse.cpu_system_counter = task_absolutetime_info.total_system; + } + status = task_threads (task_list[task], &thread_list, &thread_list_len); if (status != KERN_SUCCESS) @@ -597,7 +945,7 @@ static void ps_read (void) &thread_data_len); if (status != KERN_SUCCESS) { - syslog (LOG_ERR, "thread_info failed: %s\n", + syslog (LOG_ERR, "thread_info failed: %s", mach_error_string (status)); if (task_list[task] != port_task_self) mach_port_deallocate (port_task_self, @@ -605,6 +953,9 @@ static void ps_read (void) continue; /* with next thread_list */ } + if (ps != NULL) + pse.num_lwp++; + switch (thread_data.run_state) { case TH_STATE_RUNNING: @@ -664,6 +1015,9 @@ static void ps_read (void) syslog (LOG_ERR, "mach_port_deallocate failed: %s", mach_error_string (status)); } + + if (ps != NULL) + ps_list_add (task_name, &pse); } /* for (task_list) */ if ((status = vm_deallocate (port_task_self, @@ -685,6 +1039,9 @@ static void ps_read (void) } /* for (pset_list) */ ps_submit (running, sleeping, zombies, stopped, -1, blocked); + + for (ps = list_head_g; ps != NULL; ps = ps->next) + ps_submit_proc_list (ps); /* #endif HAVE_THREAD_INFO */ #elif KERNEL_LINUX @@ -701,12 +1058,13 @@ static void ps_read (void) int status; procstat_t ps; + procstat_entry_t pse; char state; procstat_t *ps_ptr; running = sleeping = zombies = stopped = paging = blocked = 0; - ps_list_reset (list_head_g); + ps_list_reset (); if ((proc = opendir ("/proc")) == NULL) { @@ -729,6 +1087,23 @@ static void ps_read (void) continue; } + pse.id = pid; + pse.age = 0; + + pse.num_proc = ps.num_proc; + pse.num_lwp = ps.num_lwp; + pse.vmem_rss = ps.vmem_rss; + + pse.vmem_minflt = 0; + pse.vmem_minflt_counter = ps.vmem_minflt_counter; + pse.vmem_majflt = 0; + pse.vmem_majflt_counter = ps.vmem_majflt_counter; + + pse.cpu_user = 0; + pse.cpu_user_counter = ps.cpu_user_counter; + pse.cpu_system = 0; + pse.cpu_system_counter = ps.cpu_system_counter; + switch (state) { case 'R': running++; break; @@ -739,8 +1114,7 @@ static void ps_read (void) case 'W': paging++; break; } - if (list_head_g != NULL) - ps_list_add (list_head_g, &ps); + ps_list_add (ps.name, &pse); } closedir (proc); @@ -748,7 +1122,7 @@ static void ps_read (void) ps_submit (running, sleeping, zombies, stopped, paging, blocked); for (ps_ptr = list_head_g; ps_ptr != NULL; ps_ptr = ps_ptr->next) - ps_submit_proc (ps_ptr); + ps_submit_proc_list (ps_ptr); #endif /* KERNEL_LINUX */ } #else @@ -760,6 +1134,8 @@ void module_register (void) plugin_register (MODULE_NAME, ps_init, ps_read, ps_write); plugin_register ("ps_rss", NULL, NULL, ps_rss_write); plugin_register ("ps_cputime", NULL, NULL, ps_cputime_write); + plugin_register ("ps_count", NULL, NULL, ps_count_write); + plugin_register ("ps_pagefaults", NULL, NULL, ps_pagefaults_write); cf_register (MODULE_NAME, ps_config, config_keys, config_keys_num); }