From 07e262fbeaa432753ce8017098465a0e5870c36b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Andr=C3=A9s=20J=2E=20D=C3=ADaz?= Date: Wed, 21 Oct 2009 11:12:40 +0200 Subject: [PATCH] processes plugin: Add collection of IO-metrics. I attach a patch for collectd-4.8.0 adding a new feature which reads IO data for a process using /proc/<pid>/io (only works on Linux > 2.6.20). It's very useful for monitoring disk throughput and which process is related to high disk IO. I've been using this patch for days in some database environments with good results, but I think it is not stable enough yet (more testing is required). The patch reads from /proc/<pid>/io (where available) the data for syscr and syscw (read and write operations) and rchar and wchar (read and write bytes). --- src/collectd.conf.pod | 2 +- src/processes.c | 112 +++++++++++++++++++++++++++++++++++++++++- src/types.db | 2 + 3 files changed, 113 insertions(+), 3 deletions(-) diff --git a/src/collectd.conf.pod b/src/collectd.conf.pod index 3ea3d4dd..6bae3393 100644 --- a/src/collectd.conf.pod +++ b/src/collectd.conf.pod @@ -3235,7 +3235,7 @@ C/var/run/collectd-powerdns>. Select more detailed statistics of processes matching this name. The statistics collected for these selected processes are size of the resident segment size (RSS), user- and system-time used, number of processes and number of threads, -and minor and major pagefaults. +io data (where available) and minor and major pagefaults. 
=item B I I diff --git a/src/processes.c b/src/processes.c index 5f67abaa..0a8f8c4d 100644 --- a/src/processes.c +++ b/src/processes.c @@ -136,6 +136,12 @@ typedef struct procstat_entry_s unsigned long cpu_user_counter; unsigned long cpu_system_counter; + /* io data */ + long io_rchar; + long io_wchar; + long io_syscr; + long io_syscw; + struct procstat_entry_s *next; } procstat_entry_t; @@ -159,6 +165,12 @@ typedef struct procstat unsigned long cpu_user_counter; unsigned long cpu_system_counter; + /* io data */ + long io_rchar; + long io_wchar; + long io_syscr; + long io_syscw; + struct procstat *next; struct procstat_entry_s *instances; } procstat_t; @@ -328,6 +340,10 @@ static void ps_list_add (const char *name, const char *cmdline, procstat_entry_t pse->vmem_size = entry->vmem_size; pse->vmem_rss = entry->vmem_rss; pse->stack_size = entry->stack_size; + pse->io_rchar = entry->io_rchar; + pse->io_wchar = entry->io_wchar; + pse->io_syscr = entry->io_syscr; + pse->io_syscw = entry->io_syscw; ps->num_proc += pse->num_proc; ps->num_lwp += pse->num_lwp; @@ -335,6 +351,11 @@ static void ps_list_add (const char *name, const char *cmdline, procstat_entry_t ps->vmem_rss += pse->vmem_rss; ps->stack_size += pse->stack_size; + ps->io_rchar += ((pse->io_rchar == -1)?0:pse->io_rchar); + ps->io_wchar += ((pse->io_wchar == -1)?0:pse->io_wchar); + ps->io_syscr += ((pse->io_syscr == -1)?0:pse->io_syscr); + ps->io_syscw += ((pse->io_syscw == -1)?0:pse->io_syscw); + if ((entry->vmem_minflt_counter == 0) && (entry->vmem_majflt_counter == 0)) { @@ -425,6 +446,10 @@ static void ps_list_reset (void) ps->vmem_size = 0; ps->vmem_rss = 0; ps->stack_size = 0; + ps->io_rchar = -1; + ps->io_wchar = -1; + ps->io_syscr = -1; + ps->io_syscw = -1; pse_prev = NULL; pse = ps->instances; @@ -607,12 +632,33 @@ static void ps_submit_proc_list (procstat_t *ps) vl.values_len = 2; plugin_dispatch_values (&vl); + if ( (ps->io_rchar != -1) && (ps->io_wchar != -1) ) + { + sstrncpy (vl.type, "ps_diskbytes", 
sizeof (vl.type)); + vl.values[0].counter = ps->io_rchar; + vl.values[1].counter = ps->io_wchar; + vl.values_len = 2; + plugin_dispatch_values (&vl); + } + + if ( (ps->io_syscr != -1) && (ps->io_syscw != -1) ) + { + sstrncpy (vl.type, "ps_diskops", sizeof (vl.type)); + vl.values[0].counter = ps->io_syscr; + vl.values[1].counter = ps->io_syscw; + vl.values_len = 2; + plugin_dispatch_values (&vl); + } + DEBUG ("name = %s; num_proc = %lu; num_lwp = %lu; vmem_rss = %lu; " "vmem_minflt_counter = %lu; vmem_majflt_counter = %lu; " - "cpu_user_counter = %lu; cpu_system_counter = %lu;", + "cpu_user_counter = %lu; cpu_system_counter = %lu; " + "io_rchar = %ld; io_wchar = %ld; " + "io_syscr = %ld; io_syscw = %ld;", ps->name, ps->num_proc, ps->num_lwp, ps->vmem_rss, ps->vmem_minflt_counter, ps->vmem_majflt_counter, - ps->cpu_user_counter, ps->cpu_system_counter); + ps->cpu_user_counter, ps->cpu_system_counter, + ps->io_rchar, ps->io_wchar, ps->io_syscr, ps->io_syscw); } /* void ps_submit_proc_list */ /* ------- additional functions for KERNEL_LINUX/HAVE_THREAD_INFO ------- */ @@ -644,6 +690,52 @@ static int ps_read_tasks (int pid) return ((count >= 1) ? 
count : 1); } /* int *ps_read_tasks */ +procstat_t *ps_read_io (int pid, procstat_t *ps) +{ + FILE *fh; + char buffer[1024]; + char filename[64]; + + char *fields[8]; + int numfields; + + ssnprintf (filename, sizeof (filename), "/proc/%i/io", pid); + if ((fh = fopen (filename, "r")) == NULL) + return (NULL); + + while (fgets (buffer, 1024, fh) != NULL) + { + long *val = NULL; + + if (strncasecmp (buffer, "rchar:", 6) == 0) + val = &(ps->io_rchar); + else if (strncasecmp (buffer, "wchar:", 6) == 0) + val = &(ps->io_wchar); + else if (strncasecmp (buffer, "syscr:", 6) == 0) + val = &(ps->io_syscr); + else if (strncasecmp (buffer, "syscw:", 6) == 0) + val = &(ps->io_syscw); + else + continue; + + numfields = strsplit (buffer, fields, 8); + + if (numfields < 2) + continue; + + *val = atol (fields[1]); + } + + if (fclose (fh)) + { + char errbuf[1024]; + WARNING ("processes: fclose: %s", + sstrerror (errno, errbuf, sizeof (errbuf))); + } + + return (ps); +} /* procstat_t *ps_read_io */ + int ps_read_process (int pid, procstat_t *ps, char *state) { char filename[64]; @@ -746,6 +838,17 @@ int ps_read_process (int pid, procstat_t *ps, char *state) ps->vmem_rss = (unsigned long) vmem_rss; ps->stack_size = (unsigned long) stack_size; + if ( (ps_read_io (pid, ps)) == NULL) + { + /* no io data */ + ps->io_rchar = -1; + ps->io_wchar = -1; + ps->io_syscr = -1; + ps->io_syscw = -1; + + DEBUG("ps_read_process: not get io data for pid %i",pid); + } + /* success */ return (0); } /* int ps_read_process (...) 
*/ @@ -1274,6 +1377,11 @@ static int ps_read (void) pse.cpu_system = 0; pse.cpu_system_counter = ps.cpu_system_counter; + pse.io_rchar = ps.io_rchar; + pse.io_wchar = ps.io_wchar; + pse.io_syscr = ps.io_syscr; + pse.io_syscw = ps.io_syscw; + switch (state) { case 'R': running++; break; diff --git a/src/types.db b/src/types.db index 0225e0f6..aed5a22e 100644 --- a/src/types.db +++ b/src/types.db @@ -123,6 +123,8 @@ protocol_counter value:COUNTER:0:U ps_count processes:GAUGE:0:1000000, threads:GAUGE:0:1000000 ps_cputime user:COUNTER:0:16000000, syst:COUNTER:0:16000000 ps_pagefaults minflt:COUNTER:0:9223372036854775807, majflt:COUNTER:0:9223372036854775807 +ps_diskbytes read:COUNTER:0:16000000, write:COUNTER:0:16000000 +ps_diskops read:COUNTER:0:16000000, write:COUNTER:0:16000000 ps_rss value:GAUGE:0:9223372036854775807 ps_stacksize value:GAUGE:0:9223372036854775807 ps_state value:GAUGE:0:65535 -- 2.30.2