contrib/extractDS.px: Enhanced the script to modify RRD files a bit.

author Florian Forster <octo@huhu.verplant.org>

Fri, 22 Feb 2008 16:24:23 +0000 (17:24 +0100)

committer Florian Forster <octo@huhu.verplant.org>

Fri, 22 Feb 2008 16:24:23 +0000 (17:24 +0100)
author Florian Forster <octo@huhu.verplant.org>
Fri, 22 Feb 2008 16:24:23 +0000 (17:24 +0100)
committer Florian Forster <octo@huhu.verplant.org>
Fri, 22 Feb 2008 16:24:23 +0000 (17:24 +0100)
diff --git a/contrib/extractDS.px b/contrib/extractDS.px

index bdc4b3d2b0641e303704ec9dc1f96abb3cf95e2a..80d873b9a7480be87ea1b74ae1bbc9d9d2d7c4ac 100755 (executable)
--- a/contrib/extractDS.px
+++ b/contrib/extractDS.px
@@ -1,20 +1,39 @@
  #!/usr/bin/perl
  
+# collectd - contrib/rrd_filter.px
+# Copyright (C) 2007-2008  Florian octo Forster
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; only version 2 of the License is applicable.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+#
+# Authors:
+#   Florian octo Forster <octo at verplant.org>
+
  use strict;
  use warnings;
  
  =head1 NAME
  
-extractDS.px - Extract a single data-source from an RRD-file
+rrd_filter.px - Perform some advanced non-standard operations on an RRD file.
  
  =head1 SYNOPSYS
  
-  extractDS.px -i input.rrd -s source_ds -o output.rrd -d destination_ds
+  rrd_filter.px -i input.rrd -o output.rrd [options]
  
  =head1 DEPENDENCIES
  
-extractDS.px requires Perl and the included L<Getopt::Long> module, as well as
-the L<XML::Simple> module.
+rrd_filter.px requires the RRDTool binary, Perl and the included L<Getopt::Long>
+module.
  
  =cut
  
@@ -26,25 +45,95 @@ our $InDS = [];
  our $OutFile;
  our $OutDS = [];
  
+our $NewRRAs = [];
+
+our $Step = 0;
+
+=head1 OPTIONS
+
+The following options can be passed on the command line:
+
+=over 4
+
+=item B<--infile> I<file>
+
+=item B<-i> I<file>
+
+Reads from I<file>. If I<file> ends in C<.rrd>, then C<rrdtool dump> is invoked
+to create an XML dump of the RRD file. Otherwise the XML dump is expected
+directly. The special filename C<-> can be used to read from STDIN.
+
+=item B<--outfile> I<file>
+
+=item B<-o> I<file>
+
+Writes output to I<file>. If I<file> ends in C<.rrd>, then C<rrdtool restore>
+is invoked to create a binary RRD file. Otherwise an XML output is written. The
+special filename C<-> can be used to write to STDOUT.
+
+=item B<--map> I<in_ds>:I<out_ds>
+
+=item B<-m> I<in_ds>:I<out_ds>
+
+Writes the datasource I<in_ds> to the output and renames it to I<out_ds>. This
+is useful to extract one DS from an RRD file.
+
+=item B<--step> I<seconds>
+
+=item B<-s> I<seconds>
+
+Changes the step of the output RRD file to be I<seconds>. The new stepsize must
+be a multiple of the old stepsize or the other way around. When increasing the
+stepsize the number of PDPs in each RRA must be divisible by the factor by
+which the stepsize is increased. The length of CDPs and the absolute length of
+RRAs (and thus the data itself) is not altered.
+
+Examples:
+
+  step =  10, rra_steps = 12   =>   step = 60, rra_steps =  2
+  step = 300, rra_steps =  1   =>   step = 10, rra_steps = 30
+
+=item B<--rra> B<RRA>:I<CF>:I<XFF>:I<steps>:I<rows>
+
+=item B<-a> B<RRA>:I<CF>:I<XFF>:I<steps>:I<rows>
+
+Inserts a new RRA in the generated RRD file. This is done B<after> the step has
+been adjusted; take that into account when specifying I<steps> and I<rows>. For
+an explanation of the format please see L<rrdcreate(1)>.
+
+=back
+
+=cut
+
  GetOptions ("infile|i=s" => \$InFile,
-       "inds|s=s" => sub { push (@$InDS, $_[1]); },
         "outfile|o=s" => \$OutFile,
-       "outds|d=s" => sub { push (@$OutDS, $_[1]); })
-       or exit (1);
+       'map|m=s' => sub # callback for --map / -m; $_[1] is the option's value
+       {
+               my ($in_ds, $out_ds) = split (':', $_[1]);
+               if (!defined ($in_ds) || !defined ($out_ds)) # fewer than two fields
+               {
+                       print STDERR "Argument for `map' incorrect! The format is `--map in_ds:out_ds'\n";
+                       exit (1);
+               }
+               push (@$InDS, $in_ds);   # source DS name; same position as ...
+               push (@$OutDS, $out_ds); # ... its new name in the output
+       },
+       'step|s=i' => \$Step,
+       'rra|a=s' => sub # callback for --rra / -a; $_[1] is the option's value
+       {
+               my ($rra, $cf, $xff, $steps, $rows) = split (':', $_[1]);
+               if (!defined ($rows) || ($rra ne 'RRA')) # too few fields, or wrong keyword
+               {
+                       print STDERR "Please use the standard RRDTool syntax when adding RRAs. I. e. RRA:<cf>:<xff>:<steps>:<rows>.\n";
+                       exit (1);
+               }
+               push (@$NewRRAs, {cf => $cf, xff => $xff, steps => $steps, rows => $rows});
+       }
+) or exit (1);
  
-if (!$InFile || !$OutFile || !@$InDS || !@$OutDS)
+if (!$InFile || !$OutFile)
  {
-       print STDERR "Usage: $0 -i <infile> -s <inds> -o <outfile> -d <outds>\n";
-       exit (1);
-}
-if (!-f $InFile)
-{
-       print STDERR "Input file does not exist\n";
-       exit (1);
-}
-if (-f $OutFile)
-{
-       print STDERR "Output file does exist\n";
+       print STDERR "Usage: $0 -i <infile> -m <in_ds>:<out_ds> -s <step>\n";
         exit (1);
  }
  if ((1 + @$InDS) != (1 + @$OutDS))
@@ -53,7 +142,7 @@ if ((1 + @$InDS) != (1 + @$OutDS))
         exit (1);
  }
  
-extract_ds ($InFile, $OutFile);
+main ($InFile, $OutFile);
  exit (0);
  
  {
@@ -64,10 +153,17 @@ my $current_index;
  # state 2 == parse values
  my $state;
  my $out_cache;
-sub handle_line
+sub handle_line_dsmap
  {
-       my $fh = shift;
         my $line = shift;
+       my $index = shift;
+       my $ret = '';
+
+       if ((@$InDS == 0) || (@$OutDS == 0))
+       {
+               post_line ($line, $index + 1);
+               return;
+       }
  
         if (!defined ($state))
         {
@@ -118,7 +214,10 @@ sub handle_line
                         for (my $new_index = 0; $new_index < @$InDS; $new_index++)
                         {
                                 my $old_index = $ds_index->[$new_index];
-                               print $fh $out_cache->[$old_index];
+                               while ($out_cache->[$old_index] =~ m/^(.*)$/gm)
+                               {
+                                       post_line ("$1\n", $index + 1);
+                               }
                         }
  
                         # Clear the cache - it's used in state1, too.
@@ -127,14 +226,14 @@ sub handle_line
                                 $out_cache->[$i] = '';
                         }
  
-                       print $fh $line;
+                       $ret .= $line;
                         $current_index = -1;
                         $state = 1;
                 }
                 elsif ($current_index == -1)
                 {
                         # Print all the lines before the first DS definition
-                       print $fh $line;
+                       $ret .= $line;
                 }
                 else
                 {
@@ -155,7 +254,10 @@ sub handle_line
                         for (my $new_index = 0; $new_index < @$InDS; $new_index++)
                         {
                                 my $old_index = $ds_index->[$new_index];
-                               print $fh $out_cache->[$old_index];
+                               while ($out_cache->[$old_index] =~ m/^(.*)$/gm)
+                               {
+                                       post_line ("$1\n", $index + 1);
+                               }
                         }
  
                         # Clear the cache
@@ -164,19 +266,19 @@ sub handle_line
                                 $out_cache->[$i] = '';
                         }
  
-                       print $fh $line;
+                       $ret .= $line;
                         $current_index = -1;
                 }
                 elsif ($line =~ m#<database>#)
                 {
-                       print $fh $line;
+                       $ret .= $line;
                         $state = 2;
                 }
                 elsif ($current_index == -1)
                 {
                         # Print all the lines before the first DS definition
                         # and after cdp_prep
-                       print $fh $line;
+                       $ret .= $line;
                 }
                 else
                 {
@@ -188,7 +290,7 @@ sub handle_line
         {
                 if ($line =~ m#</database>#)
                 {
-                       print $fh $line;
+                       $ret .= $line;
                         $current_index = -1;
                         $state = 1;
                 }
@@ -196,13 +298,14 @@ sub handle_line
                 {
                         my @values = ();
                         my $i;
-                       my $output = "\t\t";
+                       
+                       $ret .= "\t\t";
  
                         if ($line =~ m#(<!-- .*? -->)#)
                         {
-                               $output .= "$1 ";
+                               $ret .= "$1 ";
                         }
-                       $output .= "<row> ";
+                       $ret .= "<row> ";
  
                         $i = 0;
                         while ($line =~ m#<v>\s*([^<\s]+)\s*</v>#g)
@@ -214,19 +317,249 @@ sub handle_line
                         for (my $new_index = 0; $new_index < @$InDS; $new_index++)
                         {
                                 my $old_index = $ds_index->[$new_index];
-                               $output .= '<v> ' . $values[$old_index] . ' </v> ';
+                               $ret .= '<v> ' . $values[$old_index] . ' </v> ';
                         }
-                       $output .= "</row>\n";
-                       print $fh $output;
+                       $ret .= "</row>\n";
                 }
         }
         else
         {
                 die;
         }
-}} # handle_line
  
-sub extract_ds
+       if ($ret)
+       {
+               post_line ($ret, $index + 1);
+       }
+}} # handle_line_dsmap
+
+# The _step_ handler: rewrites the <step> line to the requested $Step and
+# rescales every later <pdp_per_row> line by the resulting factor. Each line,
+# changed or not, is handed to the next handler with post_line ().
+{
+my $step_factor_up;   # $Step / old step, set when the step is increased
+my $step_factor_down; # old step / $Step, set when the step is decreased
+sub handle_line_step
+{
+       my $line = shift;
+       my $index = shift;
+
+       if (!$Step) # no step change requested, or old and new step turned out equal
+       {
+               post_line ($line, $index + 1);
+               return;
+       }
+
+       $step_factor_up ||= 0;
+       $step_factor_down ||= 0;
+
+       if (($step_factor_up == 0) && ($step_factor_down == 0)) # no factor computed yet
+       {
+               if ($line =~ m#<step>\s*(\d+)\s*</step>#i)
+               {
+                       my $old_step = 0 + $1;
+                       if ($Step < $old_step)
+                       {
+                               $step_factor_down = int ($old_step / $Step);
+                               if (($step_factor_down * $Step) != $old_step)
+                               {
+                                       print STDERR "The old step ($old_step seconds) "
+                                       . "is not a multiple of the new step "
+                                       . "($Step seconds).\n";
+                                       exit (1);
+                               }
+                               $line = "<step> $Step </step>\n"; # replaces the whole input line
+                       }
+                       elsif ($Step > $old_step)
+                       {
+                               $step_factor_up = int ($Step / $old_step);
+                               if (($step_factor_up * $old_step) != $Step)
+                               {
+                                       print STDERR "The new step ($Step seconds) "
+                                       . "is not a multiple of the old step "
+                                       . "($old_step seconds).\n";
+                                       exit (1);
+                               }
+                               $line = "<step> $Step </step>\n"; # replaces the whole input line
+                       }
+                       else
+                       {
+                               $Step = 0; # same step: later calls take the pass-through branch
+                       }
+               }
+       }
+       elsif ($line =~ m#<pdp_per_row>\s*(\d+)\s*</pdp_per_row>#i)
+       {
+               my $old_val = 0 + $1;
+               my $new_val;
+               if ($step_factor_up) # larger step: fewer PDPs per row, must divide evenly
+               {
+                       $new_val = int ($old_val / $step_factor_up);
+                       if (($new_val * $step_factor_up) != $old_val)
+                       {
+                               print STDERR "Can't divide number of PDPs per row ($old_val) by step-factor ($step_factor_up).\n";
+                               exit (1);
+                       }
+               }
+               else # smaller step: proportionally more PDPs per row
+               {
+                       $new_val = $step_factor_down * $old_val;
+               }
+               $line = "<pdp_per_row> $new_val </pdp_per_row>\n"; # replaces the whole input line
+       }
+
+       post_line ($line, $index + 1);
+}} # handle_line_step
+
+# The _add RRA_ handler: counts the <ds> definitions in the header and then
+# emits every RRA requested with --rra, filled with NaN, exactly once directly
+# before the first <rra> of the input. All output goes to the next handler.
+{
+my $add_rra_done; # set once the new RRAs have been emitted
+my $num_ds;       # number of <ds> lines seen before the first <rra>
+sub handle_line_add_rra
+{
+  my $line = shift;
+  my $index = shift;
+
+  my $post = sub { for (@_) { post_line ($_, $index + 1); } };
+
+  $num_ds ||= 0;
+  # Nothing to add, or already added: pass the line through unchanged.
+  if (!@$NewRRAs || $add_rra_done)
+  {
+    $post->($line);
+    return;
+  }
+
+  if ($line =~ m#<ds>#i)
+  {
+    $num_ds++;
+  }
+  elsif ($line =~ m#<rra>#i)
+  {
+    for (my $i = 0; $i < @$NewRRAs; $i++)
+    {
+      my $rra = $NewRRAs->[$i];
+      $post->("\t<rra>\n",
+      "\t\t<cf> $rra->{'cf'} </cf>\n",
+      "\t\t<pdp_per_row> $rra->{'steps'} </pdp_per_row>\n",
+      "\t\t<params>\n",
+      "\t\t\t<xff> $rra->{'xff'} </xff>\n",
+      "\t\t</params>\n",
+      "\t\t<cdp_prep>\n");
+      # One empty <ds> entry per data source counted so far.
+      for (my $j = 0; $j < $num_ds; $j++)
+      {
+        $post->("\t\t\t<ds>\n",
+        "\t\t\t\t<primary_value> NaN </primary_value>\n",
+        "\t\t\t\t<secondary_value> NaN </secondary_value>\n",
+        "\t\t\t\t<value> NaN </value>\n",
+        "\t\t\t\t<unknown_datapoints> 0 </unknown_datapoints>\n",
+        "\t\t\t</ds>\n");
+      }
+      # The database consists of `rows' identical all-NaN rows.
+      $post->("\t\t</cdp_prep>\n", "\t\t<database>\n");
+      my $temp = "\t\t\t<row>" . join ('', map { "<v> NaN </v>" } (1 .. $num_ds)) . "</row>\n";
+      for (my $j = 0; $j < $rra->{'rows'}; $j++)
+      {
+        $post->($temp);
+      }
+      $post->("\t\t</database>\n", "\t</rra>\n"); # close the element opened above
+    }
+    $add_rra_done = 1; # otherwise every later <rra> would get another copy
+  }
+
+  $post->($line);
+}} # handle_line_add_rra
+
+# The _output_ handler: prints each line it receives to the filehandle that
+# was registered with set_output (). While no filehandle is set, lines are
+# passed on to the next handler instead.
+{
+my $fh; # output filehandle shared by set_output and handle_line_output
+sub set_output
+{
+       $fh = shift;
+}
+
+sub handle_line_output
+{
+       my $line = shift;
+       my $index = shift;
+
+       if (!defined ($fh)) # set_output () has not been called with a handle
+       {
+               post_line ($line, $index + 1);
+               return;
+       }
+       # The line is written as-is; it is not forwarded to any later handler.
+       print $fh $line;
+}} # handle_line_output
+
+# Dispatching logic: handlers form an ordered list. post_line ($line, $index)
+# calls handler number $index, and a handler forwards its output by calling
+# post_line with $index + 1. Lines posted past the last handler are dropped.
+{
+my @handlers = (); # code references, in the order they were added
+sub add_handler
+{
+       my $handler = shift;
+
+       die unless (ref ($handler) eq 'CODE'); # only code references are accepted
+       push (@handlers, $handler);
+} # add_handler
+
+sub post_line
+{
+       my $line = shift;
+       my $index = shift;
+
+       if (0) # debugging aid, disabled; change the condition to trace every call
+       {
+               my $copy = $line;
+               chomp ($copy);
+               print "DEBUG: post_line ($copy, $index);\n";
+       }
+
+       if ($index > $#handlers) # no handler with this index
+       {
+               return;
+       }
+       $handlers[$index]->($line, $index); # the handler gets the line and its own index
+}} # post_line
+
+sub handle_fh # ($in_fh, $out_fh): run every input line through the handler chain
+{
+       my $in_fh = shift;
+       my $out_fh = shift;
+
+       set_output ($out_fh);
+       # Only the handlers whose option was given are added, in this fixed order.
+       if (@$InDS)
+       {
+         add_handler (\&handle_line_dsmap);
+       }
+
+       if ($Step)
+       {
+         add_handler (\&handle_line_step);
+       }
+
+       if (@$NewRRAs)
+       {
+         add_handler (\&handle_line_add_rra);
+       }
+       # The output handler is always added, after all the others.
+       add_handler (\&handle_line_output);
+
+       while (my $line = <$in_fh>)
+       {
+               post_line ($line, 0); # every input line enters at the first handler
+       }
+} # handle_fh
+
+sub main
  {
         my $in_file = shift;
         my $out_file = shift;
@@ -234,17 +567,53 @@ sub extract_ds
         my $in_fh;
         my $out_fh;
  
-       open ($in_fh,  '-|', 'rrdtool', 'dump', $in_file) or die ("open (rrdtool): $!");
-       open ($out_fh, '|-', 'rrdtool', 'restore', '-', $out_file) or die ("open (rrdtool): $!");
+       my $in_needs_close = 1;
+       my $out_needs_close = 1;
  
-       while (my $line = <$in_fh>)
+       if ($in_file =~ m/\.rrd$/i)
+       {
+               open ($in_fh,  '-|', 'rrdtool', 'dump', $in_file) or die ("open (rrdtool): $!");
+       }
+       elsif ($in_file eq '-')
         {
-               handle_line ($out_fh, $line);
+               $in_fh = \*STDIN;
+               $in_needs_close = 0;
         }
+       else
+       {
+               open ($in_fh, '<', $in_file) or die ("open ($in_file): $!");
+       }
+
+       if ($out_file =~ m/\.rrd$/i)
+       {
+               open ($out_fh, '|-', 'rrdtool', 'restore', '-', $out_file) or die ("open (rrdtool): $!");
+       }
+       elsif ($out_file eq '-')
+       {
+               $out_fh = \*STDOUT;
+               $out_needs_close = 0;
+       }
+       else
+       {
+               open ($out_fh, '>', $out_file) or die ("open ($out_file): $!");
+       }
+
+       handle_fh ($in_fh, $out_fh);
+
+       if ($in_needs_close)
+       {
+               close ($in_fh);
+       }
+       if ($out_needs_close)
+       {
+               close ($out_fh);
+       }
+} # main
+
+=head1 LICENSE
  
-       close ($in_fh);
-       close ($out_fh);
-} # extract_ds
+This script is licensed under the GNU general public license, versionE<nbsp>2
+(GPLv2).
  
  =head1 AUTHOR
author	Florian Forster <octo@huhu.verplant.org>
	Fri, 22 Feb 2008 16:24:23 +0000 (17:24 +0100)
committer	Florian Forster <octo@huhu.verplant.org>
	Fri, 22 Feb 2008 16:24:23 +0000 (17:24 +0100)