contrib/rrdjig/rrdjig.pl

   1 #!/usr/bin/perl -w
   2 require 5.008;
   3 use lib qw(/scratch/rrd-1.4.3-test2/lib/perl);
   4 use RRDs;
   5 use strict;
   6 use Getopt::Long 2.25 qw(:config posix_default no_ignore_case);
   7 use Pod::Usage 1.14;
   8 use Data::Dumper;
   9
  10 '$Revision$ ' =~ /Revision: (\S*)/;
  11 my $Revision = $1;
  12
  13 # main loop
  14 my %opt = ();
  15 sub main()
  16 {
  17     # parse options
  18     GetOptions(\%opt, 'help|h', 'man', 'version', 'noaction|no-action|n',
  19         'verbose|v','src-tmpl=s','dst-tmpl=s') or exit(1);
  20     if($opt{help})     { pod2usage(1) }
  21     if($opt{man})      { pod2usage(-exitstatus => 0, -verbose => 2) }
  22     if($opt{version})  { print "rrdjig $Revision\n"; exit(0) }
  23     my $src = shift @ARGV or pod2usage(1);
  24     if (not -r $src)   { pod2usage("Reading $src: $!") }
  25     my $dst = shift @ARGV or pod2usage(1);
  26     if (not -w $dst)   { pod2usage("Accessing $dst: $!") }
  27
  28     rrdjig($src,$opt{'src-tmpl'},$dst,$opt{'dst-tmpl'});
  29 }
  30
  31 main;
  32
  33 sub rrd_err_check(){
  34     my $err = RRDs::error();
  35     if ($err){
  36         die "RRD Error: $err\n";
  37     }
  38 }
  39
  40 # how should the data be fetched from the source
  41 # to provide the best approximation of the original data
  42
  43 sub step_sync ($$){
  44     my $value = shift;
  45     my $step = shift;
  46     return ($value - ($value % $step));
  47 }
  48
  49 sub get_rra_size_map($){
  50     my $info = shift;
  51     my $map = {};
  52     my $min_start;
  53     for (my $i=0;;$i++){
  54         my $cf = $info->{"rra[$i].cf"};
  55         last if not $cf;
  56         next if $cf !~ /AVERAGE|MIN|MAX/;
  57         my $pdp_per_row = $info->{"rra[$i].pdp_per_row"};
  58         next if $cf =~ /MIN|MAX/ and $pdp_per_row == 1;
  59         my $rows = $info->{"rra[$i].rows"};
  60         my $step = $pdp_per_row*$info->{step};
  61         my $start = step_sync($info->{last_update},$step) - $step*$rows;
  62         if (not defined $min_start or $start < $min_start) {
  63             $min_start = $start;
  64         }
  65         if (  $map->{$cf}{$pdp_per_row}{rows} || 0 < $rows
  66             or $map->{$cf}{$pdp_per_row}{start} || 0 < $start ){
  67             $map->{$cf}{$pdp_per_row} = {
  68                 id   => $i,
  69                 rows => $rows,
  70                 step => $step,
  71                 start => $start
  72             };
  73         }
  74     }
  75     return ($min_start,$map);
  76 }
  77
  78
  79 sub prep_fetch_tasks ($$){
  80     my $src_info = shift;
  81     my $dst_info = shift;
  82     my ($min_start,$src_size) = get_rra_size_map($src_info);
  83     my $now = step_sync($src_info->{last_update}, $src_info->{step});
  84     my $first = step_sync($dst_info->{last_update} , $dst_info->{step});
  85     if ($min_start > $first ) {
  86         $first = $min_start;
  87     }
  88     print "Search $first to $now\n" if $opt{verbose};
  89     my $task = {};
  90     for my $cf (qw(AVERAGE MIN MAX)){
  91         my $x = $src_size->{$cf};
  92         my $pointer = $now;
  93         $task->{$cf} = [];
  94         for my $pdp_per_row (sort {$a <=> $b} keys %$x){
  95             my $step = $x->{$pdp_per_row}{step};
  96             my $new_pointer = $x->{$pdp_per_row}{start};
  97             print "look $cf $pdp_per_row * $step - $new_pointer\n" if $opt{verbose};
  98             if ($new_pointer <= $first){
  99                 $new_pointer = step_sync($first,$step);
 100             }
 101             if ($new_pointer <= $pointer){
 102                 unshift @{$task->{$cf}}, {
 103                     start => $new_pointer,
 104                     end => step_sync($pointer,$step),
 105                     step => $step
 106                 };
 107                 $pointer = $new_pointer;
 108             }
 109             last if $pointer <= $first;
 110         }
 111     }
 112     return ($first,$task);
 113 }
 114
 115 sub fetch_data($$$){
 116     my $src = shift;
 117     my $first = shift;
 118     my $tasks = shift;
 119     my %data;
 120     my @tmpl;
 121     if ($opt{'src-tmpl'}){
 122         @tmpl = split /:/, $opt{'src-tmpl'};
 123     }
 124     my %map;
 125     for my $cf (keys %$tasks){
 126         print STDERR "FETCH #### CF $cf #####################################\n"
 127             if $opt{verbose};
 128         for my $t (@{$tasks->{$cf}}){
 129             my ($start,$step,$names,$array) = RRDs::fetch(
 130                 $src,$cf,'--resolution',$t->{step},
 131                 '--start',$t->{start},'--end',$t->{end}
 132             );
 133             my $id = 0;
 134             if (@tmpl and not %map){
 135                 %map = ( map { ($_,$id++) } @$names );
 136                 for my $key (@tmpl){
 137                     die "ERROR: src key '$key' is not known in $src. Pick from ".join(':',@$names)."\n"
 138                         if not exists $map{$key};
 139                 }
 140             }
 141             rrd_err_check();
 142             print STDERR "FETCH: want setp $t->{step} -> got step $step  / want start $t->{start} -> got start $start\n" if $opt{verbose};
 143             my $now = $start;
 144             while (my $row = shift @$array){
 145                 if (@tmpl){
 146                     push @{$data{$cf}} , [ $now, $step, [ @$row[@map{@tmpl}] ] ];
 147                 }
 148                 else {
 149                     push @{$data{$cf}} , [ $now, $step, $row ];
 150                 }
 151                 $now+=$step;
 152             }
 153         }
 154     }
 155     die "ERROR: no AVERAGE RRA found in src rrd. Enhance me to be able to deal with this!\n"
 156         if not $data{AVERAGE};
 157     # if older data is required, generate a fake average entry.
 158     my $start = $data{AVERAGE}[0][0] - $data{AVERAGE}[0][1];
 159     if ($start > $first ) {
 160         my $step = $start - $first;
 161         unshift @{$data{AVERAGE}}, [ $start, $step, [ map {undef} @{$data{AVERAGE}[0][2]} ] ];
 162     }
 163     return (\%data);
 164 }
 165
 166 sub reupdate($$$){
 167     my $step = shift;
 168     my $dst = shift;
 169     my $data = shift;
 170     my @min;
 171     my @max;
 172     my @pending = map { 0 } @{$data->{AVERAGE}[0][2]};
 173     my $hide_cnt = 0;
 174     my @up;
 175     while (my $av = shift @{$data->{AVERAGE}}){
 176         my $end = $av->[0];
 177         my $start = $end - $av->[1];
 178         if (my $av_nx = $data->{AVERAGE}[0]){
 179             my $start_nx = $av_nx->[0] - $av_nx->[1];
 180             if ($end > $start_nx){
 181                 $end = $start_nx;
 182             }
 183         }
 184         STEP:
 185         for (my $t = $start+$step;$t<=$end;$t+=$step){
 186             my @out = @{$av->[2]};
 187             # lets see if we a usable a MIN or MAX entry pending
 188             if ($hide_cnt <= 2 and $av->[1] > $step) {
 189                 for my $cf (qw(MIN MAX)){
 190                     my $m = $data->{$cf}[0];
 191                     # drop any MIN/MAX entries which we could not use
 192                     while ($m->[0] <= $start) {
 193                         print STDERR "# DROP $cf $m->[0], $m->[1]\n" if $opt{verbose};
 194                         shift @{$data->{$cf}};
 195                         $m = $data->{$cf}[0];
 196                     }
 197                     my $cend = $m->[0];
 198                     my $cstep = $m->[1];
 199                     my $crow = $m->[2];
 200                     if ($cend >= $t and $cend - $cstep <= $t - $step){
 201                         my $row = "$t:".join(':',map {defined $_ ? $_ : 'U'} @{$crow});
 202                         if ($cf eq 'MIN'){
 203                             @min = @{$crow};
 204                         } else {
 205                             @max = @{$crow};
 206                         }
 207                         print STDERR ($cf eq 'MIN' ? 'm' : 'M' ) ,$row,"\n" if $opt{verbose};
 208                         push @up, $row;
 209                         $hide_cnt++;
 210                         for (my $i = 0; $i <@$crow; $i++){
 211                             if (defined $pending[$i]){
 212                                 if (defined $crow->[$i] and defined $out[$i]){
 213                                     my $keep = ($out[$i] - $crow->[$i]);
 214 #                                   print STDERR " - keep $keep\n" if $opt{verbose};
 215                                     $pending[$i] += $keep;
 216                                 }
 217                                 else {
 218                                     $pending[$i] = undef;
 219                                 }
 220                             }
 221                         }
 222                         shift @{$data->{$cf}};
 223                         next STEP;
 224                     }
 225                 }
 226             }
 227
 228             # compensate for data not shown while insering fake MIN/MAX entries
 229             for (my $i = 0; $i < @out; $i++){
 230                 if (defined $out[$i] and defined $pending[$i] and $pending[$i] != 0){
 231                     my $new = $out[$i] + $pending[$i];
 232                     if (defined $max[$i] and $new > $max[$i]) {
 233                         $pending[$i] = $new - $max[$i];
 234                         $out[$i] = $max[$i];
 235 #                       print STDERR " - maxout $i $out[$i]\n" if $opt{verbose};
 236                     }
 237                     elsif (defined $min[$i] and $new < $min[$i]){
 238                         $pending[$i] = $new - $min[$i];
 239                         $out[$i] = $min[$i];
 240 #                       print STDERR " - minout $i $out[$i]\n" if $opt{verbose};
 241                     }
 242                     else {
 243                         $pending[$i] = 0;
 244                         $out[$i] = $new;
 245 #                       print STDERR " - combined $i $out[$i]\n" if $opt{verbose};
 246                     }
 247                 }
 248                 else {
 249                     $pending[$i] = 0;
 250                 }
 251             }
 252             $hide_cnt = 0;
 253             # show the result;
 254             my $row = "$t:".join(':',map {defined $_ ? $_ : 'U'} @out);
 255             print STDERR " ",$row,"\n" if $opt{verbose};
 256             push @up, $row;
 257         }
 258     }
 259     pop @up; # the last update is most likely one too many ...
 260     if (@up == 0) {
 261         warn "WARNING: src has no entries new enough to fill dst\n";
 262     } else {
 263         RRDs::update($dst,
 264                      $opt{'dst-tmpl'} ? '--template='.$opt{'dst-tmpl'} : (),
 265                      @up);
 266         rrd_err_check();
 267     }
 268 }
 269
 270 sub set_gauge($$){
 271     my $dst = shift;
 272     my $info = shift;
 273     my @tasks;
 274     for my $key (keys %$info) {
 275         if ($key =~ m/^ds\[(.+)\]\.type$/
 276             and $info->{$key} ne 'GAUGE'){
 277             print STDERR "DS $1 -> GAUGE\n" if $opt{verbose};
 278             push @tasks, "--data-source-type=${1}:GAUGE";
 279         }
 280         if (@tasks) {
 281             RRDs::tune($dst,@tasks);
 282             rrd_err_check();
 283         }
 284     }
 285 }
 286
 287 sub unset_gauge($$){
 288     my $dst = shift;
 289     my $info = shift;
 290     my @tasks;
 291     for my $key (keys %$info) {
 292         if ($key =~ m/^ds\[(.+)\]\.type$/
 293             and $info->{$key} ne 'GAUGE'){
 294             print STDERR "DS $1 -> $info->{$key}\n" if $opt{verbose};
 295             push @tasks, "--data-source-type=${1}:$info->{$key}";
 296         }
 297         if (@tasks) {
 298             RRDs::tune($dst,@tasks);
 299             rrd_err_check();
 300         }
 301     }
 302 }
 303
 304 sub rrdjig($$$$){
 305     my $src = shift;
 306     my $src_tmpl = shift;
 307     my $dst = shift;
 308     my $dst_tmpl = shift;
 309     my $dst_info = RRDs::info($dst);
 310     rrd_err_check();
 311     my $src_info = RRDs::info($src);
 312     rrd_err_check();
 313     my ($first,$fetch_tasks) = prep_fetch_tasks($src_info,$dst_info);
 314     my $updates = fetch_data($src,$first,$fetch_tasks);
 315     set_gauge($dst,$dst_info);
 316     reupdate($src_info->{step},$dst,$updates);
 317     unset_gauge($dst,$dst_info);
 318 }
 319
 320
 321 __END__
 322
 323 =head1 NAME
 324
 325 rrdjig - use data from an existing rrd file to populate a new one
 326
 327 =head1 SYNOPSIS
 328
 329 B<rrdjig> [I<options>...] I<src.rrd> I<dest.rrd>
 330
 331      --man           show man-page and exit
 332  -h, --help          display this help and exit
 333      --version       show version information and exit
 334  -v, --verbose       talk while you work
 335  -n, --noaction      just talk don't act
 336      --src-tmpl=tmpl output template for the source rrd
 337      --dst-tmpl=tmpl input template for the destination rrd
 338
 339 =head1 DESCRIPTION
 340
 341 In rrdtool, data gets processed immediately upon arrival. This means that
 342 the original data is never stored and it is thus not easily possible to
 343 restructure data at a later stage. In the rrdtool core there are no
 344 functions to modify the base step size nor the number and types of RRAs in a
 345 graceful manner.
 346
 347 The rrdjig tool tries to rebuild the original data as closely as possible
 348 based on the data found in the rrd file. It takes AVERAGE, MIN and MAX RRAs
 349 into account and rebuilds the original data stream such that it can be
 350 re-entered into a fresh rrd file. Depending on the configuration of the new
 351 rrd file the resulting data closely matches the data in the original rrd
 352 file.
 353
 354 If the DS configuration of the new RRD file differs from the original
 355 one, the B<--src-tmpl> and B<--dst-tmpl> options can be used to override
 356 the default order of DS entries.
 357
 358 =head1 BEWARE
 359
 360 There are two warnings you should keep in mind:
 361
 362 =over
 363
 364 =item *
 365
 366 This is NEW CODE, so there may be hidden problems. Test this first on your real data before doing any major conversions.
 367
 368 =item *
 369
 370 In my testing there were differences between source and destination which I attribute to
 371 quantization issues especially when switching from one consolidation level to the next one.
 372
 373 =back
 374
 375 =head1 EXAMPLE
 376
 377 F<legacy.rrd> has data for the last two years and F<new.rrd> is still empty
 378 but created with a start date two years in the past. F<legacy.rrd> contains
 379 4 Data Sources (in,out,error,drop) and F<new.rrd> contains 3 data-sources
 380 (myout,myin,overrun). We want to transfer the old 'in' to 'myin' and 'out'
 381 to 'myout' while dropping 'error' and 'drop'.
 382
 383  rrdjig --src-tmpl=in:out --dst-tmpl=myin:myout legacy.rrd new.rrd
 384
 385 =head1 COPYRIGHT
 386
 387 Copyright (c) 2010 by OETIKER+PARTNER AG. All rights reserved.
 388
 389 =head1 LICENSE
 390
 391 This program is free software; you can redistribute it and/or modify
 392 it under the terms of the GNU General Public License as published by
 393 the Free Software Foundation; either version 3 of the License, or
 394 (at your option) any later version.
 395
 396 This program is distributed in the hope that it will be useful,
 397 but WITHOUT ANY WARRANTY; without even the implied warranty of
 398 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 399 GNU General Public License for more details.
 400
 401 You should have received a copy of the GNU General Public License
 402 along with this program; if not, write to the Free Software
 403 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 404
 405 =head1 AUTHOR
 406
 407 S<Tobi Oetiker E<lt>tobi@oetiker.chE<gt>>
 408
 409 The development of  this tool has been sponsored by L<www.init7.net|http://www.init7.net>.
 410
 411 =head1 HISTORY
 412
 413  2010-02-25 to Initial Version
 414
 415 =cut
 416
 417 # Emacs Configuration
 418 #
 419 # Local Variables:
 420 # mode: cperl
 421 # eval: (cperl-set-style "PerlStyle")
 422 # mode: flyspell
 423 # mode: flyspell-prog
 424 # End:
 425 #
 426 # vi: sw=4 et