From: oetiker Date: Fri, 7 Nov 2008 14:02:09 +0000 (+0000) Subject: new CDEF functions for predictions PREDICT and PREDICTSIGMA X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=f9e62abf5664d61987e111e4f1f9388e506a10ff;p=rrdtool.git new CDEF functions for predictions PREDICT and PREDICTSIGMA -- Martin Sperl git-svn-id: svn://svn.oetiker.ch/rrdtool/trunk/program@1649 a5681a0c-68f1-0310-ab6d-d61299d08faa --- diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 4c668b7..9b485a6 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -77,6 +77,7 @@ Wim Heirman --units=si option Wolfgang Schrimm xport function Wrolf Courtney (HP-UX) hendrik visage +Martin Sperl (CDEF prediction functions) Philippe Simonet (Windows Binaries) Alexander Lucke (lucke with dns-net.de) of DNS:NET Internet Services (www.dns-net.de) http://rrdtool.org diff --git a/doc/rrdgraph_rpn.pod b/doc/rrdgraph_rpn.pod index aabd738..21661ba 100644 --- a/doc/rrdgraph_rpn.pod +++ b/doc/rrdgraph_rpn.pod @@ -184,6 +184,78 @@ source value is NAN the complete sliding window is affected. The TRENDNAN operation ignores all NAN-values in a sliding window and computes the average of the remaining values. +B + +Create a "sliding window" average/sigma of another data series, that also +shifts the data series by given amounts of of time as well + +Usage - explicit stating shifts: +CDEF:predict=,...,,n,,x,PREDICT +CDEF:sigma=,...,,n,,x,PREDICTSIGMA + +Usage - shifts defined as a base shift and a number of time this is applied +CDEF:predict=,-n,,x,PREDICT +CDEF:sigma=,-n,,x,PREDICTSIGMA + +Example: +CDEF:predict=172800,86400,2,1800,x,PREDICT + +This will create a half-hour (1800 second) sliding window average/sigma of x, that +average is essentially computed as shown here: + + +---!---!---!---!---!---!---!---!---!---!---!---!---!---!---!---!---!---> + now + shift 1 t0 + <-----------------------> + window + <---------------> + shift 2 + <-----------------------------------------------> + window + <---------------> + shift 1 t1 + <-----------------------> + window + <---------------> + shift 2 + <-----------------------------------------------> + window + <---------------> + + Value at sample (t0) will be the average between (t0-shift1-window) and (t0-shift1) + and between (t0-shift2-window) and (t0-shift2) + Value at sample (t1) will be the average between (t1-shift1-window) and (t1-shift1) + and between (t1-shift2-window) and (t1-shift2) + + +The function is by design NAN-safe. +This also allows for extrapolation into the future (say a few days) +- you may need to define the data series whit the optional start= parameter, so that +the source data series has enough data to provide prediction also at the beginning of a graph... + +Here an example, that will create a 10 day graph that also shows the +prediction 3 days into the future with its uncertainty value (as defined by avg+-4*sigma) +This also shows if the prediction is exceeded at a certain point. + +rrdtool graph image.png --imgformat=PNG \ + --start=-7days --end=+3days --width=1000 --height=200 --alt-autoscale-max \ + DEF:value=value.rrd:value:AVERAGE:start=-14days \ + LINE1:value#ff0000:value \ + CDEF:predict=86400,-7,1800,value,PREDICT \ + CDEF:sigma=86400,-7,1800,value,PREDICTSIGMA \ + CDEF:upper=predict,sigma,3,*,+ \ + CDEF:lower=predict,sigma,3,*,- \ + LINE1:predict#00ff00:prediction \ + LINE1:upper#0000ff:upper\ certainty\ limit \ + LINE1:lower#0000ff:lower\ certainty\ limit \ + CDEF:exceeds=value,UN,0,value,lower,upper,LIMIT,UN,IF \ + TICK:exceeds#aa000080:1 + +Note: Experience has shown that a factor between 3 and 5 to scale sigma is a good +discriminator to detect abnormal behaviour. This obviously depends also on the type +of data and how "noisy" the data series is. + +This prediction can only be used for short term extrapolations - say a few days into the future- =item Special values diff --git a/src/rrd_rpncalc.c b/src/rrd_rpncalc.c index 5bb5e40..d83c0f4 100644 --- a/src/rrd_rpncalc.c +++ b/src/rrd_rpncalc.c @@ -174,6 +174,8 @@ void rpn_compact2str( add_op(OP_REV, REV) add_op(OP_TREND, TREND) add_op(OP_TRENDNAN, TRENDNAN) + add_op(OP_PREDICT, PREDICT) + add_op(OP_PREDICTSIGMA, PREDICTSIGMA) add_op(OP_RAD2DEG, RAD2DEG) add_op(OP_DEG2RAD, DEG2RAD) add_op(OP_AVG, AVG) @@ -371,6 +373,8 @@ rpnp_t *rpn_parse( match_op(OP_REV, REV) match_op(OP_TREND, TREND) match_op(OP_TRENDNAN, TRENDNAN) + match_op(OP_PREDICT, PREDICT) + match_op(OP_PREDICTSIGMA, PREDICTSIGMA) match_op(OP_RAD2DEG, RAD2DEG) match_op(OP_DEG2RAD, DEG2RAD) match_op(OP_AVG, AVG) @@ -784,6 +788,84 @@ short rpn_calc( } } break; + case OP_PREDICT: + case OP_PREDICTSIGMA: + stackunderflow(2); + { + /* the local averaging window (similar to trend, but better here, as we get better statistics thru numbers)*/ + int locstepsize = rpnstack->s[--stptr]; + /* the number of shifts and range-checking*/ + int shifts = rpnstack->s[--stptr]; + stackunderflow(shifts); + // handle negative shifts special + if (shifts<0) { + stptr--; + } else { + stptr-=shifts; + } + /* the real calculation */ + double val=DNAN; + /* the info on the datasource */ + time_t dsstep = (time_t) rpnp[rpi - 1].step; + int dscount = rpnp[rpi - 1].ds_cnt; + int locstep = (int)ceil((float)locstepsize/(float)dsstep); + + /* the sums */ + double sum = 0; + double sum2 = 0; + int count = 0; + /* now loop for each position */ + int doshifts=shifts; + if (shifts<0) { doshifts=-shifts; } + for(int loop=0;loops[stptr]; + } else { + shiftstep = rpnstack->s[stptr+loop]; + } + if(shiftstep <0) { + rrd_set_error("negative shift step not allowed: %i",shiftstep); + return -1; + } + shiftstep=(int)ceil((float)shiftstep/(float)dsstep); + /* loop all local shifts */ + for(int i=0;i<=locstep;i++) { + /* now calculate offset into data-array - relative to output_idx*/ + int offset=shiftstep+i; + /* and process if we have index 0 of above */ + if ((offset>=0)&&(offset0) { + val = sum/(double)count; + } + } else { + if (count>1) { /* the sigma case */ + val=count*sum2-sum*sum; + if (val<0) { + val=DNAN; + } else { + val=sqrt(val/((float)count*((float)count-1.0))); + } + } + } + rpnstack->s[stptr] = val; + } + break; case OP_TREND: case OP_TRENDNAN: stackunderflow(1); diff --git a/src/rrd_rpncalc.h b/src/rrd_rpncalc.h index 6f91e99..10c8edd 100644 --- a/src/rrd_rpncalc.h +++ b/src/rrd_rpncalc.h @@ -18,6 +18,7 @@ enum op_en { OP_NUMBER = 0, OP_VARIABLE, OP_INF, OP_PREV, OP_NEGINF, OP_UN, OP_END, OP_LTIME, OP_NE, OP_ISINF, OP_PREV_OTHER, OP_COUNT, OP_ATAN, OP_SQRT, OP_SORT, OP_REV, OP_TREND, OP_TRENDNAN, OP_ATAN2, OP_RAD2DEG, OP_DEG2RAD, + OP_PREDICT,OP_PREDICTSIGMA, OP_AVG, OP_ABS, OP_ADDNAN };