Code

new CDEF functions for predictions PREDICT and PREDICTSIGMA
authoroetiker <oetiker@a5681a0c-68f1-0310-ab6d-d61299d08faa>
Fri, 7 Nov 2008 14:02:09 +0000 (14:02 +0000)
committeroetiker <oetiker@a5681a0c-68f1-0310-ab6d-d61299d08faa>
Fri, 7 Nov 2008 14:02:09 +0000 (14:02 +0000)
-- Martin Sperl

git-svn-id: svn://svn.oetiker.ch/rrdtool/trunk/program@1649 a5681a0c-68f1-0310-ab6d-d61299d08faa

CONTRIBUTORS
doc/rrdgraph_rpn.pod
src/rrd_rpncalc.c
src/rrd_rpncalc.h

index 4c668b7a701f814a34885a5e8be58feba9316f5f..9b485a6a5dd148eb9e2300585b284175f8866eea 100644 (file)
@@ -77,6 +77,7 @@ Wim Heirman <wim.heirman elis.ugent.be> --units=si option
 Wolfgang Schrimm <wschrimm with uni-hd.de> xport function
 Wrolf Courtney <wrolf with wrolf.net> (HP-UX)
 hendrik visage <hvisage with is.co.za>
+Martin Sperl <rrdtool martin.sperl.org> (CDEF prediction functions)
 Philippe Simonet <philippe.simonet with swisscom.ch> (Windows Binaries)
 Alexander Lucke (lucke with dns-net.de) 
  of DNS:NET Internet Services (www.dns-net.de) http://rrdtool.org
index aabd7383e43979ae6396d0fc210aa3c879a55563..21661ba6f7138646909bb1f5e6a9ade92c7bd4c7 100644 (file)
@@ -184,6 +184,78 @@ source value is NAN the complete sliding window is affected. The TRENDNAN
 operation ignores all NAN-values in a sliding window and computes the 
 average of the remaining values.
 
+B<PREDICT, PREDICTSIGMA>
+
+Create a "sliding window" average/sigma of another data series, that also
+shifts the data series by given amounts of of time as well
+
+Usage - explicit stating shifts:
+CDEF:predict=<shift n>,...,<shift 1>,n,<window>,x,PREDICT
+CDEF:sigma=<shift n>,...,<shift 1>,n,<window>,x,PREDICTSIGMA
+
+Usage - shifts defined as a base shift and a number of time this is applied
+CDEF:predict=<shift multiplier>,-n,<window>,x,PREDICT
+CDEF:sigma=<shift multiplier>,-n,<window>,x,PREDICTSIGMA
+
+Example:
+CDEF:predict=172800,86400,2,1800,x,PREDICT
+
+This will create a half-hour (1800 second) sliding window average/sigma of x, that
+average is essentially computed as shown here:
+
+ +---!---!---!---!---!---!---!---!---!---!---!---!---!---!---!---!---!--->
+                                                                     now
+                                                  shift 1        t0
+                                         <----------------------->
+                               window
+                         <--------------->
+                                       shift 2
+                 <----------------------------------------------->
+       window
+ <--------------->
+                                                      shift 1        t1
+                                             <----------------------->
+                                   window
+                             <--------------->
+                                            shift 2
+                     <----------------------------------------------->
+           window
+     <--------------->
+
+ Value at sample (t0) will be the average between (t0-shift1-window) and (t0-shift1)
+                                      and between (t0-shift2-window) and (t0-shift2)
+ Value at sample (t1) will be the average between (t1-shift1-window) and (t1-shift1)
+                                      and between (t1-shift2-window) and (t1-shift2)
+
+
+The function is by design NAN-safe. 
+This also allows for extrapolation into the future (say a few days)
+- you may need to define the data series whit the optional start= parameter, so that 
+the source data series has enough data to provide prediction also at the beginning of a graph...
+
+Here an example, that will create a 10 day graph that also shows the 
+prediction 3 days into the future with its uncertainty value (as defined by avg+-4*sigma)
+This also shows if the prediction is exceeded at a certain point.
+
+rrdtool graph image.png --imgformat=PNG \
+ --start=-7days --end=+3days --width=1000 --height=200 --alt-autoscale-max \
+ DEF:value=value.rrd:value:AVERAGE:start=-14days \
+ LINE1:value#ff0000:value \
+ CDEF:predict=86400,-7,1800,value,PREDICT \
+ CDEF:sigma=86400,-7,1800,value,PREDICTSIGMA \
+ CDEF:upper=predict,sigma,3,*,+ \
+ CDEF:lower=predict,sigma,3,*,- \
+ LINE1:predict#00ff00:prediction \
+ LINE1:upper#0000ff:upper\ certainty\ limit \
+ LINE1:lower#0000ff:lower\ certainty\ limit \
+ CDEF:exceeds=value,UN,0,value,lower,upper,LIMIT,UN,IF \
+ TICK:exceeds#aa000080:1
+
+Note: Experience has shown that a factor between 3 and 5 to scale sigma is a good 
+discriminator to detect abnormal behaviour. This obviously depends also on the type 
+of data and how "noisy" the data series is.
+
+This prediction can only be used for short term extrapolations - say a few days into the future-
 
 =item Special values
 
index 5bb5e4056ac21c5beb203c973d29962479ba5725..d83c0f4a7d920aabd62f6daacff1d910c7b8061b 100644 (file)
@@ -174,6 +174,8 @@ void rpn_compact2str(
             add_op(OP_REV, REV)
             add_op(OP_TREND, TREND)
             add_op(OP_TRENDNAN, TRENDNAN)
+            add_op(OP_PREDICT, PREDICT)
+            add_op(OP_PREDICTSIGMA, PREDICTSIGMA)
             add_op(OP_RAD2DEG, RAD2DEG)
             add_op(OP_DEG2RAD, DEG2RAD)
             add_op(OP_AVG, AVG)
@@ -371,6 +373,8 @@ rpnp_t   *rpn_parse(
             match_op(OP_REV, REV)
             match_op(OP_TREND, TREND)
             match_op(OP_TRENDNAN, TRENDNAN)
+            match_op(OP_PREDICT, PREDICT)
+            match_op(OP_PREDICTSIGMA, PREDICTSIGMA)
             match_op(OP_RAD2DEG, RAD2DEG)
             match_op(OP_DEG2RAD, DEG2RAD)
             match_op(OP_AVG, AVG)
@@ -784,6 +788,84 @@ short rpn_calc(
                 }
             }
             break;
+        case OP_PREDICT:
+        case OP_PREDICTSIGMA:
+            stackunderflow(2);
+           {
+               /* the local averaging window (similar to trend, but better here, as we get better statistics thru numbers)*/
+               int   locstepsize = rpnstack->s[--stptr];
+               /* the number of shifts and range-checking*/
+               int     shifts = rpnstack->s[--stptr];
+                stackunderflow(shifts);
+               // handle negative shifts special
+               if (shifts<0) {
+                   stptr--;
+               } else {
+                   stptr-=shifts;
+               }
+               /* the real calculation */
+               double val=DNAN;
+               /* the info on the datasource */
+               time_t  dsstep = (time_t) rpnp[rpi - 1].step;
+               int    dscount = rpnp[rpi - 1].ds_cnt;
+               int   locstep = (int)ceil((float)locstepsize/(float)dsstep);
+
+               /* the sums */
+                double    sum = 0;
+               double    sum2 = 0;
+                int       count = 0;
+               /* now loop for each position */
+               int doshifts=shifts;
+               if (shifts<0) { doshifts=-shifts; }
+               for(int loop=0;loop<doshifts;loop++) {
+                   /* calculate shift step */
+                   int shiftstep=1;
+                   if (shifts<0) {
+                       shiftstep = loop*rpnstack->s[stptr];
+                   } else { 
+                       shiftstep = rpnstack->s[stptr+loop]; 
+                   }
+                   if(shiftstep <0) {
+                       rrd_set_error("negative shift step not allowed: %i",shiftstep);
+                       return -1;
+                   }
+                   shiftstep=(int)ceil((float)shiftstep/(float)dsstep);
+                   /* loop all local shifts */
+                   for(int i=0;i<=locstep;i++) {
+                       /* now calculate offset into data-array - relative to output_idx*/
+                       int offset=shiftstep+i;
+                       /* and process if we have index 0 of above */
+                       if ((offset>=0)&&(offset<output_idx)) {
+                           /* get the value */
+                           val =rpnp[rpi - 1].data[-dscount * offset];
+                           /* and handle the non NAN case only*/
+                           if (! isnan(val)) {
+                               sum+=val;
+                               sum2+=val*val;
+                               count++;
+                           }
+                       }
+                   }
+               }
+               /* do the final calculations */
+               val=DNAN;
+               if (rpnp[rpi].op == OP_PREDICT) {  /* the average */
+                   if (count>0) {
+                       val = sum/(double)count;
+                   } 
+               } else {
+                   if (count>1) { /* the sigma case */
+                       val=count*sum2-sum*sum;
+                       if (val<0) {
+                           val=DNAN;
+                       } else {
+                           val=sqrt(val/((float)count*((float)count-1.0)));
+                       }
+                   }
+               }
+               rpnstack->s[stptr] = val;
+           }
+            break;
         case OP_TREND:
         case OP_TRENDNAN:
             stackunderflow(1);
index 6f91e9910dd47e238f66200b1a9b494f74d753b2..10c8edd4b3244d9abd41d378400aebb02fb83a1b 100644 (file)
@@ -18,6 +18,7 @@ enum op_en { OP_NUMBER = 0, OP_VARIABLE, OP_INF, OP_PREV, OP_NEGINF,
     OP_UN, OP_END, OP_LTIME, OP_NE, OP_ISINF, OP_PREV_OTHER, OP_COUNT,
     OP_ATAN, OP_SQRT, OP_SORT, OP_REV, OP_TREND, OP_TRENDNAN,
     OP_ATAN2, OP_RAD2DEG, OP_DEG2RAD,
+    OP_PREDICT,OP_PREDICTSIGMA,
     OP_AVG, OP_ABS, OP_ADDNAN
 };