Imported Upstream version 1.4.7

[pkg-rrdtool.git] / doc / rrdcreate.1
diff --git a/doc/rrdcreate.1 b/doc/rrdcreate.1

index 2b1941c17ac46ac77d935ae2b36c6a29758b54e9..94ac5fc0e3b81dda43576d25893bc0ca49602a01 100644 (file)
--- a/doc/rrdcreate.1
+++ b/doc/rrdcreate.1
@@ -1,15 +1,7 @@
-.\" Automatically generated by Pod::Man v1.37, Pod::Parser v1.32
+.\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.07)
  .\"
  .\" Standard preamble:
  .\" ========================================================================
-.de Sh \" Subsection heading
-.br
-.if t .Sp
-.ne 5
-.PP
-\fB\\$1\fR
-.PP
-..
  .de Sp \" Vertical space (when we can't use .PP)
  .if t .sp .5v
  .if n .sp
@@ -48,22 +40,25 @@
  .    ds R" ''
  'br\}
  .\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\"
  .\" If the F register is turned on, we'll generate index entries on stderr for
-.\" titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and index
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
  .\" entries marked with X<> in POD.  Of course, you'll have to process the
  .\" output yourself in some meaningful fashion.
-.if \nF \{\
+.ie \nF \{\
  .    de IX
  .    tm Index:\\$1\t\\n%\t"\\$2"
  ..
  .    nr % 0
  .    rr F
  .\}
-.\"
-.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
-.\" way too many mistakes in technical documents.
-.hy 0
-.if n .na
+.el \{\
+.    de IX
+..
+.\}
  .\"
  .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
  .\" Fear.  Run.  Save yourself.  No user-serviceable parts.
@@ -129,7 +124,11 @@
  .\" ========================================================================
  .\"
  .IX Title "RRDCREATE 1"
-.TH RRDCREATE 1 "2008-07-23" "1.2.28" "rrdtool"
+.TH RRDCREATE 1 "2011-01-06" "1.4.7" "rrdtool"
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
  .SH "NAME"
  rrdcreate \- Set up a new Round Robin Database
  .SH "SYNOPSIS"
@@ -137,6 +136,7 @@ rrdcreate \- Set up a new Round Robin Database
  \&\fBrrdtool\fR \fBcreate\fR \fIfilename\fR
  [\fB\-\-start\fR|\fB\-b\fR\ \fIstart\ time\fR]
  [\fB\-\-step\fR|\fB\-s\fR\ \fIstep\fR]
+[\fB\-\-no\-overwrite\fR]
  [\fB\s-1DS:\s0\fR\fIds-name\fR\fB:\fR\fI\s-1DST\s0\fR\fB:\fR\fIdst\ arguments\fR]
  [\fB\s-1RRA:\s0\fR\fI\s-1CF\s0\fR\fB:\fR\fIcf\ arguments\fR]
  .SH "DESCRIPTION"
@@ -144,48 +144,50 @@ rrdcreate \- Set up a new Round Robin Database
  The create function of RRDtool lets you set up new Round Robin
  Database (\fB\s-1RRD\s0\fR) files.  The file is created at its final, full size
  and filled with \fI*UNKNOWN*\fR data.
-.IP "\fIfilename\fR" 4
-.IX Item "filename"
+.SS "\fIfilename\fP"
+.IX Subsection "filename"
  The name of the \fB\s-1RRD\s0\fR you want to create. \fB\s-1RRD\s0\fR files should end
  with the extension \fI.rrd\fR. However, \fBRRDtool\fR will accept any
  filename.
-.IP "\fB\-\-start\fR|\fB\-b\fR \fIstart time\fR (default: now \- 10s)" 4
-.IX Item "--start|-b start time (default: now - 10s)"
+.SS "\fB\-\-start\fP|\fB\-b\fP \fIstart time\fP (default: now \- 10s)"
+.IX Subsection "--start|-b start time (default: now - 10s)"
  Specifies the time in seconds since 1970\-01\-01 \s-1UTC\s0 when the first
  value should be added to the \fB\s-1RRD\s0\fR. \fBRRDtool\fR will not accept
  any data timed before or at the time specified.
-.Sp
+.PP
  See also AT-STYLE \s-1TIME\s0 \s-1SPECIFICATION\s0 section in the
  \&\fIrrdfetch\fR documentation for other ways to specify time.
-.IP "\fB\-\-step\fR|\fB\-s\fR \fIstep\fR (default: 300 seconds)" 4
-.IX Item "--step|-s step (default: 300 seconds)"
+.SS "\fB\-\-step\fP|\fB\-s\fP \fIstep\fP (default: 300 seconds)"
+.IX Subsection "--step|-s step (default: 300 seconds)"
  Specifies the base interval in seconds with which data will be fed
  into the \fB\s-1RRD\s0\fR.
-.IP "\fB\s-1DS:\s0\fR\fIds-name\fR\fB:\fR\fI\s-1DST\s0\fR\fB:\fR\fIdst arguments\fR" 4
-.IX Item "DS:ds-name:DST:dst arguments"
+.SS "\fB\-\-no\-overwrite\fP"
+.IX Subsection "--no-overwrite"
+Do not clobber an existing file of the same name.
+.SS "\fB\s-1DS:\s0\fP\fIds-name\fP\fB:\fP\fI\s-1DST\s0\fP\fB:\fP\fIdst arguments\fP"
+.IX Subsection "DS:ds-name:DST:dst arguments"
  A single \fB\s-1RRD\s0\fR can accept input from several data sources (\fB\s-1DS\s0\fR),
  for example incoming and outgoing traffic on a specific communication
  line. With the \fB\s-1DS\s0\fR configuration option you must define some basic
  properties of each data source you want to store in the \fB\s-1RRD\s0\fR.
-.Sp
+.PP
  \&\fIds-name\fR is the name you will use to reference this particular data
  source from an \fB\s-1RRD\s0\fR. A \fIds-name\fR must be 1 to 19 characters long in
  the characters [a\-zA\-Z0\-9_].
-.Sp
+.PP
  \&\fI\s-1DST\s0\fR defines the Data Source Type. The remaining arguments of a
  data source entry depend on the data source type. For \s-1GAUGE\s0, \s-1COUNTER\s0,
  \&\s-1DERIVE\s0, and \s-1ABSOLUTE\s0 the format for a data source entry is:
-.Sp
+.PP
  \&\fB\s-1DS:\s0\fR\fIds-name\fR\fB:\fR\fI\s-1GAUGE\s0 | \s-1COUNTER\s0 | \s-1DERIVE\s0 | \s-1ABSOLUTE\s0\fR\fB:\fR\fIheartbeat\fR\fB:\fR\fImin\fR\fB:\fR\fImax\fR
-.Sp
+.PP
  For \s-1COMPUTE\s0 data sources, the format is:
-.Sp
+.PP
  \&\fB\s-1DS:\s0\fR\fIds-name\fR\fB:\fR\fI\s-1COMPUTE\s0\fR\fB:\fR\fIrpn-expression\fR
-.Sp
+.PP
  In order to decide which data source type to use, review the
  definitions that follow. Also consult the section on \*(L"\s-1HOW\s0 \s-1TO\s0 \s-1MEASURE\s0\*(R"
  for further insight.
-.RS 4
  .IP "\fB\s-1GAUGE\s0\fR" 4
  .IX Item "GAUGE"
  is for things like temperatures or number of people in a room or the
@@ -208,7 +210,7 @@ room. Internally, derive works exactly like \s-1COUNTER\s0 but without
  overflow checks. So if your counter does not reset at 32 or 64 bit you
  might want to use \s-1DERIVE\s0 and combine it with a \s-1MIN\s0 value of 0.
  .Sp
-\&\s-1NOTE\s0 on \s-1COUNTER\s0 vs \s-1DERIVE\s0
+\&\fB\s-1NOTE\s0 on \s-1COUNTER\s0 vs \s-1DERIVE\s0\fR
  .Sp
  by Don Baarda <don.baarda@baesystems.com>
  .Sp
@@ -243,24 +245,22 @@ formula. Consolidation functions are then applied normally to the PDPs
  of the \s-1COMPUTE\s0 data source (that is the rpn-expression is only applied
  to generate PDPs). In database software, such data sets are referred
  to as \*(L"virtual\*(R" or \*(L"computed\*(R" columns.
-.RE
-.RS 4
-.Sp
+.PP
  \&\fIheartbeat\fR defines the maximum number of seconds that may pass
  between two updates of this data source before the value of the
  data source is assumed to be \fI*UNKNOWN*\fR.
-.Sp
+.PP
  \&\fImin\fR and \fImax\fR define the expected range values for data supplied by a
-data source. If \fImin\fR and/or \fImax\fR any value outside the defined range
+data source. If \fImin\fR and/or \fImax\fR are specified, any value outside the defined range
  will be regarded as \fI*UNKNOWN*\fR. If you do not know or care about min and
  max, set them to U for unknown. Note that min and max always refer to the
  processed values of the \s-1DS\s0. For a traffic\-\fB\s-1COUNTER\s0\fR type \s-1DS\s0 this would be
  the maximum and minimum data-rate expected from the device.
-.Sp
+.PP
  \&\fIIf information on minimal/maximal expected values is available,
  always set the min and/or max properties. This will help RRDtool in
  doing a simple sanity check on the data supplied when running update.\fR
-.Sp
+.PP
  \&\fIrpn-expression\fR defines the formula used to compute the PDPs of a
  \&\s-1COMPUTE\s0 data source from other data sources in the same <\s-1RRD\s0>. It is
  similar to defining a \fB\s-1CDEF\s0\fR argument for the graph command. Please
@@ -271,34 +271,53 @@ the \s-1RPN\s0 expression, the \s-1COMPUTE\s0 data source may only refer to the
  names of data source listed previously in the create command. This is
  similar to the restriction that \fB\s-1CDEF\s0\fRs must refer only to \fB\s-1DEF\s0\fRs
  and \fB\s-1CDEF\s0\fRs previously defined in the same graph command.
-.RE
-.IP "\fB\s-1RRA:\s0\fR\fI\s-1CF\s0\fR\fB:\fR\fIcf arguments\fR" 4
-.IX Item "RRA:CF:cf arguments"
+.SS "\fB\s-1RRA:\s0\fP\fI\s-1CF\s0\fP\fB:\fP\fIcf arguments\fP"
+.IX Subsection "RRA:CF:cf arguments"
  The purpose of an \fB\s-1RRD\s0\fR is to store data in the round robin archives
  (\fB\s-1RRA\s0\fR). An archive consists of a number of data values or statistics for
  each of the defined data-sources (\fB\s-1DS\s0\fR) and is defined with an \fB\s-1RRA\s0\fR line.
-.Sp
+.PP
  When data is entered into an \fB\s-1RRD\s0\fR, it is first fit into time slots
  of the length defined with the \fB\-s\fR option, thus becoming a \fIprimary
  data point\fR.
-.Sp
+.PP
  The data is also processed with the consolidation function (\fI\s-1CF\s0\fR) of
  the archive. There are several consolidation functions that
  consolidate primary data points via an aggregate function: \fB\s-1AVERAGE\s0\fR,
-\&\fB\s-1MIN\s0\fR, \fB\s-1MAX\s0\fR, \fB\s-1LAST\s0\fR. The format of \fB\s-1RRA\s0\fR line for these
+\&\fB\s-1MIN\s0\fR, \fB\s-1MAX\s0\fR, \fB\s-1LAST\s0\fR.
+.IP "\s-1AVERAGE\s0" 4
+.IX Item "AVERAGE"
+the average of the data points is stored.
+.IP "\s-1MIN\s0" 4
+.IX Item "MIN"
+the smallest of the data points is stored.
+.IP "\s-1MAX\s0" 4
+.IX Item "MAX"
+the largest of the data points is stored.
+.IP "\s-1LAST\s0" 4
+.IX Item "LAST"
+the last data point is used.
+.PP
+Note that data aggregation inevitably leads to loss of precision and
+information. The trick is to pick the aggregate function such that the
+\&\fIinteresting\fR properties of your data are kept across the aggregation
+process.
+.PP
+The format of an \fB\s-1RRA\s0\fR line for these
  consolidation functions is:
-.Sp
+.PP
  \&\fB\s-1RRA:\s0\fR\fI\s-1AVERAGE\s0 | \s-1MIN\s0 | \s-1MAX\s0 | \s-1LAST\s0\fR\fB:\fR\fIxff\fR\fB:\fR\fIsteps\fR\fB:\fR\fIrows\fR
-.Sp
+.PP
  \&\fIxff\fR The xfiles factor defines what part of a consolidation interval may
  be made up from \fI*UNKNOWN*\fR data while the consolidated value is still
  regarded as known. It is given as the ratio of allowed \fI*UNKNOWN*\fR PDPs
  to the number of PDPs in the interval. Thus, it ranges from 0 to 1 (exclusive).
-.Sp
+.PP
  \&\fIsteps\fR defines how many of these \fIprimary data points\fR are used to build
  a \fIconsolidated data point\fR which then goes into the archive.
-.Sp
+.PP
  \&\fIrows\fR defines how many generations of data values are kept in an \fB\s-1RRA\s0\fR.
+Obviously, this has to be greater than zero.
  .SH "Aberrant Behavior Detection with Holt-Winters Forecasting"
  .IX Header "Aberrant Behavior Detection with Holt-Winters Forecasting"
  In addition to the aggregate functions, there are a set of specialized
@@ -308,9 +327,11 @@ flagging aberrant behavior in the data source time series:
  .IP "\(bu" 4
  \&\fB\s-1RRA:\s0\fR\fI\s-1HWPREDICT\s0\fR\fB:\fR\fIrows\fR\fB:\fR\fIalpha\fR\fB:\fR\fIbeta\fR\fB:\fR\fIseasonal period\fR[\fB:\fR\fIrra-num\fR]
  .IP "\(bu" 4
-\&\fB\s-1RRA:\s0\fR\fI\s-1SEASONAL\s0\fR\fB:\fR\fIseasonal period\fR\fB:\fR\fIgamma\fR\fB:\fR\fIrra-num\fR
+\&\fB\s-1RRA:\s0\fR\fI\s-1MHWPREDICT\s0\fR\fB:\fR\fIrows\fR\fB:\fR\fIalpha\fR\fB:\fR\fIbeta\fR\fB:\fR\fIseasonal period\fR[\fB:\fR\fIrra-num\fR]
+.IP "\(bu" 4
+\&\fB\s-1RRA:\s0\fR\fI\s-1SEASONAL\s0\fR\fB:\fR\fIseasonal period\fR\fB:\fR\fIgamma\fR\fB:\fR\fIrra-num\fR[\fB:smoothing\-window=\fR\fIfraction\fR]
  .IP "\(bu" 4
-\&\fB\s-1RRA:\s0\fR\fI\s-1DEVSEASONAL\s0\fR\fB:\fR\fIseasonal period\fR\fB:\fR\fIgamma\fR\fB:\fR\fIrra-num\fR
+\&\fB\s-1RRA:\s0\fR\fI\s-1DEVSEASONAL\s0\fR\fB:\fR\fIseasonal period\fR\fB:\fR\fIgamma\fR\fB:\fR\fIrra-num\fR[\fB:smoothing\-window=\fR\fIfraction\fR]
  .IP "\(bu" 4
  \&\fB\s-1RRA:\s0\fR\fI\s-1DEVPREDICT\s0\fR\fB:\fR\fIrows\fR\fB:\fR\fIrra-num\fR
  .IP "\(bu" 4
@@ -319,19 +340,32 @@ flagging aberrant behavior in the data source time series:
  These \fBRRAs\fR differ from the true consolidation functions in several ways.
  First, each of the \fB\s-1RRA\s0\fRs is updated once for every primary data point.
  Second, these \fBRRAs\fR are interdependent. To generate real-time confidence
-bounds, a matched set of \s-1HWPREDICT\s0, \s-1SEASONAL\s0, \s-1DEVSEASONAL\s0, and
-\&\s-1DEVPREDICT\s0 must exist. Generating smoothed values of the primary data points
-requires both a \s-1HWPREDICT\s0 \fB\s-1RRA\s0\fR and \s-1SEASONAL\s0 \fB\s-1RRA\s0\fR. Aberrant behavior
-detection requires \s-1FAILURES\s0, \s-1HWPREDICT\s0, \s-1DEVSEASONAL\s0, and \s-1SEASONAL\s0.
-.PP
-The actual predicted, or smoothed, values are stored in the \s-1HWPREDICT\s0
-\&\fB\s-1RRA\s0\fR. The predicted deviations are stored in \s-1DEVPREDICT\s0 (think a standard
-deviation which can be scaled to yield a confidence band). The \s-1FAILURES\s0
-\&\fB\s-1RRA\s0\fR stores binary indicators. A 1 marks the indexed observation as
-failure; that is, the number of confidence bounds violations in the
-preceding window of observations met or exceeded a specified threshold. An
-example of using these \fBRRAs\fR to graph confidence bounds and failures
-appears in rrdgraph.
+bounds, a matched set of \s-1SEASONAL\s0, \s-1DEVSEASONAL\s0, \s-1DEVPREDICT\s0, and either
+\&\s-1HWPREDICT\s0 or \s-1MHWPREDICT\s0 must exist. Generating smoothed values of the primary
+data points requires a \s-1SEASONAL\s0 \fB\s-1RRA\s0\fR and either an \s-1HWPREDICT\s0 or \s-1MHWPREDICT\s0 
+\&\fB\s-1RRA\s0\fR. Aberrant behavior detection requires \s-1FAILURES\s0, \s-1DEVSEASONAL\s0, \s-1SEASONAL\s0,
+and either \s-1HWPREDICT\s0 or \s-1MHWPREDICT\s0.
+.PP
+The predicted, or smoothed, values are stored in the \s-1HWPREDICT\s0 or \s-1MHWPREDICT\s0
+\&\fB\s-1RRA\s0\fR. \s-1HWPREDICT\s0 and \s-1MHWPREDICT\s0 are actually two variations on the
+Holt-Winters method. They are interchangeable. Both attempt to decompose data
+into three components: a baseline, a trend, and a seasonal coefficient.
+\&\s-1HWPREDICT\s0 adds its seasonal coefficient to the baseline to form a prediction, whereas
+\&\s-1MHWPREDICT\s0 multiplies its seasonal coefficient by the baseline to form a
+prediction. The difference is noticeable when the baseline changes
+significantly in the course of a season; \s-1HWPREDICT\s0 will predict the seasonality
+to stay constant as the baseline changes, but \s-1MHWPREDICT\s0 will predict the
+seasonality to grow or shrink in proportion to the baseline. The proper choice
+of method depends on the thing being modeled. For simplicity, the rest of this
+discussion will refer to \s-1HWPREDICT\s0, but \s-1MHWPREDICT\s0 may be substituted in its
+place.
+.PP
+The predicted deviations are stored in \s-1DEVPREDICT\s0 (think a standard deviation
+which can be scaled to yield a confidence band). The \s-1FAILURES\s0 \fB\s-1RRA\s0\fR stores 
+binary indicators. A 1 marks the indexed observation as a failure; that is, the 
+number of confidence bounds violations in the preceding window of observations 
+met or exceeded a specified threshold. An example of using these \fBRRAs\fR to graph 
+confidence bounds and failures appears in rrdgraph.
  .PP
  The \s-1SEASONAL\s0 and \s-1DEVSEASONAL\s0 \fBRRAs\fR store the seasonal coefficients for the
  Holt-Winters forecasting algorithm and the seasonal deviations, respectively.
@@ -391,6 +425,13 @@ If \s-1SEASONAL\s0 and \s-1DEVSEASONAL\s0 \fBRRAs\fR are created explicitly, \fI
  be the same for both. Note that \fIgamma\fR can also be changed via the
  \&\fBRRDtool\fR \fItune\fR command.
  .PP
+\&\fIsmoothing-window\fR specifies the fraction of a season that should be
+averaged around each point. By default, the value of \fIsmoothing-window\fR is
+0.05, which means each value in \s-1SEASONAL\s0 and \s-1DEVSEASONAL\s0 will be occasionally
+replaced by averaging it with its (\fIseasonal period\fR*0.05) nearest neighbors.
+Setting \fIsmoothing-window\fR to zero will disable the running-average smoother
+altogether.
+.PP
  \&\fIrra-num\fR provides the links between related \fBRRAs\fR. If \s-1HWPREDICT\s0 is
  specified alone and the other \fBRRAs\fR are created implicitly, then
  there is no need to worry about this argument. If \fBRRAs\fR are created
@@ -443,7 +484,7 @@ an average rate for that \s-1PDP\s0. If the total \*(L"unknown\*(R" time account
  more than \fBhalf\fR the \*(L"step\*(R", the entire \s-1PDP\s0 is marked
  as \*(L"unknown\*(R". This means that a mixture of known and \*(L"unknown\*(R" sample
  times in a single \s-1PDP\s0 \*(L"step\*(R" may or may not add up to enough \*(L"known\*(R"
-time to warrent for a known \s-1PDP\s0.
+time to warrant a known \s-1PDP\s0.
  .PP
  The \*(L"heartbeat\*(R" can be short (unusual) or long (typical) relative to
  the \*(L"step\*(R" interval between PDPs. A short \*(L"heartbeat\*(R" means you
@@ -455,10 +496,10 @@ sample. An extreme example of this might be a \*(L"step\*(R" of 5 minutes and a
  result in all the PDPs for that entire day period being set to the
  same average rate. \fI\-\- Don Baarda <don.baarda@baesystems.com>\fR
  .PP
-.Vb 35
+.Vb 10
  \&       time|
  \&       axis|
-\& begin__|00|
+\& begin_\|_|00|
  \&        |01|
  \&       u|02|\-\-\-\-* sample1, restart "hb"\-timer
  \&       u|03|   /
@@ -486,7 +527,7 @@ same average rate. \fI\-\- Don Baarda <don.baarda@baesystems.com>\fR
  \&        |25|   /
  \&        |26|  /
  \&        |27|\-\-\-\-* sample7, restart "hb"
-\& step2__|28|   /
+\& step2_\|_|28|   /
  \&        |22|  /
  \&        |23|\-\-\-\-* sample8, restart "hb", create "pdp" for step1, create "cdp" 
  \&        |24|   /
@@ -507,7 +548,7 @@ together with the time.
  .IP "Mail Messages" 4
  .IX Item "Mail Messages"
  Assume you have a method to count the number of messages transported by
-your mailserver in a certain amount of time, giving you data like '5
+your mail server in a certain amount of time, giving you data like '5
  messages in the last 65 seconds'. If you look at the count of 5 like an
  \&\fB\s-1ABSOLUTE\s0\fR data type you can simply update the \s-1RRD\s0 with the number 5 and the
  end time of your monitoring period. RRDtool will then record the number of
@@ -526,7 +567,7 @@ plots continuous data, and as such is not appropriate for plotting
  absolute amounts as for example \*(L"total bytes\*(R" sent and received in a
  router. What you probably want is to plot rates that you can scale to
  bytes/hour, for example, or plot absolute amounts with another tool
-that draws bar\-plots, where the delta-time is clear on the plot for
+that draws bar-plots, where the delta-time is clear on the plot for
  each point (such that when you read the graph you see for example \s-1GB\s0
  on the y axis, days on the x axis and one bar for each day).
  .SH "EXAMPLE"