1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 * $Log$
8 * Revision 1.10 2004/05/26 22:11:12 oetiker
9 * reduce compiler warnings. Many small fixes. -- Mike Slifcak <slif@bellsouth.net>
10 *
11 * Revision 1.9 2003/04/29 21:56:49 oetiker
12 * readline in rrd_open.c reads the file in 8 KB blocks, and calls realloc for
13 * each block. realloc is very slow in Mac OS X for huge blocks, e.g. when
14 * restoring databases from huge xml files. This patch finds the size of the
15 * file, and starts out with malloc'ing the full size.
16 * -- Peter Speck <speck@ruc.dk>
17 *
18 * Revision 1.8 2003/04/11 19:43:44 oetiker
 * New special value COUNT which allows calculations based on the position of a
 * value within a data set. Bug fix in rrd_rpncalc.c. PREV returned erroneous
 * value for the second value. Bug fix in rrd_restore.c. Bug causing seek error
 * when accessing an RRD restored from an xml that holds an RRD version <3.
23 * -- Ruben Justo <ruben@ainek.com>
24 *
25 * Revision 1.7 2003/03/31 21:22:12 oetiker
26 * enables RRDtool updates with microsecond or in case of windows millisecond
27 * precision. This is needed to reduce time measurement error when archive step
28 * is small. (<30s) -- Sasha Mikheev <sasha@avalon-net.co.il>
29 *
30 * Revision 1.6 2003/02/13 07:05:27 oetiker
31 * Find attached the patch I promised to send to you. Please note that there
32 * are three new source files (src/rrd_is_thread_safe.h, src/rrd_thread_safe.c
33 * and src/rrd_not_thread_safe.c) and the introduction of librrd_th. This
34 * library is identical to librrd, but it contains support code for per-thread
35 * global variables currently used for error information only. This is similar
36 * to how errno per-thread variables are implemented. librrd_th must be linked
 * alongside of libpthread
38 *
39 * There is also a new file "THREADS", holding some documentation.
40 *
41 * -- Peter Stamfest <peter@stamfest.at>
42 *
43 * Revision 1.5 2002/06/20 00:21:03 jake
44 * More Win32 build changes; thanks to Kerry Calvert.
45 *
46 * Revision 1.4 2002/02/01 20:34:49 oetiker
47 * fixed version number and date/time
48 *
49 * Revision 1.3 2001/03/04 13:01:55 oetiker
50 * Aberrant Behavior Detection support. A brief overview added to rrdtool.pod.
51 * Major updates to rrd_update.c, rrd_create.c. Minor update to other core files.
52 * This is backwards compatible! But new files using the Aberrant stuff are not readable
53 * by old rrdtool versions. See http://cricket.sourceforge.net/aberrant/rrd_hw.htm
54 * -- Jake Brutlag <jakeb@corp.webtv.net>
55 *
56 * Revision 1.2 2001/03/04 10:29:20 oetiker
57 * fixed filedescriptor leak
58 * -- Mike Franusich <mike@franusich.com>
59 *
60 * Revision 1.1.1.1 2001/02/25 22:25:05 oetiker
61 * checkin
62 *
63 *****************************************************************************/
65 #include "rrd_tool.h"
66 #include "unused.h"
67 #define MEMBLK 8192
69 /* DEBUG 2 prints information obtained via mincore(2) */
70 // #define DEBUG 2
71 /* do not calculate exact madvise hints but assume 1 page for headers and
72 * set DONTNEED for the rest, which is assumed to be data */
73 //#define ONE_PAGE 1
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
76 #define CHECK_MADVISE_OVERLAPS 1
/*
 * __rrd_read(dst, dst_t, cnt) -- header reading helper for rrd_open() only.
 *
 * Makes dst refer to cnt consecutive elements of type dst_t taken from the
 * current header position and advances offset past them. The macro relies
 * on names of the function it is expanded in: the locals offset and rrd_file
 * (plus data in the mmap build) and the label out_nullify_head, which is
 * jumped to after rrd_set_error() when the elements cannot be provided.
 * cnt is evaluated more than once, so it must be free of side effects.
 * Both variants are single statements (do { } while (0)) and need the
 * trailing semicolon at the point of use.
 */
#ifdef HAVE_MMAP
/* mmap build: dst points straight into the mapping, nothing is copied.
   A request reaching beyond file_len is refused instead of handing out a
   pointer that runs off the end of the mapping. The division keeps the
   size check free of multiplication overflow.

   the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        if (offset > rrd_file->file_len || \
            (size_t) (cnt) > \
            (size_t) (rrd_file->file_len - offset) / sizeof(dst_t)) { \
            rrd_set_error(#dst " extends beyond end of file"); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += sizeof(dst_t) * (cnt); \
    } while (0)
#else
/* read(2) build: dst becomes a malloc'ed copy filled from rrd_file->fd.
   A failed or short read releases the copy again and leaves dst NULL, so
   offset is only ever advanced by bytes that really were read. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        const size_t rrd_read_want_ = sizeof(dst_t) * (cnt); \
        ssize_t   rrd_read_got_; \
        if (((dst) = malloc(rrd_read_want_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_read_got_ = read(rrd_file->fd, (dst), rrd_read_want_); \
        if (rrd_read_got_ < 0 || (size_t) rrd_read_got_ != rrd_read_want_) { \
            free(dst); \
            (dst) = NULL; \
            rrd_set_error(#dst " short read"); \
            goto out_nullify_head; \
        } \
        offset += rrd_read_got_; \
    } while (0)
#endif
/* Both macros expect a variable named _page_size to be in scope where they
 * are expanded; the mask arithmetic only works when it is a power of two. */

/* next page-aligned (i.e. page-align up): the smallest multiple of
 * _page_size that is >= addr */
#ifndef PAGE_ALIGN
#define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
#endif
/* previous page-aligned (i.e. page-align down): the largest multiple of
 * _page_size that is <= addr. Unlike PAGE_ALIGN there is no "+_page_size-1"
 * here, adding it would round up instead of down. */
#ifndef PAGE_ALIGN_DOWN
#define PAGE_ALIGN_DOWN(addr) ((addr)&(~(_page_size-1)))
#endif
#ifdef HAVE_MMAP
/* vector of last madvise hint: remembers the range of the most recent hint
 * issued through the _madvise() macro, which compares new requests against
 * it when CHECK_MADVISE_OVERLAPS is defined. */
typedef struct _madvise_vec_t {
    void *start;                /* start address of the last hinted range */
    ssize_t length;             /* length in bytes of the last hinted range */
} _madvise_vec_t;
/* Single file-scope instance; starts out as "nothing hinted yet".
 * NOTE(review): one record for the whole program, not one per open file --
 * confirm that this is intended. */
_madvise_vec_t _madv_vec = { NULL, 0 };
#endif
/*
 * _madvise(_start, _off, _hint) -- madvise(2) wrapper.
 *
 * _start  start address of the range
 * _off    length of the range in bytes
 * _hint   advice value handed on to madvise()
 *
 * With CHECK_MADVISE_OVERLAPS defined the call is skipped when the range is
 * exactly the one hinted last (same start AND same length, as recorded in
 * _madv_vec); any other range is recorded and hinted. A range that shares
 * only its start or only its length with the previous one is a different
 * range and therefore has to be hinted. The hint value itself is not part
 * of the comparison. The arguments are evaluated more than once.
 * The result of madvise() is discarded in this variant.
 */
#if defined CHECK_MADVISE_OVERLAPS
#define _madvise(_start, _off, _hint) \
    do { \
        if ((_start) != _madv_vec.start || \
            (ssize_t)(_off) != _madv_vec.length) { \
            _madv_vec.start = (_start) ; _madv_vec.length = (_off); \
            madvise((_start), (_off), (_hint)); \
        } \
    } while (0)
#else
#define _madvise(_start, _off, _hint) \
    madvise((_start), (_off), (_hint))
#endif
123 /* Open a database file, return its header and an open filehandle,
124 * positioned to the first cdp in the first rra.
125 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
126 * before returning an error. Do not call rrd_close upon failure of rrd_open.
127 */
129 rrd_file_t *rrd_open(
130 const char *const file_name,
131 rrd_t *rrd,
132 unsigned rdwr)
133 {
134 int flags = 0;
135 mode_t mode = S_IRUSR;
136 int version;
138 #ifdef HAVE_MMAP
139 ssize_t _page_size = sysconf(_SC_PAGESIZE);
140 int mm_prot = PROT_READ, mm_flags = 0;
141 char *data;
142 #endif
143 off_t offset = 0;
144 struct stat statb;
145 rrd_file_t *rrd_file = NULL;
146 off_t newfile_size = 0;
148 if (rdwr & RRD_CREAT) {
149 newfile_size = (off_t) rrd->stat_head->float_cookie;
150 free(rrd->stat_head);
151 }
152 rrd_init(rrd);
153 rrd_file = malloc(sizeof(rrd_file_t));
154 if (rrd_file == NULL) {
155 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
156 return NULL;
157 }
158 memset(rrd_file, 0, sizeof(rrd_file_t));
160 #ifdef DEBUG
161 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
162 (RRD_READONLY | RRD_READWRITE)) {
163 /* Both READONLY and READWRITE were given, which is invalid. */
164 rrd_set_error("in read/write request mask");
165 exit(-1);
166 }
167 #endif
168 if (rdwr & RRD_READONLY) {
169 flags |= O_RDONLY;
170 #ifdef HAVE_MMAP
171 mm_flags = MAP_PRIVATE;
172 # ifdef MAP_NORESERVE
173 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
174 # endif
175 #endif
176 } else {
177 if (rdwr & RRD_READWRITE) {
178 mode |= S_IWUSR;
179 flags |= O_RDWR;
180 #ifdef HAVE_MMAP
181 mm_flags = MAP_SHARED;
182 mm_prot |= PROT_WRITE;
183 #endif
184 }
185 if (rdwr & RRD_CREAT) {
186 flags |= (O_CREAT | O_TRUNC);
187 }
188 }
189 if (rdwr & RRD_READAHEAD) {
190 #ifdef MAP_POPULATE
191 mm_flags |= MAP_POPULATE; /* populate ptes and data */
192 #endif
193 #if defined MAP_NONBLOCK
194 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
195 #endif
196 #ifdef USE_DIRECT_IO
197 } else {
198 flags |= O_DIRECT;
199 #endif
200 }
201 #ifdef O_NONBLOCK
202 flags |= O_NONBLOCK;
203 #endif
205 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
206 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
207 goto out_free;
208 }
210 /* Better try to avoid seeks as much as possible. stat may be heavy but
211 * many concurrent seeks are even worse. */
212 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
213 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
214 goto out_close;
215 }
216 if (newfile_size == 0) {
217 rrd_file->file_len = statb.st_size;
218 } else {
219 rrd_file->file_len = newfile_size;
220 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
221 write(rrd_file->fd, "\0", 1); /* poke */
222 lseek(rrd_file->fd, 0, SEEK_SET);
223 }
224 #ifdef HAVE_POSIX_FADVISE
225 /* In general we need no read-ahead when dealing with rrd_files.
226 When we stop reading, it is highly unlikely that we start up again.
227 In this manner we actually save time and diskaccess (and buffer cache).
228 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
229 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
230 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
231 rrd_strerror(errno));
232 goto out_close;
233 }
234 #endif
236 /*
237 if (rdwr & RRD_READWRITE)
238 {
239 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
240 rrd_set_error("failed to disable the stream buffer\n");
241 return (-1);
242 }
243 }
244 */
245 #ifdef HAVE_MMAP
246 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
247 rrd_file->fd, offset);
249 /* lets see if the first read worked */
250 if (data == MAP_FAILED) {
251 rrd_set_error("mmaping file '%s': %s", file_name,
252 rrd_strerror(errno));
253 goto out_close;
254 }
255 rrd_file->file_start = data;
256 if (rdwr & RRD_CREAT) {
257 memset(data, DNAN, newfile_size - 1);
258 goto out_done;
259 }
260 #endif
261 if (rdwr & RRD_CREAT)
262 goto out_done;
263 #ifdef USE_MADVISE
264 if (rdwr & RRD_COPY) {
265 /* We will read everything in a moment (copying) */
266 _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
267 } else {
268 # ifndef ONE_PAGE
269 /* We do not need to read anything in for the moment */
270 _madvise(data, rrd_file->file_len, MADV_DONTNEED);
271 /* the stat_head will be needed soonish, so hint accordingly */
272 _madvise(data + PAGE_ALIGN_DOWN(offset),
273 PAGE_ALIGN(sizeof(stat_head_t)),
274 MADV_WILLNEED | MADV_RANDOM);
276 # else
277 /* alternatively: keep 1 page worth of data, likely headers,
278 * don't need the rest. */
279 _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
280 _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
281 ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
282 # endif
283 }
284 #endif
286 __rrd_read(rrd->stat_head, stat_head_t,
287 1);
289 /* lets do some test if we are on track ... */
290 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
291 rrd_set_error("'%s' is not an RRD file", file_name);
292 goto out_nullify_head;
293 }
295 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
296 rrd_set_error("This RRD was created on another architecture");
297 goto out_nullify_head;
298 }
300 version = atoi(rrd->stat_head->version);
302 if (version > atoi(RRD_VERSION)) {
303 rrd_set_error("can't handle RRD file version %s",
304 rrd->stat_head->version);
305 goto out_nullify_head;
306 }
307 #if defined USE_MADVISE && !defined ONE_PAGE
308 /* the ds_def will be needed soonish, so hint accordingly */
309 _madvise(data + PAGE_ALIGN_DOWN(offset),
310 PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt),
311 MADV_WILLNEED);
312 #endif
313 __rrd_read(rrd->ds_def, ds_def_t,
314 rrd->stat_head->ds_cnt);
316 #if defined USE_MADVISE && !defined ONE_PAGE
317 /* the rra_def will be needed soonish, so hint accordingly */
318 _madvise(data + PAGE_ALIGN_DOWN(offset),
319 PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt),
320 MADV_WILLNEED);
321 #endif
322 __rrd_read(rrd->rra_def, rra_def_t,
323 rrd->stat_head->rra_cnt);
325 /* handle different format for the live_head */
326 if (version < 3) {
327 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
328 if (rrd->live_head == NULL) {
329 rrd_set_error("live_head_t malloc");
330 goto out_close;
331 }
332 #ifdef HAVE_MMAP
333 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
334 offset += sizeof(long);
335 #else
336 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
337 #endif
338 rrd->live_head->last_up_usec = 0;
339 } else {
340 #if defined USE_MADVISE && !defined ONE_PAGE
341 /* the live_head will be needed soonish, so hint accordingly */
342 _madvise(data + PAGE_ALIGN_DOWN(offset),
343 PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED);
344 #endif
345 __rrd_read(rrd->live_head, live_head_t,
346 1);
347 }
348 //XXX: This doesn't look like it needs madvise
349 __rrd_read(rrd->pdp_prep, pdp_prep_t,
350 rrd->stat_head->ds_cnt);
352 //XXX: This could benefit from madvise()ing
353 __rrd_read(rrd->cdp_prep, cdp_prep_t,
354 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
356 //XXX: This could benefit from madvise()ing
357 __rrd_read(rrd->rra_ptr, rra_ptr_t,
358 rrd->stat_head->rra_cnt);
360 rrd_file->header_len = offset;
361 rrd_file->pos = offset;
362 out_done:
363 return (rrd_file);
364 out_nullify_head:
365 rrd->stat_head = NULL;
366 out_close:
367 close(rrd_file->fd);
368 out_free:
369 free(rrd_file);
370 return NULL;
371 }
374 /* Close a reference to an rrd_file. */
376 int rrd_close(
377 rrd_file_t *rrd_file)
378 {
379 int ret;
381 #if defined HAVE_MMAP || defined DEBUG
382 ssize_t _page_size = sysconf(_SC_PAGESIZE);
383 #endif
384 #if defined DEBUG && DEBUG > 1
385 /* pretty print blocks in core */
386 off_t off;
387 unsigned char *vec;
389 off = rrd_file->file_len +
390 ((rrd_file->file_len + _page_size - 1) / _page_size);
391 vec = malloc(off);
392 if (vec != NULL) {
393 memset(vec, 0, off);
394 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
395 int prev;
396 unsigned is_in = 0, was_in = 0;
398 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
399 is_in = vec[off] & 1; /* if lsb set then is core resident */
400 if (off == 0)
401 was_in = is_in;
402 if (was_in != is_in) {
403 fprintf(stderr, "%sin core: %p len %ld\n",
404 was_in ? "" : "not ", vec + prev, off - prev);
405 was_in = is_in;
406 prev = off;
407 }
408 }
409 fprintf(stderr,
410 "%sin core: %p len %ld\n",
411 was_in ? "" : "not ", vec + prev, off - prev);
412 } else
413 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
414 }
415 #endif /* DEBUG */
417 #ifdef USE_MADVISE
418 # ifdef ONE_PAGE
419 /* Keep headers around, round up to next page boundary. */
420 ret =
421 PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len);
422 if (rrd_file->file_len > ret)
423 _madvise(rrd_file->file_start + ret,
424 rrd_file->file_len - ret, MADV_DONTNEED);
425 # else
426 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
427 _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len),
428 rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len),
429 MADV_DONTNEED);
430 # endif
431 #endif
432 #ifdef HAVE_MMAP
433 ret = munmap(rrd_file->file_start, rrd_file->file_len);
434 if (ret != 0)
435 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
436 #endif
437 ret = close(rrd_file->fd);
438 if (ret != 0)
439 rrd_set_error("closing file: %s", rrd_strerror(errno));
440 free(rrd_file);
441 rrd_file = NULL;
442 return ret;
443 }
446 /* Set position of rrd_file. */
448 off_t rrd_seek(
449 rrd_file_t *rrd_file,
450 off_t off,
451 int whence)
452 {
453 off_t ret = 0;
455 #ifdef HAVE_MMAP
456 if (whence == SEEK_SET)
457 rrd_file->pos = off;
458 else if (whence == SEEK_CUR)
459 rrd_file->pos += off;
460 else if (whence == SEEK_END)
461 rrd_file->pos = rrd_file->file_len + off;
462 #else
463 ret = lseek(rrd_file->fd, off, whence);
464 if (ret < 0)
465 rrd_set_error("lseek: %s", rrd_strerror(errno));
466 rrd_file->pos = ret;
467 #endif
468 //XXX: mimic fseek, which returns 0 upon success
469 return ret == -1; //XXX: or just ret to mimic lseek
470 }
473 /* Get current position in rrd_file. */
475 inline off_t rrd_tell(
476 rrd_file_t *rrd_file)
477 {
478 return rrd_file->pos;
479 }
482 /* read count bytes into buffer buf, starting at rrd_file->pos.
483 * Returns the number of bytes read or <0 on error. */
485 inline ssize_t rrd_read(
486 rrd_file_t *rrd_file,
487 void *buf,
488 size_t count)
489 {
490 #ifdef HAVE_MMAP
491 size_t _cnt = count;
492 ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
494 if (_surplus > 0) { /* short read */
495 _cnt -= _surplus;
496 }
497 if (_cnt == 0)
498 return 0; /* EOF */
499 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
501 rrd_file->pos += _cnt; /* mimmic read() semantics */
502 return _cnt;
503 #else
504 ssize_t ret;
506 ret = read(rrd_file->fd, buf, count);
507 if (ret > 0)
508 rrd_file->pos += ret; /* mimmic read() semantics */
509 return ret;
510 #endif
511 }
514 /* write count bytes from buffer buf to the current position
515 * rrd_file->pos of rrd_file->fd.
516 * Returns the number of bytes written. */
518 inline ssize_t rrd_write(
519 rrd_file_t *rrd_file,
520 const void *buf,
521 size_t count)
522 {
523 #ifdef HAVE_MMAP
524 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
525 rrd_file->pos += count;
526 return count; /* mimmic write() semantics */
527 #else
528 ssize_t _sz = write(rrd_file->fd, buf, count);
530 if (_sz > 0)
531 rrd_file->pos += _sz;
532 return _sz;
533 #endif
534 }
537 /* flush all data pending to be written to FD. */
539 inline void rrd_flush(
540 rrd_file_t *rrd_file)
541 {
542 if (fdatasync(rrd_file->fd) != 0) {
543 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
544 rrd_strerror(errno));
545 }
546 }
549 /* Initialize RRD header. */
551 void rrd_init(
552 rrd_t *rrd)
553 {
554 rrd->stat_head = NULL;
555 rrd->ds_def = NULL;
556 rrd->rra_def = NULL;
557 rrd->live_head = NULL;
558 rrd->rra_ptr = NULL;
559 rrd->pdp_prep = NULL;
560 rrd->cdp_prep = NULL;
561 rrd->rrd_value = NULL;
562 }
565 /* free RRD header data. */
567 #ifdef HAVE_MMAP
568 inline void rrd_free(
569 rrd_t UNUSED(*rrd))
570 {
571 }
572 #else
573 void rrd_free(
574 rrd_t *rrd)
575 {
576 free(rrd->live_head);
577 free(rrd->stat_head);
578 free(rrd->ds_def);
579 free(rrd->rra_def);
580 free(rrd->rra_ptr);
581 free(rrd->pdp_prep);
582 free(rrd->cdp_prep);
583 free(rrd->rrd_value);
584 }
585 #endif
/* Release a block that the rrd library handed out to an external caller.
 * The pointer is passed to free() unchanged, so NULL is accepted too. */

void rrd_freemem(void *mem)
{
    free(mem);
}
598 /* XXX: FIXME: missing documentation. */
599 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
601 int /*_rrd_*/ readfile(
602 const char *file_name,
603 char **buffer,
604 int skipfirst)
605 {
606 long writecnt = 0, totalcnt = MEMBLK;
607 long offset = 0;
608 FILE *input = NULL;
609 char c;
611 if ((strcmp("-", file_name) == 0)) {
612 input = stdin;
613 } else {
614 if ((input = fopen(file_name, "rb")) == NULL) {
615 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
616 return (-1);
617 }
618 }
619 if (skipfirst) {
620 do {
621 c = getc(input);
622 offset++;
623 } while (c != '\n' && !feof(input));
624 }
625 if (strcmp("-", file_name)) {
626 fseek(input, 0, SEEK_END);
627 /* have extra space for detecting EOF without realloc */
628 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
629 if (totalcnt < MEMBLK)
630 totalcnt = MEMBLK; /* sanitize */
631 fseek(input, offset * sizeof(char), SEEK_SET);
632 }
633 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
634 perror("Allocate Buffer:");
635 exit(1);
636 };
637 do {
638 writecnt +=
639 fread((*buffer) + writecnt, 1,
640 (totalcnt - writecnt) * sizeof(char), input);
641 if (writecnt >= totalcnt) {
642 totalcnt += MEMBLK;
643 if (((*buffer) =
644 rrd_realloc((*buffer),
645 (totalcnt + 4) * sizeof(char))) == NULL) {
646 perror("Realloc Buffer:");
647 exit(1);
648 };
649 }
650 } while (!feof(input));
651 (*buffer)[writecnt] = '\0';
652 if (strcmp("-", file_name) != 0) {
653 fclose(input);
654 };
655 return writecnt;
656 }