1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 * $Log$
8 * Revision 1.10 2004/05/26 22:11:12 oetiker
9 * reduce compiler warnings. Many small fixes. -- Mike Slifcak <slif@bellsouth.net>
10 *
11 * Revision 1.9 2003/04/29 21:56:49 oetiker
12 * readline in rrd_open.c reads the file in 8 KB blocks, and calls realloc for
13 * each block. realloc is very slow in Mac OS X for huge blocks, e.g. when
14 * restoring databases from huge xml files. This patch finds the size of the
15 * file, and starts out with malloc'ing the full size.
16 * -- Peter Speck <speck@ruc.dk>
17 *
18 * Revision 1.8 2003/04/11 19:43:44 oetiker
19 * New special value COUNT which allows calculations based on the position of a
20 * value within a data set. Bug fix in rrd_rpncalc.c. PREV returned erroneus
21 * value for the second value. Bug fix in rrd_restore.c. Bug causing seek error
22 * when accesing an RRD restored from an xml that holds an RRD version <3.
23 * -- Ruben Justo <ruben@ainek.com>
24 *
25 * Revision 1.7 2003/03/31 21:22:12 oetiker
26 * enables RRDtool updates with microsecond or in case of windows millisecond
27 * precision. This is needed to reduce time measurement error when archive step
28 * is small. (<30s) -- Sasha Mikheev <sasha@avalon-net.co.il>
29 *
30 * Revision 1.6 2003/02/13 07:05:27 oetiker
31 * Find attached the patch I promised to send to you. Please note that there
32 * are three new source files (src/rrd_is_thread_safe.h, src/rrd_thread_safe.c
33 * and src/rrd_not_thread_safe.c) and the introduction of librrd_th. This
34 * library is identical to librrd, but it contains support code for per-thread
35 * global variables currently used for error information only. This is similar
36 * to how errno per-thread variables are implemented. librrd_th must be linked
37 * alongside of libpthred
38 *
39 * There is also a new file "THREADS", holding some documentation.
40 *
41 * -- Peter Stamfest <peter@stamfest.at>
42 *
43 * Revision 1.5 2002/06/20 00:21:03 jake
44 * More Win32 build changes; thanks to Kerry Calvert.
45 *
46 * Revision 1.4 2002/02/01 20:34:49 oetiker
47 * fixed version number and date/time
48 *
49 * Revision 1.3 2001/03/04 13:01:55 oetiker
50 * Aberrant Behavior Detection support. A brief overview added to rrdtool.pod.
51 * Major updates to rrd_update.c, rrd_create.c. Minor update to other core files.
52 * This is backwards compatible! But new files using the Aberrant stuff are not readable
53 * by old rrdtool versions. See http://cricket.sourceforge.net/aberrant/rrd_hw.htm
54 * -- Jake Brutlag <jakeb@corp.webtv.net>
55 *
56 * Revision 1.2 2001/03/04 10:29:20 oetiker
57 * fixed filedescriptor leak
58 * -- Mike Franusich <mike@franusich.com>
59 *
60 * Revision 1.1.1.1 2001/02/25 22:25:05 oetiker
61 * checkin
62 *
63 *****************************************************************************/
65 #include "rrd_tool.h"
66 #include "unused.h"
67 #define MEMBLK 8192
69 /* DEBUG 2 prints information obtained via mincore(2) */
70 // #define DEBUG 2
71 /* do not calculate exact madvise hints but assume 1 page for headers and
72 * set DONTNEED for the rest, which is assumed to be data */
73 //#define ONE_PAGE 1
/* Avoid calling madvise on areas that were already hinted. May be beneficial
 * if your syscalls are very slow */
76 #define CHECK_MADVISE_OVERLAPS 1
/* __rrd_read(dst, dst_t, cnt): make dst point at cnt consecutive dst_t
 * records of the header and advance the local variable 'offset' past them.
 *
 * Both variants expand inside rrd_open() and rely on its locals: 'offset'
 * always, 'data' in the mmap build, 'rrd_file' and the label
 * 'out_nullify_head' in the read() build.  They are wrapped in
 * do { } while (0) so that an invocation behaves like a single statement.
 */
#ifdef HAVE_MMAP
/* the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += sizeof(dst_t) * (cnt); \
    } while (0)
#else
/* A failed or short read() used to be added to 'offset' unchecked (adding
 * -1 on error); it is now reported and aborts the open. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        const size_t want_ = sizeof(dst_t) * (cnt); \
        ssize_t   got_; \
        if (((dst) = malloc(want_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        got_ = read(rrd_file->fd, (dst), want_); \
        if (got_ < 0 || (size_t) got_ != want_) { \
            rrd_set_error(#dst " read"); \
            goto out_nullify_head; \
        } \
        offset += got_; \
    } while (0)
#endif
/* Both macros expect a variable named _page_size (a power of two) to be in
 * scope at the point of use. */

/* next page-aligned (i.e. page-align up) */
#ifndef PAGE_ALIGN
#define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
#endif
/* previous page-aligned (i.e. page-align down): simply clear the in-page
 * bits; adding _page_size-1 first would round up instead */
#ifndef PAGE_ALIGN_DOWN
#define PAGE_ALIGN_DOWN(addr) ((addr)&(~(_page_size-1)))
#endif
#ifdef HAVE_MMAP
/* Region and advice of the most recent hint issued through _madvise().
 * NOTE(review): this is one file-scope record shared by every open RRD;
 * nothing here protects it against concurrent use. */
typedef struct _madvise_vec_t {
    void     *start;
    ssize_t   length;
    int       hint;
} _madvise_vec_t;
_madvise_vec_t _madv_vec = { NULL, 0, -1 };  /* -1: no hint issued yet */
#endif

#if defined CHECK_MADVISE_OVERLAPS
/* Call madvise() unless the very same hint (same start, same length, same
 * advice) was the last one issued.  The request is forwarded as soon as ANY
 * of the three differs; the former '&&' test dropped every hint that merely
 * shared its start address or its length with the previous one. */
#define _madvise(_start, _off, _hint) \
    do { \
        if ((void *) (_start) != _madv_vec.start \
            || (ssize_t) (_off) != _madv_vec.length \
            || (int) (_hint) != _madv_vec.hint) { \
            _madv_vec.start = (_start); \
            _madv_vec.length = (_off); \
            _madv_vec.hint = (_hint); \
            madvise((_start), (_off), (_hint)); \
        } \
    } while (0)
#else
#define _madvise(_start, _off, _hint) \
    madvise((_start), (_off), (_hint))
#endif
123 /* Open a database file, return its header and an open filehandle,
124 * positioned to the first cdp in the first rra.
125 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
126 * before returning an error. Do not call rrd_close upon failure of rrd_open.
127 */
129 rrd_file_t *rrd_open(
130 const char *const file_name,
131 rrd_t *rrd,
132 unsigned rdwr)
133 {
134 int flags = 0;
135 mode_t mode = S_IRUSR;
136 int version;
138 #ifdef HAVE_MMAP
139 ssize_t _page_size = sysconf(_SC_PAGESIZE);
140 int mm_prot = PROT_READ, mm_flags = 0;
141 char *data;
142 #endif
143 off_t offset = 0;
144 struct stat statb;
145 rrd_file_t *rrd_file = NULL;
146 off_t newfile_size = 0;
148 if (rdwr & RRD_CREAT)
149 newfile_size = (off_t) rrd->stat_head;
150 rrd_init(rrd);
151 rrd_file = malloc(sizeof(rrd_file_t));
152 if (rrd_file == NULL) {
153 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
154 return NULL;
155 }
156 memset(rrd_file, 0, sizeof(rrd_file_t));
158 #ifdef DEBUG
159 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
160 (RRD_READONLY | RRD_READWRITE)) {
161 /* Both READONLY and READWRITE were given, which is invalid. */
162 rrd_set_error("in read/write request mask");
163 exit(-1);
164 }
165 #endif
166 if (rdwr & RRD_READONLY) {
167 flags |= O_RDONLY;
168 #ifdef HAVE_MMAP
169 mm_flags = MAP_PRIVATE;
170 # ifdef MAP_NORESERVE
171 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
172 # endif
173 #endif
174 } else {
175 if (rdwr & RRD_READWRITE) {
176 mode |= S_IWUSR;
177 flags |= O_RDWR;
178 #ifdef HAVE_MMAP
179 mm_flags = MAP_SHARED;
180 mm_prot |= PROT_WRITE;
181 #endif
182 }
183 if (rdwr & RRD_CREAT) {
184 flags |= (O_CREAT | O_TRUNC);
185 }
186 }
187 if (rdwr & RRD_READAHEAD) {
188 #ifdef MAP_POPULATE
189 mm_flags |= MAP_POPULATE; /* populate ptes and data */
190 #endif
191 #if defined MAP_NONBLOCK
192 // if (!(rdwr & RRD_COPY))
193 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
194 #endif
195 #ifdef USE_DIRECT_IO
196 } else {
197 flags |= O_DIRECT;
198 #endif
199 }
200 #ifdef O_NONBLOCK
201 flags |= O_NONBLOCK;
202 #endif
204 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
205 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
206 goto out_free;
207 }
209 /* Better try to avoid seeks as much as possible. stat may be heavy but
210 * many concurrent seeks are even worse. */
211 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
212 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
213 goto out_close;
214 }
215 if (newfile_size == 0) {
216 rrd_file->file_len = statb.st_size;
217 } else {
218 rrd_file->file_len = newfile_size;
219 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
220 write(rrd_file->fd, "\0", 1); /* poke */
221 lseek(rrd_file->fd, 0, SEEK_SET);
222 }
223 #ifdef HAVE_POSIX_FADVISE
224 /* In general we need no read-ahead when dealing with rrd_files.
225 When we stop reading, it is highly unlikely that we start up again.
226 In this manner we actually save time and diskaccess (and buffer cache).
227 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
228 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
229 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
230 rrd_strerror(errno));
231 goto out_close;
232 }
233 #endif
235 /*
236 if (rdwr & RRD_READWRITE)
237 {
238 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
239 rrd_set_error("failed to disable the stream buffer\n");
240 return (-1);
241 }
242 }
243 */
244 #ifdef HAVE_MMAP
245 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
246 rrd_file->fd, offset);
248 /* lets see if the first read worked */
249 if (data == MAP_FAILED) {
250 rrd_set_error("mmaping file '%s': %s", file_name,
251 rrd_strerror(errno));
252 goto out_close;
253 }
254 rrd_file->file_start = data;
255 if (rdwr & RRD_CREAT) {
256 memset(data, DNAN, newfile_size - 1);
257 goto out_done;
258 }
259 #endif
260 #ifdef USE_MADVISE
261 if (rdwr & RRD_COPY) {
262 /* We will read everything in a moment (copying) */
263 _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
264 } else {
265 # ifndef ONE_PAGE
266 /* We do not need to read anything in for the moment */
267 _madvise(data, rrd_file->file_len, MADV_DONTNEED);
268 /* the stat_head will be needed soonish, so hint accordingly */
269 _madvise(data + PAGE_ALIGN_DOWN(offset),
270 PAGE_ALIGN(sizeof(stat_head_t)),
271 MADV_WILLNEED | MADV_RANDOM);
273 # else
274 /* alternatively: keep 1 page worth of data, likely headers,
275 * don't need the rest. */
276 _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
277 _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
278 ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
279 # endif
280 }
281 #endif
283 __rrd_read(rrd->stat_head, stat_head_t,
284 1);
286 /* lets do some test if we are on track ... */
287 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
288 rrd_set_error("'%s' is not an RRD file", file_name);
289 goto out_nullify_head;
290 }
292 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
293 rrd_set_error("This RRD was created on another architecture");
294 goto out_nullify_head;
295 }
297 version = atoi(rrd->stat_head->version);
299 if (version > atoi(RRD_VERSION)) {
300 rrd_set_error("can't handle RRD file version %s",
301 rrd->stat_head->version);
302 goto out_nullify_head;
303 }
304 #if defined USE_MADVISE && !defined ONE_PAGE
305 /* the ds_def will be needed soonish, so hint accordingly */
306 _madvise(data + PAGE_ALIGN_DOWN(offset),
307 PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt),
308 MADV_WILLNEED);
309 #endif
310 __rrd_read(rrd->ds_def, ds_def_t,
311 rrd->stat_head->ds_cnt);
313 #if defined USE_MADVISE && !defined ONE_PAGE
314 /* the rra_def will be needed soonish, so hint accordingly */
315 _madvise(data + PAGE_ALIGN_DOWN(offset),
316 PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt),
317 MADV_WILLNEED);
318 #endif
319 __rrd_read(rrd->rra_def, rra_def_t,
320 rrd->stat_head->rra_cnt);
322 /* handle different format for the live_head */
323 if (version < 3) {
324 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
325 if (rrd->live_head == NULL) {
326 rrd_set_error("live_head_t malloc");
327 goto out_close;
328 }
329 #ifdef HAVE_MMAP
330 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
331 offset += sizeof(long);
332 #else
333 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
334 #endif
335 rrd->live_head->last_up_usec = 0;
336 } else {
337 #if defined USE_MADVISE && !defined ONE_PAGE
338 /* the live_head will be needed soonish, so hint accordingly */
339 _madvise(data + PAGE_ALIGN_DOWN(offset),
340 PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED);
341 #endif
342 __rrd_read(rrd->live_head, live_head_t,
343 1);
344 }
345 //XXX: This doesn't look like it needs madvise
346 __rrd_read(rrd->pdp_prep, pdp_prep_t,
347 rrd->stat_head->ds_cnt);
349 //XXX: This could benefit from madvise()ing
350 __rrd_read(rrd->cdp_prep, cdp_prep_t,
351 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
353 //XXX: This could benefit from madvise()ing
354 __rrd_read(rrd->rra_ptr, rra_ptr_t,
355 rrd->stat_head->rra_cnt);
357 rrd_file->header_len = offset;
358 rrd_file->pos = offset;
359 #ifdef USE_MADVISE
360 out_done:
361 #endif
362 return (rrd_file);
363 out_nullify_head:
364 rrd->stat_head = NULL;
365 out_close:
366 close(rrd_file->fd);
367 out_free:
368 free(rrd_file);
369 return NULL;
370 }
373 /* Close a reference to an rrd_file. */
375 int rrd_close(
376 rrd_file_t *rrd_file)
377 {
378 int ret;
380 #if defined HAVE_MMAP || defined DEBUG
381 ssize_t _page_size = sysconf(_SC_PAGESIZE);
382 #endif
383 #if defined DEBUG && DEBUG > 1
384 /* pretty print blocks in core */
385 off_t off;
386 unsigned char *vec;
388 off = rrd_file->file_len +
389 ((rrd_file->file_len + _page_size - 1) / _page_size);
390 vec = malloc(off);
391 if (vec != NULL) {
392 memset(vec, 0, off);
393 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
394 int prev;
395 unsigned is_in = 0, was_in = 0;
397 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
398 is_in = vec[off] & 1; /* if lsb set then is core resident */
399 if (off == 0)
400 was_in = is_in;
401 if (was_in != is_in) {
402 fprintf(stderr, "%sin core: %p len %ld\n",
403 was_in ? "" : "not ", vec + prev, off - prev);
404 was_in = is_in;
405 prev = off;
406 }
407 }
408 fprintf(stderr,
409 "%sin core: %p len %ld\n",
410 was_in ? "" : "not ", vec + prev, off - prev);
411 } else
412 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
413 }
414 #endif /* DEBUG */
416 #ifdef USE_MADVISE
417 # ifdef ONE_PAGE
418 /* Keep headers around, round up to next page boundary. */
419 ret =
420 PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len);
421 if (rrd_file->file_len > ret)
422 _madvise(rrd_file->file_start + ret,
423 rrd_file->file_len - ret, MADV_DONTNEED);
424 # else
425 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
426 _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len),
427 rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len),
428 MADV_DONTNEED);
429 # endif
430 #endif
431 #ifdef HAVE_MMAP
432 ret = munmap(rrd_file->file_start, rrd_file->file_len);
433 if (ret != 0)
434 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
435 #endif
436 ret = close(rrd_file->fd);
437 if (ret != 0)
438 rrd_set_error("closing file: %s", rrd_strerror(errno));
439 free(rrd_file);
440 rrd_file = NULL;
441 return ret;
442 }
445 /* Set position of rrd_file. */
447 off_t rrd_seek(
448 rrd_file_t *rrd_file,
449 off_t off,
450 int whence)
451 {
452 off_t ret = 0;
454 #ifdef HAVE_MMAP
455 if (whence == SEEK_SET)
456 rrd_file->pos = off;
457 else if (whence == SEEK_CUR)
458 rrd_file->pos += off;
459 else if (whence == SEEK_END)
460 rrd_file->pos = rrd_file->file_len + off;
461 #else
462 ret = lseek(rrd_file->fd, off, whence);
463 if (ret < 0)
464 rrd_set_error("lseek: %s", rrd_strerror(errno));
465 rrd_file->pos = ret;
466 #endif
467 //XXX: mimic fseek, which returns 0 upon success
468 return ret == -1; //XXX: or just ret to mimic lseek
469 }
472 /* Get current position in rrd_file. */
474 inline off_t rrd_tell(
475 rrd_file_t *rrd_file)
476 {
477 return rrd_file->pos;
478 }
481 /* read count bytes into buffer buf, starting at rrd_file->pos.
482 * Returns the number of bytes read or <0 on error. */
484 inline ssize_t rrd_read(
485 rrd_file_t *rrd_file,
486 void *buf,
487 size_t count)
488 {
489 #ifdef HAVE_MMAP
490 size_t _cnt = count;
491 ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
492 if (_surplus > 0) { /* short read */
493 _cnt -= _surplus;
494 }
495 if (_cnt == 0)
496 return 0; /* EOF */
497 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
499 rrd_file->pos += _cnt; /* mimmic read() semantics */
500 return _cnt;
501 #else
502 ssize_t ret;
504 ret = read(rrd_file->fd, buf, count);
505 if (ret > 0)
506 rrd_file->pos += ret; /* mimmic read() semantics */
507 return ret;
508 #endif
509 }
512 /* write count bytes from buffer buf to the current position
513 * rrd_file->pos of rrd_file->fd.
514 * Returns the number of bytes written. */
516 inline ssize_t rrd_write(
517 rrd_file_t *rrd_file,
518 const void *buf,
519 size_t count)
520 {
521 #ifdef HAVE_MMAP
522 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
523 rrd_file->pos += count;
524 return count; /* mimmic write() semantics */
525 #else
526 ssize_t _sz = write(rrd_file->fd, buf, count);
528 if (_sz > 0)
529 rrd_file->pos += _sz;
530 return _sz;
531 #endif
532 }
535 /* flush all data pending to be written to FD. */
537 inline void rrd_flush(
538 rrd_file_t *rrd_file)
539 {
540 if (fdatasync(rrd_file->fd) != 0) {
541 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
542 rrd_strerror(errno));
543 }
544 }
547 /* Initialize RRD header. */
549 void rrd_init(
550 rrd_t *rrd)
551 {
552 rrd->stat_head = NULL;
553 rrd->ds_def = NULL;
554 rrd->rra_def = NULL;
555 rrd->live_head = NULL;
556 rrd->rra_ptr = NULL;
557 rrd->pdp_prep = NULL;
558 rrd->cdp_prep = NULL;
559 rrd->rrd_value = NULL;
560 }
563 /* free RRD header data. */
565 #ifdef HAVE_MMAP
566 inline void rrd_free(
567 rrd_t UNUSED(*rrd))
568 {
569 }
570 #else
571 void rrd_free(
572 rrd_t *rrd)
573 {
574 if (atoi(rrd->stat_head->version) < 3)
575 free(rrd->live_head);
576 free(rrd->stat_head);
577 free(rrd->ds_def);
578 free(rrd->rra_def);
579 free(rrd->rra_ptr);
580 free(rrd->pdp_prep);
581 free(rrd->cdp_prep);
582 free(rrd->rrd_value);
583 }
584 #endif
/* Release a block of memory that was handed out by the rrd library.
 * Meant for external users of the library; mem is passed straight to
 * free(). */

void rrd_freemem(void *mem)
{
    free(mem);
}
597 /* XXX: FIXME: missing documentation. */
598 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
600 int /*_rrd_*/ readfile(
601 const char *file_name,
602 char **buffer,
603 int skipfirst)
604 {
605 long writecnt = 0, totalcnt = MEMBLK;
606 long offset = 0;
607 FILE *input = NULL;
608 char c;
610 if ((strcmp("-", file_name) == 0)) {
611 input = stdin;
612 } else {
613 if ((input = fopen(file_name, "rb")) == NULL) {
614 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
615 return (-1);
616 }
617 }
618 if (skipfirst) {
619 do {
620 c = getc(input);
621 offset++;
622 } while (c != '\n' && !feof(input));
623 }
624 if (strcmp("-", file_name)) {
625 fseek(input, 0, SEEK_END);
626 /* have extra space for detecting EOF without realloc */
627 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
628 if (totalcnt < MEMBLK)
629 totalcnt = MEMBLK; /* sanitize */
630 fseek(input, offset * sizeof(char), SEEK_SET);
631 }
632 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
633 perror("Allocate Buffer:");
634 exit(1);
635 };
636 do {
637 writecnt +=
638 fread((*buffer) + writecnt, 1,
639 (totalcnt - writecnt) * sizeof(char), input);
640 if (writecnt >= totalcnt) {
641 totalcnt += MEMBLK;
642 if (((*buffer) =
643 rrd_realloc((*buffer),
644 (totalcnt + 4) * sizeof(char))) == NULL) {
645 perror("Realloc Buffer:");
646 exit(1);
647 };
648 }
649 } while (!feof(input));
650 (*buffer)[writecnt] = '\0';
651 if (strcmp("-", file_name) != 0) {
652 fclose(input);
653 };
654 return writecnt;
655 }