7812da183b5dd5e61a7da9e34bdeab0dc3bc8291
1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 * $Log$
8 * Revision 1.10 2004/05/26 22:11:12 oetiker
9 * reduce compiler warnings. Many small fixes. -- Mike Slifcak <slif@bellsouth.net>
10 *
11 * Revision 1.9 2003/04/29 21:56:49 oetiker
12 * readline in rrd_open.c reads the file in 8 KB blocks, and calls realloc for
13 * each block. realloc is very slow in Mac OS X for huge blocks, e.g. when
14 * restoring databases from huge xml files. This patch finds the size of the
15 * file, and starts out with malloc'ing the full size.
16 * -- Peter Speck <speck@ruc.dk>
17 *
18 * Revision 1.8 2003/04/11 19:43:44 oetiker
19 * New special value COUNT which allows calculations based on the position of a
20 * value within a data set. Bug fix in rrd_rpncalc.c. PREV returned erroneous
21 * value for the second value. Bug fix in rrd_restore.c. Bug causing seek error
22 * when accessing an RRD restored from an xml that holds an RRD version <3.
23 * -- Ruben Justo <ruben@ainek.com>
24 *
25 * Revision 1.7 2003/03/31 21:22:12 oetiker
26 * enables RRDtool updates with microsecond or in case of windows millisecond
27 * precision. This is needed to reduce time measurement error when archive step
28 * is small. (<30s) -- Sasha Mikheev <sasha@avalon-net.co.il>
29 *
30 * Revision 1.6 2003/02/13 07:05:27 oetiker
31 * Find attached the patch I promised to send to you. Please note that there
32 * are three new source files (src/rrd_is_thread_safe.h, src/rrd_thread_safe.c
33 * and src/rrd_not_thread_safe.c) and the introduction of librrd_th. This
34 * library is identical to librrd, but it contains support code for per-thread
35 * global variables currently used for error information only. This is similar
36 * to how errno per-thread variables are implemented. librrd_th must be linked
37 * alongside of libpthread
38 *
39 * There is also a new file "THREADS", holding some documentation.
40 *
41 * -- Peter Stamfest <peter@stamfest.at>
42 *
43 * Revision 1.5 2002/06/20 00:21:03 jake
44 * More Win32 build changes; thanks to Kerry Calvert.
45 *
46 * Revision 1.4 2002/02/01 20:34:49 oetiker
47 * fixed version number and date/time
48 *
49 * Revision 1.3 2001/03/04 13:01:55 oetiker
50 * Aberrant Behavior Detection support. A brief overview added to rrdtool.pod.
51 * Major updates to rrd_update.c, rrd_create.c. Minor update to other core files.
52 * This is backwards compatible! But new files using the Aberrant stuff are not readable
53 * by old rrdtool versions. See http://cricket.sourceforge.net/aberrant/rrd_hw.htm
54 * -- Jake Brutlag <jakeb@corp.webtv.net>
55 *
56 * Revision 1.2 2001/03/04 10:29:20 oetiker
57 * fixed filedescriptor leak
58 * -- Mike Franusich <mike@franusich.com>
59 *
60 * Revision 1.1.1.1 2001/02/25 22:25:05 oetiker
61 * checkin
62 *
63 *****************************************************************************/
65 #include "rrd_tool.h"
66 #include "unused.h"
67 #define MEMBLK 8192
69 /* DEBUG 2 prints information obtained via mincore(2) */
70 // #define DEBUG 2
71 /* do not calculate exact madvise hints but assume 1 page for headers and
72 * set DONTNEED for the rest, which is assumed to be data */
73 //#define ONE_PAGE 1
74 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
75 * your syscalls are very slow */
76 #define CHECK_MADVISE_OVERLAPS 1
/* __rrd_read(dst, dst_t, cnt): make `dst` refer to the next `cnt` objects of
 * type `dst_t` in the file and advance `offset` past them.
 *
 * The macro relies on names from the expanding function: `offset` in both
 * variants, `data` (start of the mapping) with HAVE_MMAP, and `rrd_file` plus
 * an `out_nullify_head` label without it.
 *
 * Both variants are wrapped in do { } while (0) so that the macro behaves as
 * one statement, e.g. as the body of an unbraced `if`.
 */
#ifdef HAVE_MMAP
/* the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        (dst) = (dst_t *) (void *) (data + offset); \
        offset += sizeof(dst_t) * (cnt); \
    } while (0)
#else
/* Without mmap the objects are copied into a freshly malloc'ed buffer.
 * A failed or short read() is reported as an error instead of being added
 * to `offset` unchecked; the buffer is released again in that case. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        size_t  rrd_read_want_ = sizeof(dst_t) * (cnt); \
        ssize_t rrd_read_got_; \
        if (((dst) = malloc(rrd_read_want_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_read_got_ = read(rrd_file->fd, (dst), rrd_read_want_); \
        if (rrd_read_got_ < 0 || (size_t) rrd_read_got_ != rrd_read_want_) { \
            rrd_set_error(#dst " read"); \
            free(dst); \
            (dst) = NULL; \
            goto out_nullify_head; \
        } \
        offset += rrd_read_got_; \
    } while (0)
#endif
/* Both macros expect a variable `_page_size` (the system page size, a power
 * of two) to be in scope at the point of use. */

/* next page-aligned (i.e. page-align up) */
#ifndef PAGE_ALIGN
#define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
#endif
/* previous page-aligned (i.e. page-align down): clear the in-page bits
 * without adding anything first, so an unaligned value rounds DOWN */
#ifndef PAGE_ALIGN_DOWN
#define PAGE_ALIGN_DOWN(addr) ((addr)&(~(_page_size-1)))
#endif
#ifdef HAVE_MMAP
/* vector of last madvise hint: start address and length of the area that
 * was most recently passed to madvise() through the _madvise() macro */
typedef struct _madvise_vec_t {
    void     *start;
    ssize_t   length;
} _madvise_vec_t;
_madvise_vec_t _madv_vec = { NULL, 0 };
#endif

/* _madvise(start, length, hint): issue a madvise() hint.
 *
 * With CHECK_MADVISE_OVERLAPS the call is skipped only when it names exactly
 * the area of the previous hint, i.e. when BOTH start and length are
 * unchanged. A differing start OR a differing length is a new area and is
 * hinted. The hint value itself is not remembered, so repeating an area with
 * a different hint is still skipped.
 *
 * The arguments are evaluated more than once; pass side-effect free
 * expressions. The macro expands to a single statement.
 */
#if defined CHECK_MADVISE_OVERLAPS
#define _madvise(_start, _off, _hint) \
    do { \
        if ((void *) (_start) != _madv_vec.start || \
            (ssize_t) (_off) != _madv_vec.length) { \
            _madv_vec.start = (_start); \
            _madv_vec.length = (_off); \
            madvise((_start), (_off), (_hint)); \
        } \
    } while (0)
#else
#define _madvise(_start, _off, _hint) \
    madvise((_start), (_off), (_hint))
#endif
123 /* Open a database file, return its header and an open filehandle,
124 * positioned to the first cdp in the first rra.
125 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
126 * before returning an error. Do not call rrd_close upon failure of rrd_open.
127 */
129 rrd_file_t *rrd_open(
130 const char *const file_name,
131 rrd_t *rrd,
132 unsigned rdwr)
133 {
134 int flags = 0;
135 mode_t mode = S_IRUSR;
136 int version;
138 #ifdef HAVE_MMAP
139 ssize_t _page_size = sysconf(_SC_PAGESIZE);
140 int mm_prot = PROT_READ, mm_flags = 0;
141 char *data;
142 #endif
143 off_t offset = 0;
144 struct stat statb;
145 rrd_file_t *rrd_file = NULL;
146 off_t newfile_size = 0;
148 if (rdwr & RRD_CREAT)
149 newfile_size = (off_t) rrd->stat_head;
150 rrd_init(rrd);
151 rrd_file = malloc(sizeof(rrd_file_t));
152 if (rrd_file == NULL) {
153 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
154 return NULL;
155 }
156 memset(rrd_file, 0, sizeof(rrd_file_t));
158 #ifdef DEBUG
159 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
160 (RRD_READONLY | RRD_READWRITE)) {
161 /* Both READONLY and READWRITE were given, which is invalid. */
162 rrd_set_error("in read/write request mask");
163 exit(-1);
164 }
165 #endif
166 if (rdwr & RRD_READONLY) {
167 flags |= O_RDONLY;
168 #ifdef HAVE_MMAP
169 mm_flags = MAP_PRIVATE;
170 # ifdef MAP_NORESERVE
171 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
172 # endif
173 #endif
174 } else {
175 if (rdwr & RRD_READWRITE) {
176 mode |= S_IWUSR;
177 flags |= O_RDWR;
178 #ifdef HAVE_MMAP
179 mm_flags = MAP_SHARED;
180 mm_prot |= PROT_WRITE;
181 #endif
182 }
183 if (rdwr & RRD_CREAT) {
184 flags |= (O_CREAT | O_TRUNC);
185 }
186 }
187 if (rdwr & RRD_READAHEAD) {
188 #ifdef MAP_POPULATE
189 mm_flags |= MAP_POPULATE; /* populate ptes and data */
190 #endif
191 #if defined MAP_NONBLOCK
192 // if (!(rdwr & RRD_COPY))
193 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
194 #endif
195 #ifdef USE_DIRECT_IO
196 } else {
197 flags |= O_DIRECT;
198 #endif
199 }
200 #ifdef O_NONBLOCK
201 flags |= O_NONBLOCK;
202 #endif
204 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
205 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
206 goto out_free;
207 }
209 /* Better try to avoid seeks as much as possible. stat may be heavy but
210 * many concurrent seeks are even worse. */
211 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
212 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
213 goto out_close;
214 }
215 if (newfile_size == 0) {
216 rrd_file->file_len = statb.st_size;
217 } else {
218 rrd_file->file_len = newfile_size;
219 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
220 write(rrd_file->fd, "\0", 1); /* poke */
221 lseek(rrd_file->fd, 0, SEEK_SET);
222 }
223 #ifdef HAVE_POSIX_FADVISE
224 /* In general we need no read-ahead when dealing with rrd_files.
225 When we stop reading, it is highly unlikely that we start up again.
226 In this manner we actually save time and diskaccess (and buffer cache).
227 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
228 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
229 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
230 rrd_strerror(errno));
231 goto out_close;
232 }
233 #endif
235 /*
236 if (rdwr & RRD_READWRITE)
237 {
238 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
239 rrd_set_error("failed to disable the stream buffer\n");
240 return (-1);
241 }
242 }
243 */
244 #ifdef HAVE_MMAP
245 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
246 rrd_file->fd, offset);
248 /* lets see if the first read worked */
249 if (data == MAP_FAILED) {
250 rrd_set_error("mmaping file '%s': %s", file_name,
251 rrd_strerror(errno));
252 goto out_close;
253 }
254 rrd_file->file_start = data;
255 if (rdwr & RRD_CREAT) {
256 goto out_done;
257 }
258 #endif
259 #ifdef USE_MADVISE
260 if (rdwr & RRD_COPY) {
261 /* We will read everything in a moment (copying) */
262 _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
263 } else {
264 # ifndef ONE_PAGE
265 /* We do not need to read anything in for the moment */
266 _madvise(data, rrd_file->file_len, MADV_DONTNEED);
267 /* the stat_head will be needed soonish, so hint accordingly */
268 _madvise(data + PAGE_ALIGN_DOWN(offset),
269 PAGE_ALIGN(sizeof(stat_head_t)),
270 MADV_WILLNEED | MADV_RANDOM);
272 # else
273 /* alternatively: keep 1 page worth of data, likely headers,
274 * don't need the rest. */
275 _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
276 _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
277 ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
278 # endif
279 }
280 #endif
282 __rrd_read(rrd->stat_head, stat_head_t,
283 1);
285 /* lets do some test if we are on track ... */
286 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
287 rrd_set_error("'%s' is not an RRD file", file_name);
288 goto out_nullify_head;
289 }
291 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
292 rrd_set_error("This RRD was created on another architecture");
293 goto out_nullify_head;
294 }
296 version = atoi(rrd->stat_head->version);
298 if (version > atoi(RRD_VERSION)) {
299 rrd_set_error("can't handle RRD file version %s",
300 rrd->stat_head->version);
301 goto out_nullify_head;
302 }
303 #if defined USE_MADVISE && !defined ONE_PAGE
304 /* the ds_def will be needed soonish, so hint accordingly */
305 _madvise(data + PAGE_ALIGN_DOWN(offset),
306 PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt),
307 MADV_WILLNEED);
308 #endif
309 __rrd_read(rrd->ds_def, ds_def_t,
310 rrd->stat_head->ds_cnt);
312 #if defined USE_MADVISE && !defined ONE_PAGE
313 /* the rra_def will be needed soonish, so hint accordingly */
314 _madvise(data + PAGE_ALIGN_DOWN(offset),
315 PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt),
316 MADV_WILLNEED);
317 #endif
318 __rrd_read(rrd->rra_def, rra_def_t,
319 rrd->stat_head->rra_cnt);
321 /* handle different format for the live_head */
322 if (version < 3) {
323 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
324 if (rrd->live_head == NULL) {
325 rrd_set_error("live_head_t malloc");
326 goto out_close;
327 }
328 #ifdef HAVE_MMAP
329 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
330 offset += sizeof(long);
331 #else
332 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
333 #endif
334 rrd->live_head->last_up_usec = 0;
335 } else {
336 #if defined USE_MADVISE && !defined ONE_PAGE
337 /* the live_head will be needed soonish, so hint accordingly */
338 _madvise(data + PAGE_ALIGN_DOWN(offset),
339 PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED);
340 #endif
341 __rrd_read(rrd->live_head, live_head_t,
342 1);
343 }
344 //XXX: This doesn't look like it needs madvise
345 __rrd_read(rrd->pdp_prep, pdp_prep_t,
346 rrd->stat_head->ds_cnt);
348 //XXX: This could benefit from madvise()ing
349 __rrd_read(rrd->cdp_prep, cdp_prep_t,
350 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
352 //XXX: This could benefit from madvise()ing
353 __rrd_read(rrd->rra_ptr, rra_ptr_t,
354 rrd->stat_head->rra_cnt);
356 rrd_file->header_len = offset;
357 rrd_file->pos = offset;
358 #ifdef USE_MADVISE
359 out_done:
360 #endif
361 return (rrd_file);
362 out_nullify_head:
363 rrd->stat_head = NULL;
364 out_close:
365 close(rrd_file->fd);
366 out_free:
367 free(rrd_file);
368 return NULL;
369 }
372 /* Close a reference to an rrd_file. */
374 int rrd_close(
375 rrd_file_t *rrd_file)
376 {
377 int ret;
379 #if defined HAVE_MMAP || defined DEBUG
380 ssize_t _page_size = sysconf(_SC_PAGESIZE);
381 #endif
382 #if defined DEBUG && DEBUG > 1
383 /* pretty print blocks in core */
384 off_t off;
385 unsigned char *vec;
387 off = rrd_file->file_len +
388 ((rrd_file->file_len + _page_size - 1) / _page_size);
389 vec = malloc(off);
390 if (vec != NULL) {
391 memset(vec, 0, off);
392 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
393 int prev;
394 unsigned is_in = 0, was_in = 0;
396 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
397 is_in = vec[off] & 1; /* if lsb set then is core resident */
398 if (off == 0)
399 was_in = is_in;
400 if (was_in != is_in) {
401 fprintf(stderr, "%sin core: %p len %ld\n",
402 was_in ? "" : "not ", vec + prev, off - prev);
403 was_in = is_in;
404 prev = off;
405 }
406 }
407 fprintf(stderr,
408 "%sin core: %p len %ld\n",
409 was_in ? "" : "not ", vec + prev, off - prev);
410 } else
411 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
412 }
413 #endif /* DEBUG */
415 #ifdef USE_MADVISE
416 # ifdef ONE_PAGE
417 /* Keep headers around, round up to next page boundary. */
418 ret =
419 PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len);
420 if (rrd_file->file_len > ret)
421 _madvise(rrd_file->file_start + ret,
422 rrd_file->file_len - ret, MADV_DONTNEED);
423 # else
424 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
425 _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len),
426 rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len),
427 MADV_DONTNEED);
428 # endif
429 #endif
430 #ifdef HAVE_MMAP
431 ret = munmap(rrd_file->file_start, rrd_file->file_len);
432 if (ret != 0)
433 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
434 #endif
435 ret = close(rrd_file->fd);
436 if (ret != 0)
437 rrd_set_error("closing file: %s", rrd_strerror(errno));
438 free(rrd_file);
439 rrd_file = NULL;
440 return ret;
441 }
444 /* Set position of rrd_file. */
446 off_t rrd_seek(
447 rrd_file_t *rrd_file,
448 off_t off,
449 int whence)
450 {
451 off_t ret = 0;
453 #ifdef HAVE_MMAP
454 if (whence == SEEK_SET)
455 rrd_file->pos = off;
456 else if (whence == SEEK_CUR)
457 rrd_file->pos += off;
458 else if (whence == SEEK_END)
459 rrd_file->pos = rrd_file->file_len + off;
460 #else
461 ret = lseek(rrd_file->fd, off, whence);
462 if (ret < 0)
463 rrd_set_error("lseek: %s", rrd_strerror(errno));
464 rrd_file->pos = ret;
465 #endif
466 //XXX: mimic fseek, which returns 0 upon success
467 return ret == -1; //XXX: or just ret to mimic lseek
468 }
471 /* Get current position in rrd_file. */
473 inline off_t rrd_tell(
474 rrd_file_t *rrd_file)
475 {
476 return rrd_file->pos;
477 }
480 /* read count bytes into buffer buf, starting at rrd_file->pos.
481 * Returns the number of bytes read or <0 on error. */
483 inline ssize_t rrd_read(
484 rrd_file_t *rrd_file,
485 void *buf,
486 size_t count)
487 {
488 #ifdef HAVE_MMAP
489 size_t _cnt = count;
490 ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
491 if (_surplus > 0) { /* short read */
492 _cnt -= _surplus;
493 }
494 if (_cnt == 0)
495 return 0; /* EOF */
496 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
498 rrd_file->pos += _cnt; /* mimmic read() semantics */
499 return _cnt;
500 #else
501 ssize_t ret;
503 ret = read(rrd_file->fd, buf, count);
504 if (ret > 0)
505 rrd_file->pos += ret; /* mimmic read() semantics */
506 return ret;
507 #endif
508 }
511 /* write count bytes from buffer buf to the current position
512 * rrd_file->pos of rrd_file->fd.
513 * Returns the number of bytes written. */
515 inline ssize_t rrd_write(
516 rrd_file_t *rrd_file,
517 const void *buf,
518 size_t count)
519 {
520 #ifdef HAVE_MMAP
521 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
522 rrd_file->pos += count;
523 return count; /* mimmic write() semantics */
524 #else
525 ssize_t _sz = write(rrd_file->fd, buf, count);
527 if (_sz > 0)
528 rrd_file->pos += _sz;
529 return _sz;
530 #endif
531 }
534 /* flush all data pending to be written to FD. */
536 inline void rrd_flush(
537 rrd_file_t *rrd_file)
538 {
539 if (fdatasync(rrd_file->fd) != 0) {
540 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
541 rrd_strerror(errno));
542 }
543 }
546 /* Initialize RRD header. */
548 void rrd_init(
549 rrd_t *rrd)
550 {
551 rrd->stat_head = NULL;
552 rrd->ds_def = NULL;
553 rrd->rra_def = NULL;
554 rrd->live_head = NULL;
555 rrd->rra_ptr = NULL;
556 rrd->pdp_prep = NULL;
557 rrd->cdp_prep = NULL;
558 rrd->rrd_value = NULL;
559 }
562 /* free RRD header data. */
564 #ifdef HAVE_MMAP
565 inline void rrd_free(
566 rrd_t UNUSED(*rrd))
567 {
568 }
569 #else
570 void rrd_free(
571 rrd_t *rrd)
572 {
573 if (atoi(rrd->stat_head->version) < 3)
574 free(rrd->live_head);
575 free(rrd->stat_head);
576 free(rrd->ds_def);
577 free(rrd->rra_def);
578 free(rrd->rra_ptr);
579 free(rrd->pdp_prep);
580 free(rrd->cdp_prep);
581 free(rrd->rrd_value);
582 }
583 #endif
/* Release a memory block through free(). Offered so that code outside the
 * rrd library can hand back memory the library allocated. */

void rrd_freemem(
    void *mem)
{
    /* free() itself ignores NULL, no guard needed */
    free(mem);
}
596 /* XXX: FIXME: missing documentation. */
597 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
599 int /*_rrd_*/ readfile(
600 const char *file_name,
601 char **buffer,
602 int skipfirst)
603 {
604 long writecnt = 0, totalcnt = MEMBLK;
605 long offset = 0;
606 FILE *input = NULL;
607 char c;
609 if ((strcmp("-", file_name) == 0)) {
610 input = stdin;
611 } else {
612 if ((input = fopen(file_name, "rb")) == NULL) {
613 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
614 return (-1);
615 }
616 }
617 if (skipfirst) {
618 do {
619 c = getc(input);
620 offset++;
621 } while (c != '\n' && !feof(input));
622 }
623 if (strcmp("-", file_name)) {
624 fseek(input, 0, SEEK_END);
625 /* have extra space for detecting EOF without realloc */
626 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
627 if (totalcnt < MEMBLK)
628 totalcnt = MEMBLK; /* sanitize */
629 fseek(input, offset * sizeof(char), SEEK_SET);
630 }
631 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
632 perror("Allocate Buffer:");
633 exit(1);
634 };
635 do {
636 writecnt +=
637 fread((*buffer) + writecnt, 1,
638 (totalcnt - writecnt) * sizeof(char), input);
639 if (writecnt >= totalcnt) {
640 totalcnt += MEMBLK;
641 if (((*buffer) =
642 rrd_realloc((*buffer),
643 (totalcnt + 4) * sizeof(char))) == NULL) {
644 perror("Realloc Buffer:");
645 exit(1);
646 };
647 }
648 } while (!feof(input));
649 (*buffer)[writecnt] = '\0';
650 if (strcmp("-", file_name) != 0) {
651 fclose(input);
652 };
653 return writecnt;
654 }