1 /*****************************************************************************
2 * RRDtool 1.3.7 Copyright by Tobi Oetiker, 1997-2009
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id: rrd_open.c 1781 2009-04-07 07:31:53Z oetiker $
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
12 #ifdef WIN32
13 #include <stdlib.h>
14 #include <fcntl.h>
15 #include <sys/stat.h>
16 #include <utime.h>
17 #endif
19 #define MEMBLK 8192
21 /* DEBUG 2 prints information obtained via mincore(2) */
22 #define DEBUG 1
23 /* do not calculate exact madvise hints but assume 1 page for headers and
24 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
/* __rrd_read(dst, dst_t, cnt)
 *
 * Load the next `cnt` objects of type `dst_t` from the RRD header into `dst`
 * and advance `offset` past them.  The macro must be expanded inside a
 * function that provides the locals `offset` and `rrd_file` (plus `data` in
 * the mmap variant) and the label `out_nullify_head`, which is jumped to on
 * failure after rrd_set_error() has been called.
 *
 * `cnt` usually comes straight from the file header, so it is checked for
 * multiplication overflow before it is trusted; `cnt` may be evaluated more
 * than once and therefore must not have side effects.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon behaves like a single statement.
 */
#ifdef HAVE_MMAP
/* mmap variant: `dst` is pointed into the mapping, nothing is copied.
   the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        if ((cnt) > ((size_t) -1) / sizeof(dst_t) \
            || offset > rrd_file->file_len \
            || wanted > (unsigned long long) (rrd_file->file_len - offset)) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += wanted; \
    } while (0)
#else
/* read(2) variant: `dst` receives a freshly malloc'ed buffer that is filled
   from rrd_file->fd. */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        size_t got; \
        if ((cnt) > ((size_t) -1) / sizeof(dst_t) \
            || ((dst) = (dst_t*)malloc(wanted)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        got = read (rrd_file->fd, dst, wanted); \
        if (got != wanted) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += got; \
    } while (0)
#endif
/* get the address of the start of this page */
/* PAGE_START(addr) clears the low bits of addr selected by (_page_size - 1).
 * The expansion refers to a variable named _page_size, so one must be in
 * scope wherever the macro is used.  The mask only yields a page boundary
 * when _page_size is a power of two. */
#if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
#ifndef PAGE_START
#define PAGE_START(addr) ((addr)&(~(_page_size-1)))
#endif
#endif
65 /* Open a database file, return its header and an open filehandle,
66 * positioned to the first cdp in the first rra.
67 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
68 * before returning an error. Do not call rrd_close upon failure of rrd_open.
69 */
71 rrd_file_t *rrd_open(
72 const char *const file_name,
73 rrd_t *rrd,
74 unsigned rdwr)
75 {
76 int flags = 0;
78 /* Win32 can't use S_IRUSR flag */
79 #ifndef WIN32
80 mode_t mode = S_IRUSR;
81 #else
82 int mode = 0;
83 #endif
84 int version;
86 #ifdef HAVE_MMAP
87 ssize_t _page_size = sysconf(_SC_PAGESIZE);
88 int mm_prot = PROT_READ, mm_flags = 0;
89 char *data = MAP_FAILED;
90 #endif
91 off_t offset = 0;
92 struct stat statb;
93 rrd_file_t *rrd_file = NULL;
94 off_t newfile_size = 0;
96 if (rdwr & RRD_CREAT) {
97 /* yes bad inline signaling alert, we are using the
98 floatcookie to pass the size in ... only used in resize */
99 newfile_size = (off_t) rrd->stat_head->float_cookie;
100 free(rrd->stat_head);
101 }
102 rrd_init(rrd);
103 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
104 if (rrd_file == NULL) {
105 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
106 return NULL;
107 }
108 memset(rrd_file, 0, sizeof(rrd_file_t));
110 #ifdef DEBUG
111 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
112 (RRD_READONLY | RRD_READWRITE)) {
113 /* Both READONLY and READWRITE were given, which is invalid. */
114 rrd_set_error("in read/write request mask");
115 exit(-1);
116 }
117 #endif
118 if (rdwr & RRD_READONLY) {
119 flags |= O_RDONLY;
120 #ifdef HAVE_MMAP
121 mm_flags = MAP_PRIVATE;
122 # ifdef MAP_NORESERVE
123 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
124 # endif
125 #endif
126 } else {
127 if (rdwr & RRD_READWRITE) {
128 #ifndef WIN32 // Win32 can't use this mode
129 mode |= S_IWUSR;
130 #endif
131 flags |= O_RDWR;
132 #ifdef HAVE_MMAP
133 mm_flags = MAP_SHARED;
134 mm_prot |= PROT_WRITE;
135 #endif
136 }
137 if (rdwr & RRD_CREAT) {
138 flags |= (O_CREAT | O_TRUNC);
139 }
140 }
141 if (rdwr & RRD_READAHEAD) {
142 #ifdef MAP_POPULATE
143 mm_flags |= MAP_POPULATE; /* populate ptes and data */
144 #endif
145 #if defined MAP_NONBLOCK
146 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
147 #endif
148 }
149 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
150 flags |= O_BINARY;
151 #endif
153 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
154 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
155 goto out_free;
156 }
158 #ifdef HAVE_MMAP
159 #ifdef HAVE_BROKEN_MS_ASYNC
160 if (rdwr & RRD_READWRITE) {
161 /* some unices, the files mtime does not get update
162 on msync MS_ASYNC, in order to help them,
163 we update the the timestamp at this point.
164 The thing happens pretty 'close' to the open
165 call so the chances of a race should be minimal.
167 Maybe ask your vendor to fix your OS ... */
168 utime(file_name,NULL);
169 }
170 #endif
171 #endif
173 /* Better try to avoid seeks as much as possible. stat may be heavy but
174 * many concurrent seeks are even worse. */
175 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
176 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
177 goto out_close;
178 }
179 if (newfile_size == 0) {
180 rrd_file->file_len = statb.st_size;
181 } else {
182 rrd_file->file_len = newfile_size;
183 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
184 write(rrd_file->fd, "\0", 1); /* poke */
185 lseek(rrd_file->fd, 0, SEEK_SET);
186 }
187 #ifdef HAVE_POSIX_FADVISE
188 /* In general we need no read-ahead when dealing with rrd_files.
189 When we stop reading, it is highly unlikely that we start up again.
190 In this manner we actually save time and diskaccess (and buffer cache).
191 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
192 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
193 #endif
195 /*
196 if (rdwr & RRD_READWRITE)
197 {
198 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
199 rrd_set_error("failed to disable the stream buffer\n");
200 return (-1);
201 }
202 }
203 */
204 #ifdef HAVE_MMAP
205 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
206 rrd_file->fd, offset);
208 /* lets see if the first read worked */
209 if (data == MAP_FAILED) {
210 rrd_set_error("mmaping file '%s': %s", file_name,
211 rrd_strerror(errno));
212 goto out_close;
213 }
214 rrd_file->file_start = data;
215 if (rdwr & RRD_CREAT) {
216 memset(data, DNAN, newfile_size - 1);
217 goto out_done;
218 }
219 #endif
220 if (rdwr & RRD_CREAT)
221 goto out_done;
222 #ifdef USE_MADVISE
223 if (rdwr & RRD_COPY) {
224 /* We will read everything in a moment (copying) */
225 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
226 } else {
227 /* We do not need to read anything in for the moment */
228 madvise(data, rrd_file->file_len, MADV_RANDOM);
229 /* the stat_head will be needed soonish, so hint accordingly */
230 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
231 }
232 #endif
234 __rrd_read(rrd->stat_head, stat_head_t,
235 1);
237 /* lets do some test if we are on track ... */
238 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
239 rrd_set_error("'%s' is not an RRD file", file_name);
240 goto out_nullify_head;
241 }
243 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
244 rrd_set_error("This RRD was created on another architecture");
245 goto out_nullify_head;
246 }
248 version = atoi(rrd->stat_head->version);
250 if (version > atoi(RRD_VERSION)) {
251 rrd_set_error("can't handle RRD file version %s",
252 rrd->stat_head->version);
253 goto out_nullify_head;
254 }
255 #if defined USE_MADVISE
256 /* the ds_def will be needed soonish, so hint accordingly */
257 madvise(data + PAGE_START(offset),
258 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
259 #endif
260 __rrd_read(rrd->ds_def, ds_def_t,
261 rrd->stat_head->ds_cnt);
263 #if defined USE_MADVISE
264 /* the rra_def will be needed soonish, so hint accordingly */
265 madvise(data + PAGE_START(offset),
266 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
267 #endif
268 __rrd_read(rrd->rra_def, rra_def_t,
269 rrd->stat_head->rra_cnt);
271 /* handle different format for the live_head */
272 if (version < 3) {
273 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
274 if (rrd->live_head == NULL) {
275 rrd_set_error("live_head_t malloc");
276 goto out_close;
277 }
278 #if defined USE_MADVISE
279 /* the live_head will be needed soonish, so hint accordingly */
280 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
281 #endif
282 __rrd_read(rrd->legacy_last_up, time_t,
283 1);
285 rrd->live_head->last_up = *rrd->legacy_last_up;
286 rrd->live_head->last_up_usec = 0;
287 } else {
288 #if defined USE_MADVISE
289 /* the live_head will be needed soonish, so hint accordingly */
290 madvise(data + PAGE_START(offset),
291 sizeof(live_head_t), MADV_WILLNEED);
292 #endif
293 __rrd_read(rrd->live_head, live_head_t,
294 1);
295 }
296 __rrd_read(rrd->pdp_prep, pdp_prep_t,
297 rrd->stat_head->ds_cnt);
298 __rrd_read(rrd->cdp_prep, cdp_prep_t,
299 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
300 __rrd_read(rrd->rra_ptr, rra_ptr_t,
301 rrd->stat_head->rra_cnt);
303 rrd_file->header_len = offset;
304 rrd_file->pos = offset;
306 {
307 unsigned long row_cnt = 0;
308 unsigned long i;
310 for (i=0; i<rrd->stat_head->rra_cnt; i++)
311 row_cnt += rrd->rra_def[i].row_cnt;
313 off_t correct_len = rrd_file->header_len +
314 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
316 if (correct_len > rrd_file->file_len)
317 {
318 rrd_set_error("'%s' is too small (should be %ld bytes)",
319 file_name, (long long) correct_len);
320 goto out_nullify_head;
321 }
322 }
324 out_done:
325 return (rrd_file);
326 out_nullify_head:
327 rrd->stat_head = NULL;
328 out_close:
329 #ifdef HAVE_MMAP
330 if (data != MAP_FAILED)
331 munmap(data, rrd_file->file_len);
332 #endif
333 close(rrd_file->fd);
334 out_free:
335 free(rrd_file);
336 return NULL;
337 }
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * Debug helper: asks mincore() which pages of the mapping are resident and
 * writes one line per run of consecutive pages with the same state to
 * stderr, each prefixed with `mark`.  Every line shows the address of the
 * run inside the mapping and its length in bytes (whole pages).
 */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    size_t    page_cnt;
    size_t    page;
    size_t    run_start = 0;
    unsigned  is_in = 0, was_in = 0;
    unsigned char *vec;

    /* mincore() fills one status byte per page of the range */
    page_cnt = (rrd_file->file_len + _page_size - 1) / _page_size;
    vec = malloc(page_cnt);
    if (vec == NULL)
        return;
    memset(vec, 0, page_cnt);

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
        for (page = 0; page < page_cnt; ++page) {
            is_in = vec[page] & 1;  /* if lsb set then is core resident */
            if (page == 0)
                was_in = is_in;
            if (was_in != is_in) {
                fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                        was_in ? "" : "not ",
                        (void *) (rrd_file->file_start +
                                  run_start * _page_size),
                        (long) ((page - run_start) * _page_size));
                was_in = is_in;
                run_start = page;
            }
        }
        /* report the final run */
        fprintf(stderr,
                "%s: %sin core: %p len %ld\n", mark,
                was_in ? "" : "not ",
                (void *) (rrd_file->file_start + run_start * _page_size),
                (long) ((page - run_start) * _page_size));
    } else
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));

    free(vec);          /* the status vector is scratch space only */
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
386 /* drop cache except for the header and the active pages */
387 void rrd_dontneed(
388 rrd_file_t *rrd_file,
389 rrd_t *rrd)
390 {
391 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
392 unsigned long dontneed_start;
393 unsigned long rra_start;
394 unsigned long active_block;
395 unsigned long i;
396 ssize_t _page_size = sysconf(_SC_PAGESIZE);
398 if (rrd_file == NULL) {
399 #if defined DEBUG && DEBUG
400 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
401 #endif
402 return;
403 }
405 #if defined DEBUG && DEBUG > 1
406 mincore_print(rrd_file, "before");
407 #endif
409 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
410 rra_start = rrd_file->header_len;
411 dontneed_start = PAGE_START(rra_start) + _page_size;
412 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
413 active_block =
414 PAGE_START(rra_start
415 + rrd->rra_ptr[i].cur_row
416 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
417 if (active_block > dontneed_start) {
418 #ifdef USE_MADVISE
419 madvise(rrd_file->file_start + dontneed_start,
420 active_block - dontneed_start - 1, MADV_DONTNEED);
421 #endif
422 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
423 #ifdef HAVE_POSIX_FADVISE
424 posix_fadvise(rrd_file->fd, dontneed_start,
425 active_block - dontneed_start - 1,
426 POSIX_FADV_DONTNEED);
427 #endif
428 }
429 dontneed_start = active_block;
430 /* do not release 'hot' block if update for this RAA will occur
431 * within 10 minutes */
432 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
433 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
434 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
435 dontneed_start += _page_size;
436 }
437 rra_start +=
438 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
439 sizeof(rrd_value_t);
440 }
442 if (dontneed_start < rrd_file->file_len) {
443 #ifdef USE_MADVISE
444 madvise(rrd_file->file_start + dontneed_start,
445 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
446 #endif
447 #ifdef HAVE_POSIX_FADVISE
448 posix_fadvise(rrd_file->fd, dontneed_start,
449 rrd_file->file_len - dontneed_start,
450 POSIX_FADV_DONTNEED);
451 #endif
452 }
454 #if defined DEBUG && DEBUG > 1
455 mincore_print(rrd_file, "after");
456 #endif
457 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
458 }
464 int rrd_close(
465 rrd_file_t *rrd_file)
466 {
467 int ret;
469 #ifdef HAVE_MMAP
470 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
471 if (ret != 0)
472 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
473 ret = munmap(rrd_file->file_start, rrd_file->file_len);
474 if (ret != 0)
475 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
476 #endif
477 ret = close(rrd_file->fd);
478 if (ret != 0)
479 rrd_set_error("closing file: %s", rrd_strerror(errno));
480 free(rrd_file);
481 rrd_file = NULL;
482 return ret;
483 }
486 /* Set position of rrd_file. */
488 off_t rrd_seek(
489 rrd_file_t *rrd_file,
490 off_t off,
491 int whence)
492 {
493 off_t ret = 0;
495 #ifdef HAVE_MMAP
496 if (whence == SEEK_SET)
497 rrd_file->pos = off;
498 else if (whence == SEEK_CUR)
499 rrd_file->pos += off;
500 else if (whence == SEEK_END)
501 rrd_file->pos = rrd_file->file_len + off;
502 #else
503 ret = lseek(rrd_file->fd, off, whence);
504 if (ret < 0)
505 rrd_set_error("lseek: %s", rrd_strerror(errno));
506 rrd_file->pos = ret;
507 #endif
508 /* mimic fseek, which returns 0 upon success */
509 return ret < 0; /*XXX: or just ret to mimic lseek */
510 }
513 /* Get current position in rrd_file. */
515 off_t rrd_tell(
516 rrd_file_t *rrd_file)
517 {
518 return rrd_file->pos;
519 }
522 /* Read count bytes into buffer buf, starting at rrd_file->pos.
523 * Returns the number of bytes read or <0 on error. */
525 ssize_t rrd_read(
526 rrd_file_t *rrd_file,
527 void *buf,
528 size_t count)
529 {
530 #ifdef HAVE_MMAP
531 size_t _cnt = count;
532 ssize_t _surplus;
534 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
535 return 0;
536 if (buf == NULL)
537 return -1; /* EINVAL */
538 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
539 if (_surplus > 0) { /* short read */
540 _cnt -= _surplus;
541 }
542 if (_cnt == 0)
543 return 0; /* EOF */
544 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
546 rrd_file->pos += _cnt; /* mimmic read() semantics */
547 return _cnt;
548 #else
549 ssize_t ret;
551 ret = read(rrd_file->fd, buf, count);
552 if (ret > 0)
553 rrd_file->pos += ret; /* mimmic read() semantics */
554 return ret;
555 #endif
556 }
559 /* Write count bytes from buffer buf to the current position
560 * rrd_file->pos of rrd_file->fd.
561 * Returns the number of bytes written or <0 on error. */
563 ssize_t rrd_write(
564 rrd_file_t *rrd_file,
565 const void *buf,
566 size_t count)
567 {
568 #ifdef HAVE_MMAP
569 if (count == 0)
570 return 0;
571 if (buf == NULL)
572 return -1; /* EINVAL */
573 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
574 rrd_file->pos += count;
575 return count; /* mimmic write() semantics */
576 #else
577 ssize_t _sz = write(rrd_file->fd, buf, count);
579 if (_sz > 0)
580 rrd_file->pos += _sz;
581 return _sz;
582 #endif
583 }
586 /* flush all data pending to be written to FD. */
588 void rrd_flush(
589 rrd_file_t *rrd_file)
590 {
591 /*
592 * Win32 can only flush files by FlushFileBuffers function,
593 * but it works with HANDLE hFile, not FILE. So skipping
594 */
595 #ifndef WIN32
596 if (fdatasync(rrd_file->fd) != 0) {
597 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
598 rrd_strerror(errno));
599 }
600 #endif
601 }
604 /* Initialize RRD header. */
606 void rrd_init(
607 rrd_t *rrd)
608 {
609 rrd->stat_head = NULL;
610 rrd->ds_def = NULL;
611 rrd->rra_def = NULL;
612 rrd->live_head = NULL;
613 rrd->legacy_last_up = NULL;
614 rrd->rra_ptr = NULL;
615 rrd->pdp_prep = NULL;
616 rrd->cdp_prep = NULL;
617 rrd->rrd_value = NULL;
618 }
621 /* free RRD header data. */
623 #ifdef HAVE_MMAP
624 void rrd_free(
625 rrd_t *rrd)
626 {
627 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
628 free(rrd->live_head);
629 }
630 }
631 #else
632 void rrd_free(
633 rrd_t *rrd)
634 {
635 free(rrd->live_head);
636 free(rrd->stat_head);
637 free(rrd->ds_def);
638 free(rrd->rra_def);
639 free(rrd->rra_ptr);
640 free(rrd->pdp_prep);
641 free(rrd->cdp_prep);
642 free(rrd->rrd_value);
643 }
644 #endif
/* Hand a block of memory back to the allocator used by the rrd library.
 * Meant for external callers that received memory from the library;
 * simply forwards mem to free(). */
void rrd_freemem(
    void *mem)
{
    free(mem);
}