1 /*****************************************************************************
2 * RRDtool 1.3.9 Copyright by Tobi Oetiker, 1997-2009
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
12 #ifdef WIN32
13 #include <stdlib.h>
14 #include <fcntl.h>
15 #include <sys/stat.h>
16 #endif
18 #ifdef HAVE_BROKEN_MS_ASYNC
19 #include <sys/types.h>
20 #include <utime.h>
21 #endif
23 #define MEMBLK 8192
25 /* DEBUG 2 prints information obtained via mincore(2) */
26 #define DEBUG 1
27 /* do not calculate exact madvise hints but assume 1 page for headers and
28 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
#ifdef HAVE_MMAP
/* __rrd_read(dst, dst_t, cnt): make dst point at the next cnt objects of
 * type dst_t inside the mapped file and advance 'offset' past them.
 *
 * The macro expands references to the caller's locals 'offset', 'data' and
 * 'rrd_file', and jumps to the caller's 'out_nullify_head' label (after
 * calling rrd_set_error) when the requested span would end beyond
 * rrd_file->file_len.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed by
 * ';' is exactly one statement, also inside an unbraced if/else.
 *
 * the cast to void* is there to avoid this warning seen on ia64 with certain
 * versions of gcc: 'cast increases required alignment of target type'
 */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        if (offset + wanted > rrd_file->file_len) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += wanted; \
    } while (0)
#else
/* __rrd_read(dst, dst_t, cnt): malloc room for cnt objects of type dst_t,
 * store the pointer in dst, fill it with read() from rrd_file->fd and
 * advance 'offset' by the number of bytes read.
 *
 * The macro expands references to the caller's locals 'offset' and
 * 'rrd_file', and jumps to the caller's 'out_nullify_head' label (after
 * calling rrd_set_error) on allocation failure or when read() does not
 * deliver exactly the requested number of bytes. A read() result of -1 is
 * stored in a size_t and therefore also compares unequal to 'wanted'.
 */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        size_t got; \
        if (((dst) = (dst_t*)malloc(wanted)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        got = read (rrd_file->fd, (dst), wanted); \
        if (got != wanted) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += got; \
    } while (0)
#endif
/* PAGE_START(addr): clear the low bits of addr selected by (_page_size - 1),
 * i.e. yield the start of the page containing addr (assumes _page_size is a
 * power of two). The expansion refers to a variable named _page_size that
 * must be in scope wherever the macro is used. */
#if defined(USE_MADVISE) || defined(HAVE_POSIX_FADVISE)
# if !defined(PAGE_START)
#  define PAGE_START(addr) ((addr)&(~(_page_size-1)))
# endif
#endif
69 /* Open a database file, return its header and an open filehandle,
70 * positioned to the first cdp in the first rra.
71 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
72 * before returning an error. Do not call rrd_close upon failure of rrd_open.
73 */
75 rrd_file_t *rrd_open(
76 const char *const file_name,
77 rrd_t *rrd,
78 unsigned rdwr)
79 {
80 int flags = 0;
82 /* Win32 can't use S_IRUSR flag */
83 #ifndef WIN32
84 mode_t mode = S_IRUSR;
85 #else
86 int mode = 0;
87 #endif
88 int version;
90 #ifdef HAVE_MMAP
91 ssize_t _page_size = sysconf(_SC_PAGESIZE);
92 int mm_prot = PROT_READ, mm_flags = 0;
93 char *data = MAP_FAILED;
94 #endif
95 off_t offset = 0;
96 struct stat statb;
97 rrd_file_t *rrd_file = NULL;
98 off_t newfile_size = 0;
100 if (rdwr & RRD_CREAT) {
101 /* yes bad inline signaling alert, we are using the
102 floatcookie to pass the size in ... only used in resize */
103 newfile_size = (off_t) rrd->stat_head->float_cookie;
104 free(rrd->stat_head);
105 }
106 rrd_init(rrd);
107 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
108 if (rrd_file == NULL) {
109 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
110 return NULL;
111 }
112 memset(rrd_file, 0, sizeof(rrd_file_t));
114 #ifdef DEBUG
115 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
116 (RRD_READONLY | RRD_READWRITE)) {
117 /* Both READONLY and READWRITE were given, which is invalid. */
118 rrd_set_error("in read/write request mask");
119 exit(-1);
120 }
121 #endif
122 if (rdwr & RRD_READONLY) {
123 flags |= O_RDONLY;
124 #ifdef HAVE_MMAP
125 mm_flags = MAP_PRIVATE;
126 # ifdef MAP_NORESERVE
127 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
128 # endif
129 #endif
130 } else {
131 if (rdwr & RRD_READWRITE) {
132 #ifndef WIN32 // Win32 can't use this mode
133 mode |= S_IWUSR;
134 #endif
135 flags |= O_RDWR;
136 #ifdef HAVE_MMAP
137 mm_flags = MAP_SHARED;
138 mm_prot |= PROT_WRITE;
139 #endif
140 }
141 if (rdwr & RRD_CREAT) {
142 flags |= (O_CREAT | O_TRUNC);
143 }
144 }
145 if (rdwr & RRD_READAHEAD) {
146 #ifdef MAP_POPULATE
147 mm_flags |= MAP_POPULATE; /* populate ptes and data */
148 #endif
149 #if defined MAP_NONBLOCK
150 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
151 #endif
152 }
153 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
154 flags |= O_BINARY;
155 #endif
157 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
158 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
159 goto out_free;
160 }
162 #ifdef HAVE_MMAP
163 #ifdef HAVE_BROKEN_MS_ASYNC
164 if (rdwr & RRD_READWRITE) {
165 /* some unices, the files mtime does not get update
166 on msync MS_ASYNC, in order to help them,
167 we update the the timestamp at this point.
168 The thing happens pretty 'close' to the open
169 call so the chances of a race should be minimal.
171 Maybe ask your vendor to fix your OS ... */
172 utime(file_name,NULL);
173 }
174 #endif
175 #endif
177 /* Better try to avoid seeks as much as possible. stat may be heavy but
178 * many concurrent seeks are even worse. */
179 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
180 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
181 goto out_close;
182 }
183 if (newfile_size == 0) {
184 rrd_file->file_len = statb.st_size;
185 } else {
186 rrd_file->file_len = newfile_size;
187 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
188 write(rrd_file->fd, "\0", 1); /* poke */
189 lseek(rrd_file->fd, 0, SEEK_SET);
190 }
191 #ifdef HAVE_POSIX_FADVISE
192 /* In general we need no read-ahead when dealing with rrd_files.
193 When we stop reading, it is highly unlikely that we start up again.
194 In this manner we actually save time and diskaccess (and buffer cache).
195 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
196 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
197 #endif
199 /*
200 if (rdwr & RRD_READWRITE)
201 {
202 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
203 rrd_set_error("failed to disable the stream buffer\n");
204 return (-1);
205 }
206 }
207 */
208 #ifdef HAVE_MMAP
209 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
210 rrd_file->fd, offset);
212 /* lets see if the first read worked */
213 if (data == MAP_FAILED) {
214 rrd_set_error("mmaping file '%s': %s", file_name,
215 rrd_strerror(errno));
216 goto out_close;
217 }
218 rrd_file->file_start = data;
219 if (rdwr & RRD_CREAT) {
220 memset(data, DNAN, newfile_size - 1);
221 goto out_done;
222 }
223 #endif
224 if (rdwr & RRD_CREAT)
225 goto out_done;
226 #ifdef USE_MADVISE
227 if (rdwr & RRD_COPY) {
228 /* We will read everything in a moment (copying) */
229 madvise(data, rrd_file->file_len, MADV_WILLNEED);
230 madvise(data, rrd_file->file_len, MADV_SEQUENTIAL);
231 } else {
232 /* We do not need to read anything in for the moment */
233 madvise(data, rrd_file->file_len, MADV_RANDOM);
234 /* the stat_head will be needed soonish, so hint accordingly */
235 madvise(data, sizeof(stat_head_t), MADV_WILLNEED);
236 madvise(data, sizeof(stat_head_t), MADV_RANDOM);
237 }
238 #endif
240 __rrd_read(rrd->stat_head, stat_head_t,
241 1);
243 /* lets do some test if we are on track ... */
244 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
245 rrd_set_error("'%s' is not an RRD file", file_name);
246 goto out_nullify_head;
247 }
249 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
250 rrd_set_error("This RRD was created on another architecture");
251 goto out_nullify_head;
252 }
254 version = atoi(rrd->stat_head->version);
256 if (version > atoi(RRD_VERSION)) {
257 rrd_set_error("can't handle RRD file version %s",
258 rrd->stat_head->version);
259 goto out_nullify_head;
260 }
261 #if defined USE_MADVISE
262 /* the ds_def will be needed soonish, so hint accordingly */
263 madvise(data + PAGE_START(offset),
264 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
265 #endif
266 __rrd_read(rrd->ds_def, ds_def_t,
267 rrd->stat_head->ds_cnt);
269 #if defined USE_MADVISE
270 /* the rra_def will be needed soonish, so hint accordingly */
271 madvise(data + PAGE_START(offset),
272 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
273 #endif
274 __rrd_read(rrd->rra_def, rra_def_t,
275 rrd->stat_head->rra_cnt);
277 /* handle different format for the live_head */
278 if (version < 3) {
279 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
280 if (rrd->live_head == NULL) {
281 rrd_set_error("live_head_t malloc");
282 goto out_close;
283 }
284 #if defined USE_MADVISE
285 /* the live_head will be needed soonish, so hint accordingly */
286 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
287 #endif
288 __rrd_read(rrd->legacy_last_up, time_t,
289 1);
291 rrd->live_head->last_up = *rrd->legacy_last_up;
292 rrd->live_head->last_up_usec = 0;
293 } else {
294 #if defined USE_MADVISE
295 /* the live_head will be needed soonish, so hint accordingly */
296 madvise(data + PAGE_START(offset),
297 sizeof(live_head_t), MADV_WILLNEED);
298 #endif
299 __rrd_read(rrd->live_head, live_head_t,
300 1);
301 }
302 __rrd_read(rrd->pdp_prep, pdp_prep_t,
303 rrd->stat_head->ds_cnt);
304 __rrd_read(rrd->cdp_prep, cdp_prep_t,
305 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
306 __rrd_read(rrd->rra_ptr, rra_ptr_t,
307 rrd->stat_head->rra_cnt);
309 rrd_file->header_len = offset;
310 rrd_file->pos = offset;
312 {
313 unsigned long row_cnt = 0;
314 unsigned long i;
316 for (i=0; i<rrd->stat_head->rra_cnt; i++)
317 row_cnt += rrd->rra_def[i].row_cnt;
319 size_t correct_len = rrd_file->header_len +
320 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
322 if (correct_len > rrd_file->file_len)
323 {
324 rrd_set_error("'%s' is too small (should be %ld bytes)",
325 file_name, (long long) correct_len);
326 goto out_nullify_head;
327 }
328 }
330 out_done:
331 return (rrd_file);
332 out_nullify_head:
333 rrd->stat_head = NULL;
334 out_close:
335 #ifdef HAVE_MMAP
336 if (data != MAP_FAILED)
337 munmap(data, rrd_file->file_len);
338 #endif
339 close(rrd_file->fd);
340 out_free:
341 free(rrd_file);
342 return NULL;
343 }
#if defined DEBUG && DEBUG > 1
/* Print, to stderr, the runs of in-core / not-in-core pages of the mapping
 * of rrd_file as reported by mincore(2). Each line is prefixed with 'mark'
 * and gives the start address of the run inside the mapping and its length
 * in bytes (page granular, so the last run may extend past file_len).
 * Without HAVE_MMAP only a notice is printed. */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    size_t    npages;
    size_t    page;
    size_t    run_start = 0;
    unsigned  is_in = 0, was_in = 0;
    unsigned char *vec;

    /* mincore() fills one byte per page of the queried range */
    npages = (rrd_file->file_len + _page_size - 1) / _page_size;
    vec = malloc(npages);
    if (vec == NULL)
        return;
    memset(vec, 0, npages);

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
        for (page = 0; page < npages; ++page) {
            is_in = vec[page] & 1;  /* if lsb set then is core resident */
            if (page == 0)
                was_in = is_in;
            if (was_in != is_in) {
                /* residency changed: report the run that just ended */
                fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                        was_in ? "" : "not ",
                        (void *) ((char *) rrd_file->file_start +
                                  run_start * _page_size),
                        (long) ((page - run_start) * _page_size));
                was_in = is_in;
                run_start = page;
            }
        }
        /* report the final run */
        fprintf(stderr,
                "%s: %sin core: %p len %ld\n", mark,
                was_in ? "" : "not ",
                (void *) ((char *) rrd_file->file_start +
                          run_start * _page_size),
                (long) ((page - run_start) * _page_size));
    } else
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));

    free(vec);
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
392 /* drop cache except for the header and the active pages */
393 void rrd_dontneed(
394 rrd_file_t *rrd_file,
395 rrd_t *rrd)
396 {
397 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
398 size_t dontneed_start;
399 size_t rra_start;
400 size_t active_block;
401 size_t i;
402 ssize_t _page_size = sysconf(_SC_PAGESIZE);
404 if (rrd_file == NULL) {
405 #if defined DEBUG && DEBUG
406 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
407 #endif
408 return;
409 }
411 #if defined DEBUG && DEBUG > 1
412 mincore_print(rrd_file, "before");
413 #endif
415 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
416 rra_start = rrd_file->header_len;
417 dontneed_start = PAGE_START(rra_start) + _page_size;
418 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
419 active_block =
420 PAGE_START(rra_start
421 + rrd->rra_ptr[i].cur_row
422 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
423 if (active_block > dontneed_start) {
424 #ifdef USE_MADVISE
425 madvise(rrd_file->file_start + dontneed_start,
426 active_block - dontneed_start - 1, MADV_DONTNEED);
427 #endif
428 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
429 #ifdef HAVE_POSIX_FADVISE
430 posix_fadvise(rrd_file->fd, dontneed_start,
431 active_block - dontneed_start - 1,
432 POSIX_FADV_DONTNEED);
433 #endif
434 }
435 dontneed_start = active_block;
436 /* do not release 'hot' block if update for this RAA will occur
437 * within 10 minutes */
438 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
439 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
440 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
441 dontneed_start += _page_size;
442 }
443 rra_start +=
444 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
445 sizeof(rrd_value_t);
446 }
448 if (dontneed_start < rrd_file->file_len) {
449 #ifdef USE_MADVISE
450 madvise(rrd_file->file_start + dontneed_start,
451 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
452 #endif
453 #ifdef HAVE_POSIX_FADVISE
454 posix_fadvise(rrd_file->fd, dontneed_start,
455 rrd_file->file_len - dontneed_start,
456 POSIX_FADV_DONTNEED);
457 #endif
458 }
460 #if defined DEBUG && DEBUG > 1
461 mincore_print(rrd_file, "after");
462 #endif
463 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
464 }
470 int rrd_close(
471 rrd_file_t *rrd_file)
472 {
473 int ret;
475 #ifdef HAVE_MMAP
476 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
477 if (ret != 0)
478 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
479 ret = munmap(rrd_file->file_start, rrd_file->file_len);
480 if (ret != 0)
481 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
482 #endif
483 ret = close(rrd_file->fd);
484 if (ret != 0)
485 rrd_set_error("closing file: %s", rrd_strerror(errno));
486 free(rrd_file);
487 rrd_file = NULL;
488 return ret;
489 }
492 /* Set position of rrd_file. */
494 off_t rrd_seek(
495 rrd_file_t *rrd_file,
496 off_t off,
497 int whence)
498 {
499 off_t ret = 0;
501 #ifdef HAVE_MMAP
502 if (whence == SEEK_SET)
503 rrd_file->pos = off;
504 else if (whence == SEEK_CUR)
505 rrd_file->pos += off;
506 else if (whence == SEEK_END)
507 rrd_file->pos = rrd_file->file_len + off;
508 #else
509 ret = lseek(rrd_file->fd, off, whence);
510 if (ret < 0)
511 rrd_set_error("lseek: %s", rrd_strerror(errno));
512 rrd_file->pos = ret;
513 #endif
514 /* mimic fseek, which returns 0 upon success */
515 return ret < 0; /*XXX: or just ret to mimic lseek */
516 }
519 /* Get current position in rrd_file. */
521 off_t rrd_tell(
522 rrd_file_t *rrd_file)
523 {
524 return rrd_file->pos;
525 }
528 /* Read count bytes into buffer buf, starting at rrd_file->pos.
529 * Returns the number of bytes read or <0 on error. */
531 ssize_t rrd_read(
532 rrd_file_t *rrd_file,
533 void *buf,
534 size_t count)
535 {
536 #ifdef HAVE_MMAP
537 size_t _cnt = count;
538 ssize_t _surplus;
540 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
541 return 0;
542 if (buf == NULL)
543 return -1; /* EINVAL */
544 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
545 if (_surplus > 0) { /* short read */
546 _cnt -= _surplus;
547 }
548 if (_cnt == 0)
549 return 0; /* EOF */
550 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
552 rrd_file->pos += _cnt; /* mimmic read() semantics */
553 return _cnt;
554 #else
555 ssize_t ret;
557 ret = read(rrd_file->fd, buf, count);
558 if (ret > 0)
559 rrd_file->pos += ret; /* mimmic read() semantics */
560 return ret;
561 #endif
562 }
565 /* Write count bytes from buffer buf to the current position
566 * rrd_file->pos of rrd_file->fd.
567 * Returns the number of bytes written or <0 on error. */
569 ssize_t rrd_write(
570 rrd_file_t *rrd_file,
571 const void *buf,
572 size_t count)
573 {
574 #ifdef HAVE_MMAP
575 if (count == 0)
576 return 0;
577 if (buf == NULL)
578 return -1; /* EINVAL */
579 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
580 rrd_file->pos += count;
581 return count; /* mimmic write() semantics */
582 #else
583 ssize_t _sz = write(rrd_file->fd, buf, count);
585 if (_sz > 0)
586 rrd_file->pos += _sz;
587 return _sz;
588 #endif
589 }
592 /* flush all data pending to be written to FD. */
594 void rrd_flush(
595 rrd_file_t *rrd_file)
596 {
597 /*
598 * Win32 can only flush files by FlushFileBuffers function,
599 * but it works with HANDLE hFile, not FILE. So skipping
600 */
601 #ifndef WIN32
602 if (fdatasync(rrd_file->fd) != 0) {
603 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
604 rrd_strerror(errno));
605 }
606 #endif
607 }
610 /* Initialize RRD header. */
612 void rrd_init(
613 rrd_t *rrd)
614 {
615 rrd->stat_head = NULL;
616 rrd->ds_def = NULL;
617 rrd->rra_def = NULL;
618 rrd->live_head = NULL;
619 rrd->legacy_last_up = NULL;
620 rrd->rra_ptr = NULL;
621 rrd->pdp_prep = NULL;
622 rrd->cdp_prep = NULL;
623 rrd->rrd_value = NULL;
624 }
627 /* free RRD header data. */
629 #ifdef HAVE_MMAP
630 void rrd_free(
631 rrd_t *rrd)
632 {
633 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
634 free(rrd->live_head);
635 }
636 }
637 #else
638 void rrd_free(
639 rrd_t *rrd)
640 {
641 free(rrd->live_head);
642 free(rrd->stat_head);
643 free(rrd->ds_def);
644 free(rrd->rra_def);
645 free(rrd->rra_ptr);
646 free(rrd->pdp_prep);
647 free(rrd->cdp_prep);
648 free(rrd->rrd_value);
649 }
650 #endif
/* Hand a block of memory back to the allocator with free().
 * Provided so that code outside the rrd library can release memory the
 * library allocated. */
void rrd_freemem(
    void *mem)
{
    void     *block = mem;

    free(block);
}