1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 #define DEBUG 1
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
/* __rrd_read(dst, dst_t, cnt): load `cnt` consecutive objects of type
 * `dst_t` from the current header position into `dst` and advance `offset`
 * by their total size.
 *
 * The macro expands inside rrd_open and relies on these names being in
 * scope at the expansion site: `rrd_file`, `offset`, the label
 * `out_nullify_head` and (mmap build only) `data`.  On failure it calls
 * rrd_set_error() and jumps to `out_nullify_head`.
 *
 * A count whose byte size would wrap around size_t is rejected instead of
 * being silently truncated.  The body is wrapped in do { } while (0) so that
 * an expansion followed by ';' is always exactly one statement.
 */
#ifdef HAVE_MMAP
/* mmap build: `dst` is pointed straight into the mapping, nothing is copied.
   the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t    rd_cnt_ = (size_t) (cnt); \
        size_t    wanted = sizeof(dst_t) * rd_cnt_; \
        if (rd_cnt_ > ((size_t) -1) / sizeof(dst_t) \
            || rrd_file->file_len < offset \
            || (unsigned long long) wanted > \
               (unsigned long long) (rrd_file->file_len - offset)) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += wanted; \
    } while (0)
#else
/* read() build: `dst` receives a freshly malloc'ed buffer that is filled
 * from rrd_file->fd. */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t    rd_cnt_ = (size_t) (cnt); \
        size_t    wanted = sizeof(dst_t) * rd_cnt_; \
        ssize_t   got; \
        if (rd_cnt_ > ((size_t) -1) / sizeof(dst_t) \
            || ((dst) = malloc(wanted)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        got = read(rrd_file->fd, dst, wanted); \
        if (got < 0 || (size_t) got != wanted) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += got; \
    } while (0)
#endif
/* Round an offset down to the start of the memory page that contains it.
 * A variable named `_page_size` holding the page size must be in scope
 * wherever the macro is expanded. */
#if defined(USE_MADVISE) || defined(HAVE_POSIX_FADVISE)
# if !defined(PAGE_START)
#  define PAGE_START(addr) ((addr) & ~(_page_size - 1))
# endif
#endif
57 /* Open a database file, return its header and an open filehandle,
58 * positioned to the first cdp in the first rra.
59 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
60 * before returning an error. Do not call rrd_close upon failure of rrd_open.
61 * If creating a new file, the parameter rrd must be initialised with
62 * details of the file content.
63 * If opening an existing file, then use rrd must be initialised by
64 * rrd_init(rrd) prior to invoking rrd_open
65 */
67 rrd_file_t *rrd_open(
68 const char *const file_name,
69 rrd_t *rrd,
70 unsigned rdwr)
71 {
72 int i;
73 int flags = 0;
74 mode_t mode = S_IRUSR;
75 int version;
77 #ifdef HAVE_MMAP
78 ssize_t _page_size = sysconf(_SC_PAGESIZE);
79 char *data = MAP_FAILED;
80 #endif
81 off_t offset = 0;
82 struct stat statb;
83 rrd_file_t *rrd_file = NULL;
84 off_t newfile_size = 0;
85 off_t header_len, value_cnt, data_len;
87 /* Are we creating a new file? */
88 if((rdwr & RRD_CREAT) && (rrd->stat_head != NULL))
89 {
90 header_len = \
91 sizeof(stat_head_t) + \
92 sizeof(ds_def_t) * rrd->stat_head->ds_cnt + \
93 sizeof(rra_def_t) * rrd->stat_head->rra_cnt + \
94 sizeof(time_t) + \
95 sizeof(live_head_t) + \
96 sizeof(pdp_prep_t) * rrd->stat_head->ds_cnt + \
97 sizeof(cdp_prep_t) * rrd->stat_head->ds_cnt * rrd->stat_head->rra_cnt + \
98 sizeof(rra_ptr_t) * rrd->stat_head->rra_cnt;
100 value_cnt = 0;
101 for (i = 0; i < rrd->stat_head->rra_cnt; i++)
102 value_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[i].row_cnt;
104 data_len = sizeof(rrd_value_t) * value_cnt;
106 newfile_size = header_len + data_len;
107 }
109 rrd_file = malloc(sizeof(rrd_file_t));
110 if (rrd_file == NULL) {
111 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
112 return NULL;
113 }
114 memset(rrd_file, 0, sizeof(rrd_file_t));
116 #ifdef DEBUG
117 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
118 (RRD_READONLY | RRD_READWRITE)) {
119 /* Both READONLY and READWRITE were given, which is invalid. */
120 rrd_set_error("in read/write request mask");
121 exit(-1);
122 }
123 #endif
125 #ifdef HAVE_MMAP
126 rrd_file->mm_prot = PROT_READ;
127 rrd_file->mm_flags = 0;
128 #endif
130 if (rdwr & RRD_READONLY) {
131 flags |= O_RDONLY;
132 #ifdef HAVE_MMAP
133 rrd_file->mm_flags = MAP_PRIVATE;
134 # ifdef MAP_NORESERVE
135 rrd_file->mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
136 # endif
137 #endif
138 } else {
139 if (rdwr & RRD_READWRITE) {
140 mode |= S_IWUSR;
141 flags |= O_RDWR;
142 #ifdef HAVE_MMAP
143 rrd_file->mm_flags = MAP_SHARED;
144 rrd_file->mm_prot |= PROT_WRITE;
145 #endif
146 }
147 if (rdwr & RRD_CREAT) {
148 flags |= (O_CREAT | O_TRUNC);
149 }
150 }
151 if (rdwr & RRD_READAHEAD) {
152 #ifdef MAP_POPULATE
153 rrd_file->mm_flags |= MAP_POPULATE; /* populate ptes and data */
154 #endif
155 #if defined MAP_NONBLOCK
156 rrd_file->mm_flags |= MAP_NONBLOCK; /* just populate ptes */
157 #endif
158 }
159 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
160 flags |= O_BINARY;
161 #endif
163 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
164 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
165 goto out_free;
166 }
168 /* Better try to avoid seeks as much as possible. stat may be heavy but
169 * many concurrent seeks are even worse. */
170 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
171 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
172 goto out_close;
173 }
174 if (newfile_size == 0) {
175 rrd_file->file_len = statb.st_size;
176 } else {
177 rrd_file->file_len = newfile_size;
178 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
179 write(rrd_file->fd, "\0", 1); /* poke */
180 lseek(rrd_file->fd, 0, SEEK_SET);
181 }
182 #ifdef HAVE_POSIX_FADVISE
183 /* In general we need no read-ahead when dealing with rrd_files.
184 When we stop reading, it is highly unlikely that we start up again.
185 In this manner we actually save time and diskaccess (and buffer cache).
186 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
187 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
188 #endif
190 /*
191 if (rdwr & RRD_READWRITE)
192 {
193 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
194 rrd_set_error("failed to disable the stream buffer\n");
195 return (-1);
196 }
197 }
198 */
200 #ifdef HAVE_MMAP
201 data = mmap(0, rrd_file->file_len, rrd_file->mm_prot, rrd_file->mm_flags,
202 rrd_file->fd, offset);
204 /* lets see if the first read worked */
205 if (data == MAP_FAILED) {
206 rrd_set_error("mmaping file '%s': %s", file_name,
207 rrd_strerror(errno));
208 goto out_close;
209 }
210 rrd_file->file_start = data;
211 if (rdwr & RRD_CREAT) {
212 memset(data, DNAN, newfile_size - 1);
213 goto out_done;
214 }
215 #endif
216 if (rdwr & RRD_CREAT)
217 goto out_done;
218 #ifdef USE_MADVISE
219 if (rdwr & RRD_COPY) {
220 /* We will read everything in a moment (copying) */
221 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
222 } else {
223 /* We do not need to read anything in for the moment */
224 madvise(data, rrd_file->file_len, MADV_RANDOM);
225 /* the stat_head will be needed soonish, so hint accordingly */
226 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
227 }
228 #endif
230 __rrd_read(rrd->stat_head, stat_head_t,
231 1);
233 /* lets do some test if we are on track ... */
234 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
235 rrd_set_error("'%s' is not an RRD file", file_name);
236 goto out_nullify_head;
237 }
239 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
240 rrd_set_error("This RRD was created on another architecture");
241 goto out_nullify_head;
242 }
244 version = atoi(rrd->stat_head->version);
246 if (version > atoi(RRD_VERSION)) {
247 rrd_set_error("can't handle RRD file version %s",
248 rrd->stat_head->version);
249 goto out_nullify_head;
250 }
251 #if defined USE_MADVISE
252 /* the ds_def will be needed soonish, so hint accordingly */
253 madvise(data + PAGE_START(offset),
254 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
255 #endif
256 __rrd_read(rrd->ds_def, ds_def_t,
257 rrd->stat_head->ds_cnt);
259 #if defined USE_MADVISE
260 /* the rra_def will be needed soonish, so hint accordingly */
261 madvise(data + PAGE_START(offset),
262 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
263 #endif
264 __rrd_read(rrd->rra_def, rra_def_t,
265 rrd->stat_head->rra_cnt);
267 /* handle different format for the live_head */
268 if (version < 3) {
269 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
270 if (rrd->live_head == NULL) {
271 rrd_set_error("live_head_t malloc");
272 goto out_close;
273 }
274 #if defined USE_MADVISE
275 /* the live_head will be needed soonish, so hint accordingly */
276 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
277 #endif
278 __rrd_read(rrd->legacy_last_up, time_t,
279 1);
281 rrd->live_head->last_up = *rrd->legacy_last_up;
282 rrd->live_head->last_up_usec = 0;
283 } else {
284 #if defined USE_MADVISE
285 /* the live_head will be needed soonish, so hint accordingly */
286 madvise(data + PAGE_START(offset),
287 sizeof(live_head_t), MADV_WILLNEED);
288 #endif
289 __rrd_read(rrd->live_head, live_head_t,
290 1);
291 }
292 __rrd_read(rrd->pdp_prep, pdp_prep_t,
293 rrd->stat_head->ds_cnt);
294 __rrd_read(rrd->cdp_prep, cdp_prep_t,
295 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
296 __rrd_read(rrd->rra_ptr, rra_ptr_t,
297 rrd->stat_head->rra_cnt);
299 rrd_file->header_len = offset;
300 rrd_file->pos = offset;
302 {
303 unsigned long row_cnt = 0;
304 unsigned long i;
306 for (i=0; i<rrd->stat_head->rra_cnt; i++)
307 row_cnt += rrd->rra_def[i].row_cnt;
309 off_t correct_len = rrd_file->header_len +
310 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
312 if (correct_len > rrd_file->file_len)
313 {
314 rrd_set_error("'%s' is too small (should be %ld bytes)",
315 file_name, (long long) correct_len);
316 goto out_nullify_head;
317 }
318 }
320 out_done:
321 return (rrd_file);
322 out_nullify_head:
323 rrd->stat_head = NULL;
324 out_close:
325 #ifdef HAVE_MMAP
326 if (data != MAP_FAILED)
327 munmap(data, rrd_file->file_len);
328 #endif
329 close(rrd_file->fd);
330 out_free:
331 free(rrd_file);
332 return NULL;
333 }
#if defined DEBUG && DEBUG > 1
/* Print, to stderr, the runs of in-core and not-in-core pages of the
 * mapping behind rrd_file.  Every line is prefixed with `mark`; the address
 * is the start of the run inside the mapping and `len` is the run length in
 * bytes (whole pages).
 * Debugging aid, only compiled when DEBUG is greater than 1. */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    size_t    page_cnt;
    size_t    page;
    size_t    run_start = 0;
    unsigned  is_in = 0, was_in = 0;
    unsigned char *vec;

    if (_page_size <= 0 || rrd_file->file_len <= 0)
        return;

    /* mincore() reports one status byte per page of the queried range */
    page_cnt = (size_t) ((rrd_file->file_len + _page_size - 1) / _page_size);
    vec = calloc(page_cnt, 1);
    if (vec == NULL)
        return;

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) != 0) {
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));
        free(vec);
        return;
    }

    for (page = 0; page < page_cnt; ++page) {
        is_in = vec[page] & 1;  /* if lsb set then is core resident */
        if (page == 0)
            was_in = is_in;
        if (was_in != is_in) {
            /* residency changed: report the run that just ended */
            fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                    was_in ? "" : "not ",
                    (void *) (rrd_file->file_start + run_start * _page_size),
                    (long) ((page - run_start) * _page_size));
            was_in = is_in;
            run_start = page;
        }
    }
    /* report the final run */
    fprintf(stderr,
            "%s: %sin core: %p len %ld\n", mark,
            was_in ? "" : "not ",
            (void *) (rrd_file->file_start + run_start * _page_size),
            (long) ((page - run_start) * _page_size));
    free(vec);          /* the status vector is scratch space only */
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
381 /*
382 * get exclusive lock to whole file.
383 * lock gets removed when we close the file
384 *
385 * returns 0 on success
386 */
387 int rrd_lock(
388 rrd_file_t *file)
389 {
390 int rcstat;
392 {
393 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
394 struct _stat st;
396 if (_fstat(file->fd, &st) == 0) {
397 rcstat = _locking(file->fd, _LK_NBLCK, st.st_size);
398 } else {
399 rcstat = -1;
400 }
401 #else
402 struct flock lock;
404 lock.l_type = F_WRLCK; /* exclusive write lock */
405 lock.l_len = 0; /* whole file */
406 lock.l_start = 0; /* start of file */
407 lock.l_whence = SEEK_SET; /* end of file */
409 rcstat = fcntl(file->fd, F_SETLK, &lock);
410 #endif
411 }
413 return (rcstat);
414 }
417 /* drop cache except for the header and the active pages */
418 void rrd_dontneed(
419 rrd_file_t *rrd_file,
420 rrd_t *rrd)
421 {
422 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
423 off_t dontneed_start;
424 off_t rra_start;
425 off_t active_block;
426 unsigned long i;
427 ssize_t _page_size = sysconf(_SC_PAGESIZE);
429 if (rrd_file == NULL) {
430 #if defined DEBUG && DEBUG
431 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
432 #endif
433 return;
434 }
436 #if defined DEBUG && DEBUG > 1
437 mincore_print(rrd_file, "before");
438 #endif
440 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
441 rra_start = rrd_file->header_len;
442 dontneed_start = PAGE_START(rra_start) + _page_size;
443 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
444 active_block =
445 PAGE_START(rra_start
446 + rrd->rra_ptr[i].cur_row
447 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
448 if (active_block > dontneed_start) {
449 #ifdef USE_MADVISE
450 madvise(rrd_file->file_start + dontneed_start,
451 active_block - dontneed_start - 1, MADV_DONTNEED);
452 #endif
453 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
454 #ifdef HAVE_POSIX_FADVISE
455 posix_fadvise(rrd_file->fd, dontneed_start,
456 active_block - dontneed_start - 1,
457 POSIX_FADV_DONTNEED);
458 #endif
459 }
460 dontneed_start = active_block;
461 /* do not release 'hot' block if update for this RAA will occur
462 * within 10 minutes */
463 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
464 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
465 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
466 dontneed_start += _page_size;
467 }
468 rra_start +=
469 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
470 sizeof(rrd_value_t);
471 }
473 if (dontneed_start < rrd_file->file_len) {
474 #ifdef USE_MADVISE
475 madvise(rrd_file->file_start + dontneed_start,
476 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
477 #endif
478 #ifdef HAVE_POSIX_FADVISE
479 posix_fadvise(rrd_file->fd, dontneed_start,
480 rrd_file->file_len - dontneed_start,
481 POSIX_FADV_DONTNEED);
482 #endif
483 }
485 #if defined DEBUG && DEBUG > 1
486 mincore_print(rrd_file, "after");
487 #endif
488 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
489 }
495 int rrd_close(
496 rrd_file_t *rrd_file)
497 {
498 int ret;
500 #ifdef HAVE_MMAP
501 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
502 if (ret != 0)
503 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
504 ret = munmap(rrd_file->file_start, rrd_file->file_len);
505 if (ret != 0)
506 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
507 #endif
508 ret = close(rrd_file->fd);
509 if (ret != 0)
510 rrd_set_error("closing file: %s", rrd_strerror(errno));
511 free(rrd_file);
512 rrd_file = NULL;
513 return ret;
514 }
517 /* Set position of rrd_file. */
519 off_t rrd_seek(
520 rrd_file_t *rrd_file,
521 off_t off,
522 int whence)
523 {
524 off_t ret = 0;
526 #ifdef HAVE_MMAP
527 if (whence == SEEK_SET)
528 rrd_file->pos = off;
529 else if (whence == SEEK_CUR)
530 rrd_file->pos += off;
531 else if (whence == SEEK_END)
532 rrd_file->pos = rrd_file->file_len + off;
533 #else
534 ret = lseek(rrd_file->fd, off, whence);
535 if (ret < 0)
536 rrd_set_error("lseek: %s", rrd_strerror(errno));
537 rrd_file->pos = ret;
538 #endif
539 /* mimic fseek, which returns 0 upon success */
540 return ret < 0; /*XXX: or just ret to mimic lseek */
541 }
544 /* Get current position in rrd_file. */
546 off_t rrd_tell(
547 rrd_file_t *rrd_file)
548 {
549 return rrd_file->pos;
550 }
553 /* Read count bytes into buffer buf, starting at rrd_file->pos.
554 * Returns the number of bytes read or <0 on error. */
556 ssize_t rrd_read(
557 rrd_file_t *rrd_file,
558 void *buf,
559 size_t count)
560 {
561 #ifdef HAVE_MMAP
562 size_t _cnt = count;
563 ssize_t _surplus;
565 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
566 return 0;
567 if (buf == NULL)
568 return -1; /* EINVAL */
569 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
570 if (_surplus > 0) { /* short read */
571 _cnt -= _surplus;
572 }
573 if (_cnt == 0)
574 return 0; /* EOF */
575 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
577 rrd_file->pos += _cnt; /* mimmic read() semantics */
578 return _cnt;
579 #else
580 ssize_t ret;
582 ret = read(rrd_file->fd, buf, count);
583 if (ret > 0)
584 rrd_file->pos += ret; /* mimmic read() semantics */
585 return ret;
586 #endif
587 }
590 /* Write count bytes from buffer buf to the current position
591 * rrd_file->pos of rrd_file->fd.
592 * Returns the number of bytes written or <0 on error. */
594 ssize_t rrd_write(
595 rrd_file_t *rrd_file,
596 const void *buf,
597 size_t count)
598 {
599 #ifdef HAVE_MMAP
600 int old_size = rrd_file->file_len;
601 if (count == 0)
602 return 0;
603 if (buf == NULL)
604 return -1; /* EINVAL */
606 if((rrd_file->pos + count) > old_size)
607 {
608 rrd_set_error("attempting to write beyond end of file");
609 return -1;
610 }
611 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
612 rrd_file->pos += count;
613 return count; /* mimmic write() semantics */
614 #else
615 ssize_t _sz = write(rrd_file->fd, buf, count);
617 if (_sz > 0)
618 rrd_file->pos += _sz;
619 return _sz;
620 #endif
621 }
624 /* flush all data pending to be written to FD. */
626 void rrd_flush(
627 rrd_file_t *rrd_file)
628 {
629 if (fdatasync(rrd_file->fd) != 0) {
630 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
631 rrd_strerror(errno));
632 }
633 }
636 /* Initialize RRD header. */
638 void rrd_init(
639 rrd_t *rrd)
640 {
641 rrd->stat_head = NULL;
642 rrd->ds_def = NULL;
643 rrd->rra_def = NULL;
644 rrd->live_head = NULL;
645 rrd->legacy_last_up = NULL;
646 rrd->rra_ptr = NULL;
647 rrd->pdp_prep = NULL;
648 rrd->cdp_prep = NULL;
649 rrd->rrd_value = NULL;
650 }
653 /* free RRD header data. */
655 #ifdef HAVE_MMAP
656 void rrd_free(
657 rrd_t *rrd)
658 {
659 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
660 free(rrd->live_head);
661 }
662 }
663 #else
664 void rrd_free(
665 rrd_t *rrd)
666 {
667 free(rrd->live_head);
668 free(rrd->stat_head);
669 free(rrd->ds_def);
670 free(rrd->rra_def);
671 free(rrd->rra_ptr);
672 free(rrd->pdp_prep);
673 free(rrd->cdp_prep);
674 free(rrd->rrd_value);
675 }
676 #endif
/* Free memory that was allocated by the rrd library.
 * Meant to be called by external libraries; mem is handed to free(). */
void rrd_freemem(void *mem)
{
    free(mem);
}