1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 #define DEBUG 1
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
/* __rrd_read(dst, dst_t, cnt): load `cnt` consecutive objects of type
 * `dst_t` from the current header offset and make `dst` point at them.
 *
 * The macro expands inside rrd_open() and relies on names from the
 * expansion site: `offset`, `rrd_file`, the label `out_nullify_head` and,
 * in the mmap variant, `data`. On failure it records an error with
 * rrd_set_error() and jumps to `out_nullify_head`; on success `offset` is
 * advanced past the bytes consumed.
 */
#ifdef HAVE_MMAP
/* mmap variant: `dst` is pointed straight into the mapping, nothing is
 * copied.
 * the cast to void* is there to avoid this warning seen on ia64 with certain
 * versions of gcc: 'cast increases required alignment of target type'
 */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        if (offset + wanted > rrd_file->file_len) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += wanted; \
    } while (0)
#else
/* read(2) variant: a buffer of the wanted size is malloc'ed for `dst` and
 * filled from rrd_file->fd. */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        size_t got; \
        (dst) = malloc(wanted); \
        if ((dst) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        got = read (rrd_file->fd, dst, wanted); \
        if (got != wanted) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += got; \
    } while (0)
#endif
/* PAGE_START(addr): clear the low bits of `addr` selected by
 * (_page_size - 1), i.e. get the address of the start of its page.
 * The expansion reads a variable called `_page_size`, which therefore has
 * to be in scope wherever the macro is used.
 * NOTE(review): the mask only rounds to a page boundary when _page_size is
 * a power of two -- presumably always true for sysconf(_SC_PAGESIZE). */
#if defined(USE_MADVISE) || defined(HAVE_POSIX_FADVISE)
# if !defined(PAGE_START)
#  define PAGE_START(addr) ((addr) & ~(_page_size - 1))
# endif
#endif
57 /* Open a database file, return its header and an open filehandle,
58 * positioned to the first cdp in the first rra.
59 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
60 * before returning an error. Do not call rrd_close upon failure of rrd_open.
61 */
63 rrd_file_t *rrd_open(
64 const char *const file_name,
65 rrd_t *rrd,
66 unsigned rdwr)
67 {
68 int flags = 0;
69 mode_t mode = S_IRUSR;
70 int version;
72 #ifdef HAVE_MMAP
73 ssize_t _page_size = sysconf(_SC_PAGESIZE);
74 int mm_prot = PROT_READ, mm_flags = 0;
75 char *data = MAP_FAILED;
76 #endif
77 off_t offset = 0;
78 struct stat statb;
79 rrd_file_t *rrd_file = NULL;
80 off_t newfile_size = 0;
82 if ((rdwr & RRD_CREAT) && (rdwr & RRD_CREAT_SETSIZE)) {
83 /* yes bad inline signaling alert, we are using the
84 floatcookie to pass the size in ... only used in resize */
85 newfile_size = (off_t) rrd->stat_head->float_cookie;
86 free(rrd->stat_head);
87 }
88 if(!(rdwr & RRD_CREAT))
89 rrd_init(rrd);
90 rrd_file = malloc(sizeof(rrd_file_t));
91 if (rrd_file == NULL) {
92 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
93 return NULL;
94 }
95 memset(rrd_file, 0, sizeof(rrd_file_t));
97 #ifdef DEBUG
98 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
99 (RRD_READONLY | RRD_READWRITE)) {
100 /* Both READONLY and READWRITE were given, which is invalid. */
101 rrd_set_error("in read/write request mask");
102 exit(-1);
103 }
104 #endif
105 if (rdwr & RRD_READONLY) {
106 flags |= O_RDONLY;
107 #ifdef HAVE_MMAP
108 mm_flags = MAP_PRIVATE;
109 # ifdef MAP_NORESERVE
110 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
111 # endif
112 #endif
113 } else {
114 if (rdwr & RRD_READWRITE) {
115 mode |= S_IWUSR;
116 flags |= O_RDWR;
117 #ifdef HAVE_MMAP
118 mm_flags = MAP_SHARED;
119 mm_prot |= PROT_WRITE;
120 #endif
121 }
122 if (rdwr & RRD_CREAT) {
123 flags |= (O_CREAT | O_TRUNC);
124 }
125 }
126 if (rdwr & RRD_READAHEAD) {
127 #ifdef MAP_POPULATE
128 mm_flags |= MAP_POPULATE; /* populate ptes and data */
129 #endif
130 #if defined MAP_NONBLOCK
131 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
132 #endif
133 }
134 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
135 flags |= O_BINARY;
136 #endif
138 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
139 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
140 goto out_free;
141 }
143 /* Better try to avoid seeks as much as possible. stat may be heavy but
144 * many concurrent seeks are even worse. */
145 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
146 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
147 goto out_close;
148 }
149 if (newfile_size == 0) {
150 rrd_file->file_len = statb.st_size;
151 } else {
152 rrd_file->file_len = newfile_size;
153 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
154 write(rrd_file->fd, "\0", 1); /* poke */
155 lseek(rrd_file->fd, 0, SEEK_SET);
156 }
157 #ifdef HAVE_POSIX_FADVISE
158 /* In general we need no read-ahead when dealing with rrd_files.
159 When we stop reading, it is highly unlikely that we start up again.
160 In this manner we actually save time and diskaccess (and buffer cache).
161 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
162 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
163 #endif
165 /*
166 if (rdwr & RRD_READWRITE)
167 {
168 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
169 rrd_set_error("failed to disable the stream buffer\n");
170 return (-1);
171 }
172 }
173 */
175 #ifdef HAVE_MMAP
176 if(rrd_file->file_len == 0 && (rdwr & RRD_CREAT))
177 {
178 rrd_file->file_start = NULL;
179 goto out_done;
180 }
181 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
182 rrd_file->fd, offset);
184 /* lets see if the first read worked */
185 if (data == MAP_FAILED) {
186 rrd_set_error("mmaping file '%s': %s", file_name,
187 rrd_strerror(errno));
188 goto out_close;
189 }
190 rrd_file->file_start = data;
191 if (rdwr & RRD_CREAT) {
192 memset(data, DNAN, newfile_size - 1);
193 goto out_done;
194 }
195 #endif
196 if (rdwr & RRD_CREAT)
197 goto out_done;
198 #ifdef USE_MADVISE
199 if (rdwr & RRD_COPY) {
200 /* We will read everything in a moment (copying) */
201 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
202 } else {
203 /* We do not need to read anything in for the moment */
204 madvise(data, rrd_file->file_len, MADV_RANDOM);
205 /* the stat_head will be needed soonish, so hint accordingly */
206 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
207 }
208 #endif
210 __rrd_read(rrd->stat_head, stat_head_t,
211 1);
213 /* lets do some test if we are on track ... */
214 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
215 rrd_set_error("'%s' is not an RRD file", file_name);
216 goto out_nullify_head;
217 }
219 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
220 rrd_set_error("This RRD was created on another architecture");
221 goto out_nullify_head;
222 }
224 version = atoi(rrd->stat_head->version);
226 if (version > atoi(RRD_VERSION)) {
227 rrd_set_error("can't handle RRD file version %s",
228 rrd->stat_head->version);
229 goto out_nullify_head;
230 }
231 #if defined USE_MADVISE
232 /* the ds_def will be needed soonish, so hint accordingly */
233 madvise(data + PAGE_START(offset),
234 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
235 #endif
236 __rrd_read(rrd->ds_def, ds_def_t,
237 rrd->stat_head->ds_cnt);
239 #if defined USE_MADVISE
240 /* the rra_def will be needed soonish, so hint accordingly */
241 madvise(data + PAGE_START(offset),
242 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
243 #endif
244 __rrd_read(rrd->rra_def, rra_def_t,
245 rrd->stat_head->rra_cnt);
247 /* handle different format for the live_head */
248 if (version < 3) {
249 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
250 if (rrd->live_head == NULL) {
251 rrd_set_error("live_head_t malloc");
252 goto out_close;
253 }
254 #if defined USE_MADVISE
255 /* the live_head will be needed soonish, so hint accordingly */
256 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
257 #endif
258 __rrd_read(rrd->legacy_last_up, time_t,
259 1);
261 rrd->live_head->last_up = *rrd->legacy_last_up;
262 rrd->live_head->last_up_usec = 0;
263 } else {
264 #if defined USE_MADVISE
265 /* the live_head will be needed soonish, so hint accordingly */
266 madvise(data + PAGE_START(offset),
267 sizeof(live_head_t), MADV_WILLNEED);
268 #endif
269 __rrd_read(rrd->live_head, live_head_t,
270 1);
271 }
272 __rrd_read(rrd->pdp_prep, pdp_prep_t,
273 rrd->stat_head->ds_cnt);
274 __rrd_read(rrd->cdp_prep, cdp_prep_t,
275 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
276 __rrd_read(rrd->rra_ptr, rra_ptr_t,
277 rrd->stat_head->rra_cnt);
279 rrd_file->header_len = offset;
280 rrd_file->pos = offset;
282 {
283 unsigned long row_cnt = 0;
284 unsigned long i;
286 for (i=0; i<rrd->stat_head->rra_cnt; i++)
287 row_cnt += rrd->rra_def[i].row_cnt;
289 off_t correct_len = rrd_file->header_len +
290 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
292 if (correct_len > rrd_file->file_len)
293 {
294 rrd_set_error("'%s' is too small (should be %ld bytes)",
295 file_name, (long long) correct_len);
296 goto out_nullify_head;
297 }
298 }
300 out_done:
301 return (rrd_file);
302 out_nullify_head:
303 rrd->stat_head = NULL;
304 out_close:
305 #ifdef HAVE_MMAP
306 if (data != MAP_FAILED)
307 munmap(data, rrd_file->file_len);
308 #endif
309 close(rrd_file->fd);
310 out_free:
311 free(rrd_file);
312 return NULL;
313 }
316 #if defined DEBUG && DEBUG > 1
317 /* Print list of in-core pages of a the current rrd_file. */
318 static
319 void mincore_print(
320 rrd_file_t *rrd_file,
321 char *mark)
322 {
323 #ifdef HAVE_MMAP
324 /* pretty print blocks in core */
325 off_t off;
326 unsigned char *vec;
327 ssize_t _page_size = sysconf(_SC_PAGESIZE);
329 off = rrd_file->file_len +
330 ((rrd_file->file_len + _page_size - 1) / _page_size);
331 vec = malloc(off);
332 if (vec != NULL) {
333 memset(vec, 0, off);
334 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
335 int prev;
336 unsigned is_in = 0, was_in = 0;
338 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
339 is_in = vec[off] & 1; /* if lsb set then is core resident */
340 if (off == 0)
341 was_in = is_in;
342 if (was_in != is_in) {
343 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
344 was_in ? "" : "not ", vec + prev, off - prev);
345 was_in = is_in;
346 prev = off;
347 }
348 }
349 fprintf(stderr,
350 "%s: %sin core: %p len %ld\n", mark,
351 was_in ? "" : "not ", vec + prev, off - prev);
352 } else
353 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
354 }
355 #else
356 fprintf(stderr, "sorry mincore only works with mmap");
357 #endif
358 }
359 #endif /* defined DEBUG && DEBUG > 1 */
362 /* drop cache except for the header and the active pages */
363 void rrd_dontneed(
364 rrd_file_t *rrd_file,
365 rrd_t *rrd)
366 {
367 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
368 off_t dontneed_start;
369 off_t rra_start;
370 off_t active_block;
371 unsigned long i;
372 ssize_t _page_size = sysconf(_SC_PAGESIZE);
374 if (rrd_file == NULL) {
375 #if defined DEBUG && DEBUG
376 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
377 #endif
378 return;
379 }
381 #if defined DEBUG && DEBUG > 1
382 mincore_print(rrd_file, "before");
383 #endif
385 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
386 rra_start = rrd_file->header_len;
387 dontneed_start = PAGE_START(rra_start) + _page_size;
388 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
389 active_block =
390 PAGE_START(rra_start
391 + rrd->rra_ptr[i].cur_row
392 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
393 if (active_block > dontneed_start) {
394 #ifdef USE_MADVISE
395 madvise(rrd_file->file_start + dontneed_start,
396 active_block - dontneed_start - 1, MADV_DONTNEED);
397 #endif
398 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
399 #ifdef HAVE_POSIX_FADVISE
400 posix_fadvise(rrd_file->fd, dontneed_start,
401 active_block - dontneed_start - 1,
402 POSIX_FADV_DONTNEED);
403 #endif
404 }
405 dontneed_start = active_block;
406 /* do not release 'hot' block if update for this RAA will occur
407 * within 10 minutes */
408 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
409 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
410 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
411 dontneed_start += _page_size;
412 }
413 rra_start +=
414 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
415 sizeof(rrd_value_t);
416 }
418 if (dontneed_start < rrd_file->file_len) {
419 #ifdef USE_MADVISE
420 madvise(rrd_file->file_start + dontneed_start,
421 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
422 #endif
423 #ifdef HAVE_POSIX_FADVISE
424 posix_fadvise(rrd_file->fd, dontneed_start,
425 rrd_file->file_len - dontneed_start,
426 POSIX_FADV_DONTNEED);
427 #endif
428 }
430 #if defined DEBUG && DEBUG > 1
431 mincore_print(rrd_file, "after");
432 #endif
433 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
434 }
440 int rrd_close(
441 rrd_file_t *rrd_file)
442 {
443 int ret;
445 #ifdef HAVE_MMAP
446 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
447 if (ret != 0)
448 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
449 ret = munmap(rrd_file->file_start, rrd_file->file_len);
450 if (ret != 0)
451 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
452 #endif
453 ret = close(rrd_file->fd);
454 if (ret != 0)
455 rrd_set_error("closing file: %s", rrd_strerror(errno));
456 free(rrd_file);
457 rrd_file = NULL;
458 return ret;
459 }
462 /* Set position of rrd_file. */
464 off_t rrd_seek(
465 rrd_file_t *rrd_file,
466 off_t off,
467 int whence)
468 {
469 off_t ret = 0;
471 #ifdef HAVE_MMAP
472 if (whence == SEEK_SET)
473 rrd_file->pos = off;
474 else if (whence == SEEK_CUR)
475 rrd_file->pos += off;
476 else if (whence == SEEK_END)
477 rrd_file->pos = rrd_file->file_len + off;
478 #else
479 ret = lseek(rrd_file->fd, off, whence);
480 if (ret < 0)
481 rrd_set_error("lseek: %s", rrd_strerror(errno));
482 rrd_file->pos = ret;
483 #endif
484 /* mimic fseek, which returns 0 upon success */
485 return ret < 0; /*XXX: or just ret to mimic lseek */
486 }
489 /* Get current position in rrd_file. */
491 off_t rrd_tell(
492 rrd_file_t *rrd_file)
493 {
494 return rrd_file->pos;
495 }
498 /* Read count bytes into buffer buf, starting at rrd_file->pos.
499 * Returns the number of bytes read or <0 on error. */
501 ssize_t rrd_read(
502 rrd_file_t *rrd_file,
503 void *buf,
504 size_t count)
505 {
506 #ifdef HAVE_MMAP
507 size_t _cnt = count;
508 ssize_t _surplus;
510 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
511 return 0;
512 if (buf == NULL)
513 return -1; /* EINVAL */
514 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
515 if (_surplus > 0) { /* short read */
516 _cnt -= _surplus;
517 }
518 if (_cnt == 0)
519 return 0; /* EOF */
520 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
522 rrd_file->pos += _cnt; /* mimmic read() semantics */
523 return _cnt;
524 #else
525 ssize_t ret;
527 ret = read(rrd_file->fd, buf, count);
528 if (ret > 0)
529 rrd_file->pos += ret; /* mimmic read() semantics */
530 return ret;
531 #endif
532 }
535 /* Write count bytes from buffer buf to the current position
536 * rrd_file->pos of rrd_file->fd.
537 * Returns the number of bytes written or <0 on error. */
539 ssize_t rrd_write(
540 rrd_file_t *rrd_file,
541 const void *buf,
542 size_t count)
543 {
544 #ifdef HAVE_MMAP
545 /* These flags are used if creating a new RRD */
546 int mm_prot = PROT_READ | PROT_WRITE, mm_flags = MAP_SHARED;
547 int old_size = rrd_file->file_len;
548 int new_size = rrd_file->file_len;
549 if (count == 0)
550 return 0;
551 if (buf == NULL)
552 return -1; /* EINVAL */
554 if((rrd_file->pos + count) > old_size)
555 {
556 new_size = rrd_file->pos + count;
557 rrd_file->file_len = new_size;
558 lseek(rrd_file->fd, new_size - 1, SEEK_SET);
559 write(rrd_file->fd, "\0", 1); /* poke */
560 lseek(rrd_file->fd, 0, SEEK_SET);
561 if(rrd_file->file_start == NULL)
562 {
563 rrd_file->file_start = mmap(0, new_size, mm_prot, mm_flags,
564 rrd_file->fd, 0);
565 }
566 else
567 rrd_file->file_start = mremap(rrd_file->file_start, old_size, new_size, MREMAP_MAYMOVE);
569 if (rrd_file->file_start == MAP_FAILED) {
570 rrd_set_error("m(re)maping file : %s",
571 rrd_strerror(errno));
572 return -1;
573 }
574 }
575 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
576 rrd_file->pos += count;
577 return count; /* mimmic write() semantics */
578 #else
579 ssize_t _sz = write(rrd_file->fd, buf, count);
581 if (_sz > 0)
582 rrd_file->pos += _sz;
583 return _sz;
584 #endif
585 }
588 /* flush all data pending to be written to FD. */
590 void rrd_flush(
591 rrd_file_t *rrd_file)
592 {
593 if (fdatasync(rrd_file->fd) != 0) {
594 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
595 rrd_strerror(errno));
596 }
597 }
600 /* Initialize RRD header. */
602 void rrd_init(
603 rrd_t *rrd)
604 {
605 rrd->stat_head = NULL;
606 rrd->ds_def = NULL;
607 rrd->rra_def = NULL;
608 rrd->live_head = NULL;
609 rrd->legacy_last_up = NULL;
610 rrd->rra_ptr = NULL;
611 rrd->pdp_prep = NULL;
612 rrd->cdp_prep = NULL;
613 rrd->rrd_value = NULL;
614 }
617 /* free RRD header data. */
619 #ifdef HAVE_MMAP
620 void rrd_free(
621 rrd_t *rrd)
622 {
623 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
624 free(rrd->live_head);
625 }
626 }
627 #else
628 void rrd_free(
629 rrd_t *rrd)
630 {
631 free(rrd->live_head);
632 free(rrd->stat_head);
633 free(rrd->ds_def);
634 free(rrd->rra_def);
635 free(rrd->rra_ptr);
636 free(rrd->pdp_prep);
637 free(rrd->cdp_prep);
638 free(rrd->rrd_value);
639 }
640 #endif
/* Hand a block of memory back to the allocator with free().
 * Meant for external libraries that need to release memory which was
 * allocated by the rrd library. */
void rrd_freemem(void *mem)
{
    free(mem);
}