f7ccca87a7d643c19d78ab8dce8c9ffc12560e81
1 /*****************************************************************************
2 * RRDtool 1.3.8 Copyright by Tobi Oetiker, 1997-2009
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
12 #ifdef WIN32
13 #include <stdlib.h>
14 #include <fcntl.h>
15 #include <sys/stat.h>
16 #endif
18 #ifdef HAVE_BROKEN_MS_ASYNC
19 #include <sys/types.h>
20 #include <utime.h>
21 #endif
23 #define MEMBLK 8192
25 /* DEBUG 2 prints information obtained via mincore(2) */
26 #define DEBUG 1
27 /* do not calculate exact madvise hints but assume 1 page for headers and
28 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
/* __rrd_read(dst, dst_t, cnt): load cnt objects of type dst_t from the
 * current header position into dst and advance 'offset' past them.
 *
 * The macro expands inside rrd_open() and relies on these names being in
 * scope there: 'offset', 'rrd_file', the label 'out_nullify_head' and, in
 * the mmap variant, 'data'.
 *
 * cnt usually comes straight from the file header, so it is treated as
 * untrusted: the size check is done by division instead of computing
 * sizeof(dst_t) * cnt first, because that product can wrap around and
 * would then slip past the check.
 */
#ifdef HAVE_MMAP
/* mmap variant: dst is pointed into the mapping, nothing is copied.
   the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t rrd_rd_cnt_ = (size_t) (cnt); \
        size_t rrd_rd_left_ = 0; \
        if (offset >= 0 && offset < rrd_file->file_len) \
            rrd_rd_left_ = (size_t) (rrd_file->file_len - offset); \
        if (rrd_rd_cnt_ > rrd_rd_left_ / sizeof(dst_t)) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += sizeof(dst_t) * rrd_rd_cnt_; \
    } while (0)
#else
/* read() variant: dst receives a freshly malloc'ed buffer filled from
   rrd_file->fd. On a short read the buffer stays attached to dst. */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t rrd_rd_cnt_ = (size_t) (cnt); \
        size_t rrd_rd_wanted_; \
        ssize_t rrd_rd_got_; \
        if (rrd_rd_cnt_ > ((size_t) -1) / sizeof(dst_t)) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_rd_wanted_ = sizeof(dst_t) * rrd_rd_cnt_; \
        if (((dst) = (dst_t*)malloc(rrd_rd_wanted_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_rd_got_ = read (rrd_file->fd, (dst), rrd_rd_wanted_); \
        if (rrd_rd_got_ < 0 || (size_t) rrd_rd_got_ != rrd_rd_wanted_) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += rrd_rd_got_; \
    } while (0)
#endif
/* PAGE_START(addr): round addr down to the first byte of its page.
 * The expansion uses a variable named _page_size, which must be in scope
 * wherever the macro is used. */
#if defined(USE_MADVISE) || defined(HAVE_POSIX_FADVISE)
# if !defined(PAGE_START)
#  define PAGE_START(addr) ((addr) & ~(_page_size - 1))
# endif
#endif
69 /* Open a database file, return its header and an open filehandle,
70 * positioned to the first cdp in the first rra.
71 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
72 * before returning an error. Do not call rrd_close upon failure of rrd_open.
73 */
75 rrd_file_t *rrd_open(
76 const char *const file_name,
77 rrd_t *rrd,
78 unsigned rdwr)
79 {
80 int flags = 0;
82 /* Win32 can't use S_IRUSR flag */
83 #ifndef WIN32
84 mode_t mode = S_IRUSR;
85 #else
86 int mode = 0;
87 #endif
88 int version;
90 #ifdef HAVE_MMAP
91 ssize_t _page_size = sysconf(_SC_PAGESIZE);
92 int mm_prot = PROT_READ, mm_flags = 0;
93 char *data = MAP_FAILED;
94 #endif
95 off_t offset = 0;
96 struct stat statb;
97 rrd_file_t *rrd_file = NULL;
98 off_t newfile_size = 0;
100 if (rdwr & RRD_CREAT) {
101 /* yes bad inline signaling alert, we are using the
102 floatcookie to pass the size in ... only used in resize */
103 newfile_size = (off_t) rrd->stat_head->float_cookie;
104 free(rrd->stat_head);
105 }
106 rrd_init(rrd);
107 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
108 if (rrd_file == NULL) {
109 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
110 return NULL;
111 }
112 memset(rrd_file, 0, sizeof(rrd_file_t));
114 #ifdef DEBUG
115 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
116 (RRD_READONLY | RRD_READWRITE)) {
117 /* Both READONLY and READWRITE were given, which is invalid. */
118 rrd_set_error("in read/write request mask");
119 exit(-1);
120 }
121 #endif
122 if (rdwr & RRD_READONLY) {
123 flags |= O_RDONLY;
124 #ifdef HAVE_MMAP
125 mm_flags = MAP_PRIVATE;
126 # ifdef MAP_NORESERVE
127 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
128 # endif
129 #endif
130 } else {
131 if (rdwr & RRD_READWRITE) {
132 #ifndef WIN32 // Win32 can't use this mode
133 mode |= S_IWUSR;
134 #endif
135 flags |= O_RDWR;
136 #ifdef HAVE_MMAP
137 mm_flags = MAP_SHARED;
138 mm_prot |= PROT_WRITE;
139 #endif
140 }
141 if (rdwr & RRD_CREAT) {
142 flags |= (O_CREAT | O_TRUNC);
143 }
144 }
145 if (rdwr & RRD_READAHEAD) {
146 #ifdef MAP_POPULATE
147 mm_flags |= MAP_POPULATE; /* populate ptes and data */
148 #endif
149 #if defined MAP_NONBLOCK
150 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
151 #endif
152 }
153 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
154 flags |= O_BINARY;
155 #endif
157 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
158 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
159 goto out_free;
160 }
162 #ifdef HAVE_MMAP
163 #ifdef HAVE_BROKEN_MS_ASYNC
164 if (rdwr & RRD_READWRITE) {
165 /* some unices, the files mtime does not get update
166 on msync MS_ASYNC, in order to help them,
167 we update the the timestamp at this point.
168 The thing happens pretty 'close' to the open
169 call so the chances of a race should be minimal.
171 Maybe ask your vendor to fix your OS ... */
172 utime(file_name,NULL);
173 }
174 #endif
175 #endif
177 /* Better try to avoid seeks as much as possible. stat may be heavy but
178 * many concurrent seeks are even worse. */
179 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
180 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
181 goto out_close;
182 }
183 if (newfile_size == 0) {
184 rrd_file->file_len = statb.st_size;
185 } else {
186 rrd_file->file_len = newfile_size;
187 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
188 write(rrd_file->fd, "\0", 1); /* poke */
189 lseek(rrd_file->fd, 0, SEEK_SET);
190 }
191 #ifdef HAVE_POSIX_FADVISE
192 /* In general we need no read-ahead when dealing with rrd_files.
193 When we stop reading, it is highly unlikely that we start up again.
194 In this manner we actually save time and diskaccess (and buffer cache).
195 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
196 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
197 #endif
199 /*
200 if (rdwr & RRD_READWRITE)
201 {
202 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
203 rrd_set_error("failed to disable the stream buffer\n");
204 return (-1);
205 }
206 }
207 */
208 #ifdef HAVE_MMAP
209 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
210 rrd_file->fd, offset);
212 /* lets see if the first read worked */
213 if (data == MAP_FAILED) {
214 rrd_set_error("mmaping file '%s': %s", file_name,
215 rrd_strerror(errno));
216 goto out_close;
217 }
218 rrd_file->file_start = data;
219 if (rdwr & RRD_CREAT) {
220 memset(data, DNAN, newfile_size - 1);
221 goto out_done;
222 }
223 #endif
224 if (rdwr & RRD_CREAT)
225 goto out_done;
226 #ifdef USE_MADVISE
227 if (rdwr & RRD_COPY) {
228 /* We will read everything in a moment (copying) */
229 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
230 } else {
231 /* We do not need to read anything in for the moment */
232 madvise(data, rrd_file->file_len, MADV_RANDOM);
233 /* the stat_head will be needed soonish, so hint accordingly */
234 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
235 }
236 #endif
238 __rrd_read(rrd->stat_head, stat_head_t,
239 1);
241 /* lets do some test if we are on track ... */
242 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
243 rrd_set_error("'%s' is not an RRD file", file_name);
244 goto out_nullify_head;
245 }
247 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
248 rrd_set_error("This RRD was created on another architecture");
249 goto out_nullify_head;
250 }
252 version = atoi(rrd->stat_head->version);
254 if (version > atoi(RRD_VERSION)) {
255 rrd_set_error("can't handle RRD file version %s",
256 rrd->stat_head->version);
257 goto out_nullify_head;
258 }
259 #if defined USE_MADVISE
260 /* the ds_def will be needed soonish, so hint accordingly */
261 madvise(data + PAGE_START(offset),
262 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
263 #endif
264 __rrd_read(rrd->ds_def, ds_def_t,
265 rrd->stat_head->ds_cnt);
267 #if defined USE_MADVISE
268 /* the rra_def will be needed soonish, so hint accordingly */
269 madvise(data + PAGE_START(offset),
270 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
271 #endif
272 __rrd_read(rrd->rra_def, rra_def_t,
273 rrd->stat_head->rra_cnt);
275 /* handle different format for the live_head */
276 if (version < 3) {
277 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
278 if (rrd->live_head == NULL) {
279 rrd_set_error("live_head_t malloc");
280 goto out_close;
281 }
282 #if defined USE_MADVISE
283 /* the live_head will be needed soonish, so hint accordingly */
284 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
285 #endif
286 __rrd_read(rrd->legacy_last_up, time_t,
287 1);
289 rrd->live_head->last_up = *rrd->legacy_last_up;
290 rrd->live_head->last_up_usec = 0;
291 } else {
292 #if defined USE_MADVISE
293 /* the live_head will be needed soonish, so hint accordingly */
294 madvise(data + PAGE_START(offset),
295 sizeof(live_head_t), MADV_WILLNEED);
296 #endif
297 __rrd_read(rrd->live_head, live_head_t,
298 1);
299 }
300 __rrd_read(rrd->pdp_prep, pdp_prep_t,
301 rrd->stat_head->ds_cnt);
302 __rrd_read(rrd->cdp_prep, cdp_prep_t,
303 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
304 __rrd_read(rrd->rra_ptr, rra_ptr_t,
305 rrd->stat_head->rra_cnt);
307 rrd_file->header_len = offset;
308 rrd_file->pos = offset;
310 {
311 unsigned long row_cnt = 0;
312 unsigned long i;
314 for (i=0; i<rrd->stat_head->rra_cnt; i++)
315 row_cnt += rrd->rra_def[i].row_cnt;
317 size_t correct_len = rrd_file->header_len +
318 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
320 if (correct_len > rrd_file->file_len)
321 {
322 rrd_set_error("'%s' is too small (should be %ld bytes)",
323 file_name, (long long) correct_len);
324 goto out_nullify_head;
325 }
326 }
328 out_done:
329 return (rrd_file);
330 out_nullify_head:
331 rrd->stat_head = NULL;
332 out_close:
333 #ifdef HAVE_MMAP
334 if (data != MAP_FAILED)
335 munmap(data, rrd_file->file_len);
336 #endif
337 close(rrd_file->fd);
338 out_free:
339 free(rrd_file);
340 return NULL;
341 }
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * rrd_file  open file whose mapping (file_start, file_len) is inspected
 * mark      label put in front of every output line
 *
 * Output goes to stderr: one line per run of consecutive pages that share
 * the same residency state, giving the address of the run inside the
 * mapping and its length in pages. Without mmap support only a notice is
 * printed.
 */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    size_t    page_cnt;
    size_t    page;
    size_t    run_start = 0;
    unsigned  is_in = 0, was_in = 0;
    unsigned char *vec;

    if (_page_size <= 0)
        return;

    /* mincore() reports one byte per page of the range */
    page_cnt = ((size_t) rrd_file->file_len + (size_t) _page_size - 1)
        / (size_t) _page_size;
    vec = calloc(page_cnt ? page_cnt : 1, 1);
    if (vec == NULL)
        return;

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
        for (page = 0; page < page_cnt; ++page) {
            is_in = vec[page] & 1;  /* if lsb set then is core resident */
            if (page == 0)
                was_in = is_in;
            if (was_in != is_in) {
                fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                        was_in ? "" : "not ",
                        (void *) (rrd_file->file_start
                                  + run_start * (size_t) _page_size),
                        (long) (page - run_start));
                was_in = is_in;
                run_start = page;
            }
        }
        /* the last run is still pending when the loop ends */
        fprintf(stderr,
                "%s: %sin core: %p len %ld\n", mark,
                was_in ? "" : "not ",
                (void *) (rrd_file->file_start
                          + run_start * (size_t) _page_size),
                (long) (page - run_start));
    } else
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));

    free(vec);
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
390 /* drop cache except for the header and the active pages */
391 void rrd_dontneed(
392 rrd_file_t *rrd_file,
393 rrd_t *rrd)
394 {
395 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
396 size_t dontneed_start;
397 size_t rra_start;
398 size_t active_block;
399 size_t i;
400 ssize_t _page_size = sysconf(_SC_PAGESIZE);
402 if (rrd_file == NULL) {
403 #if defined DEBUG && DEBUG
404 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
405 #endif
406 return;
407 }
409 #if defined DEBUG && DEBUG > 1
410 mincore_print(rrd_file, "before");
411 #endif
413 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
414 rra_start = rrd_file->header_len;
415 dontneed_start = PAGE_START(rra_start) + _page_size;
416 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
417 active_block =
418 PAGE_START(rra_start
419 + rrd->rra_ptr[i].cur_row
420 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
421 if (active_block > dontneed_start) {
422 #ifdef USE_MADVISE
423 madvise(rrd_file->file_start + dontneed_start,
424 active_block - dontneed_start - 1, MADV_DONTNEED);
425 #endif
426 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
427 #ifdef HAVE_POSIX_FADVISE
428 posix_fadvise(rrd_file->fd, dontneed_start,
429 active_block - dontneed_start - 1,
430 POSIX_FADV_DONTNEED);
431 #endif
432 }
433 dontneed_start = active_block;
434 /* do not release 'hot' block if update for this RAA will occur
435 * within 10 minutes */
436 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
437 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
438 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
439 dontneed_start += _page_size;
440 }
441 rra_start +=
442 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
443 sizeof(rrd_value_t);
444 }
446 if (dontneed_start < rrd_file->file_len) {
447 #ifdef USE_MADVISE
448 madvise(rrd_file->file_start + dontneed_start,
449 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
450 #endif
451 #ifdef HAVE_POSIX_FADVISE
452 posix_fadvise(rrd_file->fd, dontneed_start,
453 rrd_file->file_len - dontneed_start,
454 POSIX_FADV_DONTNEED);
455 #endif
456 }
458 #if defined DEBUG && DEBUG > 1
459 mincore_print(rrd_file, "after");
460 #endif
461 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
462 }
468 int rrd_close(
469 rrd_file_t *rrd_file)
470 {
471 int ret;
473 #ifdef HAVE_MMAP
474 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
475 if (ret != 0)
476 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
477 ret = munmap(rrd_file->file_start, rrd_file->file_len);
478 if (ret != 0)
479 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
480 #endif
481 ret = close(rrd_file->fd);
482 if (ret != 0)
483 rrd_set_error("closing file: %s", rrd_strerror(errno));
484 free(rrd_file);
485 rrd_file = NULL;
486 return ret;
487 }
490 /* Set position of rrd_file. */
492 off_t rrd_seek(
493 rrd_file_t *rrd_file,
494 off_t off,
495 int whence)
496 {
497 off_t ret = 0;
499 #ifdef HAVE_MMAP
500 if (whence == SEEK_SET)
501 rrd_file->pos = off;
502 else if (whence == SEEK_CUR)
503 rrd_file->pos += off;
504 else if (whence == SEEK_END)
505 rrd_file->pos = rrd_file->file_len + off;
506 #else
507 ret = lseek(rrd_file->fd, off, whence);
508 if (ret < 0)
509 rrd_set_error("lseek: %s", rrd_strerror(errno));
510 rrd_file->pos = ret;
511 #endif
512 /* mimic fseek, which returns 0 upon success */
513 return ret < 0; /*XXX: or just ret to mimic lseek */
514 }
517 /* Get current position in rrd_file. */
519 off_t rrd_tell(
520 rrd_file_t *rrd_file)
521 {
522 return rrd_file->pos;
523 }
526 /* Read count bytes into buffer buf, starting at rrd_file->pos.
527 * Returns the number of bytes read or <0 on error. */
529 ssize_t rrd_read(
530 rrd_file_t *rrd_file,
531 void *buf,
532 size_t count)
533 {
534 #ifdef HAVE_MMAP
535 size_t _cnt = count;
536 ssize_t _surplus;
538 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
539 return 0;
540 if (buf == NULL)
541 return -1; /* EINVAL */
542 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
543 if (_surplus > 0) { /* short read */
544 _cnt -= _surplus;
545 }
546 if (_cnt == 0)
547 return 0; /* EOF */
548 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
550 rrd_file->pos += _cnt; /* mimmic read() semantics */
551 return _cnt;
552 #else
553 ssize_t ret;
555 ret = read(rrd_file->fd, buf, count);
556 if (ret > 0)
557 rrd_file->pos += ret; /* mimmic read() semantics */
558 return ret;
559 #endif
560 }
563 /* Write count bytes from buffer buf to the current position
564 * rrd_file->pos of rrd_file->fd.
565 * Returns the number of bytes written or <0 on error. */
567 ssize_t rrd_write(
568 rrd_file_t *rrd_file,
569 const void *buf,
570 size_t count)
571 {
572 #ifdef HAVE_MMAP
573 if (count == 0)
574 return 0;
575 if (buf == NULL)
576 return -1; /* EINVAL */
577 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
578 rrd_file->pos += count;
579 return count; /* mimmic write() semantics */
580 #else
581 ssize_t _sz = write(rrd_file->fd, buf, count);
583 if (_sz > 0)
584 rrd_file->pos += _sz;
585 return _sz;
586 #endif
587 }
590 /* flush all data pending to be written to FD. */
592 void rrd_flush(
593 rrd_file_t *rrd_file)
594 {
595 /*
596 * Win32 can only flush files by FlushFileBuffers function,
597 * but it works with HANDLE hFile, not FILE. So skipping
598 */
599 #ifndef WIN32
600 if (fdatasync(rrd_file->fd) != 0) {
601 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
602 rrd_strerror(errno));
603 }
604 #endif
605 }
608 /* Initialize RRD header. */
610 void rrd_init(
611 rrd_t *rrd)
612 {
613 rrd->stat_head = NULL;
614 rrd->ds_def = NULL;
615 rrd->rra_def = NULL;
616 rrd->live_head = NULL;
617 rrd->legacy_last_up = NULL;
618 rrd->rra_ptr = NULL;
619 rrd->pdp_prep = NULL;
620 rrd->cdp_prep = NULL;
621 rrd->rrd_value = NULL;
622 }
625 /* free RRD header data. */
627 #ifdef HAVE_MMAP
628 void rrd_free(
629 rrd_t *rrd)
630 {
631 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
632 free(rrd->live_head);
633 }
634 }
635 #else
636 void rrd_free(
637 rrd_t *rrd)
638 {
639 free(rrd->live_head);
640 free(rrd->stat_head);
641 free(rrd->ds_def);
642 free(rrd->rra_def);
643 free(rrd->rra_ptr);
644 free(rrd->pdp_prep);
645 free(rrd->cdp_prep);
646 free(rrd->rrd_value);
647 }
648 #endif
/* Release a buffer with free(). Offered to external libraries so that
 * memory allocated by the rrd library is freed by the rrd library.
 * Passing NULL is allowed. */
void rrd_freemem(
    void *mem)
{
    void     *const victim = mem;

    free(victim);
}