9f1a61f5c7796c1431671d6b7950df2758d4e909
1 /*****************************************************************************
2 * RRDtool 1.3.4 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
12 #ifdef WIN32
13 #include <stdlib.h>
14 #include <fcntl.h>
15 #include <sys/stat.h>
16 #endif
18 #define MEMBLK 8192
20 /* DEBUG 2 prints information obtained via mincore(2) */
21 #define DEBUG 1
22 /* do not calculate exact madvise hints but assume 1 page for headers and
23 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
/*
 * __rrd_read(dst, dst_t, cnt)
 *
 * Load the next `cnt` objects of type `dst_t` from the RRD header into `dst`
 * and advance `offset` past them.
 *
 * The macro expands inside rrd_open() and relies on that function's locals
 * (`rrd_file`, `offset` and, with HAVE_MMAP, `data`) as well as on its
 * `out_nullify_head` label, which is the jump target on every failure.
 *
 * It is wrapped in do { ... } while (0) so that `__rrd_read(...);` is a
 * single statement wherever it is used.
 */
#ifdef HAVE_MMAP
/* With mmap nothing is copied: dst is pointed into the mapping.
   The cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        if (offset + wanted > rrd_file->file_len) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += wanted; \
    } while (0)
#else
/* Without mmap a buffer is malloc'ed and filled with read(); the buffer
   stays attached to dst even when the read comes up short. */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        ssize_t got; \
        if (((dst) = (dst_t*)malloc(wanted)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        got = read (rrd_file->fd, (dst), wanted); \
        if (got < 0 || (size_t) got != wanted) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += got; \
    } while (0)
#endif

/* get the address of the start of this page
 * (expects a local `_page_size` to be in scope at the point of use) */
#if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
#ifndef PAGE_START
#define PAGE_START(addr) ((addr)&(~(_page_size-1)))
#endif
#endif
64 /* Open a database file, return its header and an open filehandle,
65 * positioned to the first cdp in the first rra.
66 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
67 * before returning an error. Do not call rrd_close upon failure of rrd_open.
68 */
70 rrd_file_t *rrd_open(
71 const char *const file_name,
72 rrd_t *rrd,
73 unsigned rdwr)
74 {
75 int flags = 0;
77 /* Win32 can't use S_IRUSR flag */
78 #ifndef WIN32
79 mode_t mode = S_IRUSR;
80 #else
81 int mode = 0;
82 #endif
83 int version;
85 #ifdef HAVE_MMAP
86 ssize_t _page_size = sysconf(_SC_PAGESIZE);
87 int mm_prot = PROT_READ, mm_flags = 0;
88 char *data = MAP_FAILED;
89 #endif
90 off_t offset = 0;
91 struct stat statb;
92 rrd_file_t *rrd_file = NULL;
93 off_t newfile_size = 0;
95 if (rdwr & RRD_CREAT) {
96 /* yes bad inline signaling alert, we are using the
97 floatcookie to pass the size in ... only used in resize */
98 newfile_size = (off_t) rrd->stat_head->float_cookie;
99 free(rrd->stat_head);
100 }
101 rrd_init(rrd);
102 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
103 if (rrd_file == NULL) {
104 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
105 return NULL;
106 }
107 memset(rrd_file, 0, sizeof(rrd_file_t));
109 #ifdef DEBUG
110 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
111 (RRD_READONLY | RRD_READWRITE)) {
112 /* Both READONLY and READWRITE were given, which is invalid. */
113 rrd_set_error("in read/write request mask");
114 exit(-1);
115 }
116 #endif
117 if (rdwr & RRD_READONLY) {
118 flags |= O_RDONLY;
119 #ifdef HAVE_MMAP
120 mm_flags = MAP_PRIVATE;
121 # ifdef MAP_NORESERVE
122 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
123 # endif
124 #endif
125 } else {
126 if (rdwr & RRD_READWRITE) {
127 #ifndef WIN32 // Win32 can't use this mode
128 mode |= S_IWUSR;
129 #endif
130 flags |= O_RDWR;
131 #ifdef HAVE_MMAP
132 mm_flags = MAP_SHARED;
133 mm_prot |= PROT_WRITE;
134 #endif
135 }
136 if (rdwr & RRD_CREAT) {
137 flags |= (O_CREAT | O_TRUNC);
138 }
139 }
140 if (rdwr & RRD_READAHEAD) {
141 #ifdef MAP_POPULATE
142 mm_flags |= MAP_POPULATE; /* populate ptes and data */
143 #endif
144 #if defined MAP_NONBLOCK
145 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
146 #endif
147 }
148 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
149 flags |= O_BINARY;
150 #endif
152 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
153 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
154 goto out_free;
155 }
157 #ifdef HAVE_MMAP
158 #ifdef HAVE_BROKEN_MS_ASYNC
159 if (rdwr & RRD_READWRITE) {
160 /* some unices, the files mtime does not get update
161 on msync MS_ASYNC, in order to help them,
162 we update the the timestamp at this point.
163 The thing happens pretty 'close' to the open
164 call so the chances of a race should be minimal.
166 Maybe ask your vendor to fix your OS ... */
167 utime(file_name,NULL);
168 }
169 #endif
170 #endif
172 /* Better try to avoid seeks as much as possible. stat may be heavy but
173 * many concurrent seeks are even worse. */
174 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
175 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
176 goto out_close;
177 }
178 if (newfile_size == 0) {
179 rrd_file->file_len = statb.st_size;
180 } else {
181 rrd_file->file_len = newfile_size;
182 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
183 write(rrd_file->fd, "\0", 1); /* poke */
184 lseek(rrd_file->fd, 0, SEEK_SET);
185 }
186 #ifdef HAVE_POSIX_FADVISE
187 /* In general we need no read-ahead when dealing with rrd_files.
188 When we stop reading, it is highly unlikely that we start up again.
189 In this manner we actually save time and diskaccess (and buffer cache).
190 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
191 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
192 #endif
194 /*
195 if (rdwr & RRD_READWRITE)
196 {
197 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
198 rrd_set_error("failed to disable the stream buffer\n");
199 return (-1);
200 }
201 }
202 */
203 #ifdef HAVE_MMAP
204 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
205 rrd_file->fd, offset);
207 /* lets see if the first read worked */
208 if (data == MAP_FAILED) {
209 rrd_set_error("mmaping file '%s': %s", file_name,
210 rrd_strerror(errno));
211 goto out_close;
212 }
213 rrd_file->file_start = data;
214 if (rdwr & RRD_CREAT) {
215 memset(data, DNAN, newfile_size - 1);
216 goto out_done;
217 }
218 #endif
219 if (rdwr & RRD_CREAT)
220 goto out_done;
221 #ifdef USE_MADVISE
222 if (rdwr & RRD_COPY) {
223 /* We will read everything in a moment (copying) */
224 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
225 } else {
226 /* We do not need to read anything in for the moment */
227 madvise(data, rrd_file->file_len, MADV_RANDOM);
228 /* the stat_head will be needed soonish, so hint accordingly */
229 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
230 }
231 #endif
233 __rrd_read(rrd->stat_head, stat_head_t,
234 1);
236 /* lets do some test if we are on track ... */
237 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
238 rrd_set_error("'%s' is not an RRD file", file_name);
239 goto out_nullify_head;
240 }
242 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
243 rrd_set_error("This RRD was created on another architecture");
244 goto out_nullify_head;
245 }
247 version = atoi(rrd->stat_head->version);
249 if (version > atoi(RRD_VERSION)) {
250 rrd_set_error("can't handle RRD file version %s",
251 rrd->stat_head->version);
252 goto out_nullify_head;
253 }
254 #if defined USE_MADVISE
255 /* the ds_def will be needed soonish, so hint accordingly */
256 madvise(data + PAGE_START(offset),
257 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
258 #endif
259 __rrd_read(rrd->ds_def, ds_def_t,
260 rrd->stat_head->ds_cnt);
262 #if defined USE_MADVISE
263 /* the rra_def will be needed soonish, so hint accordingly */
264 madvise(data + PAGE_START(offset),
265 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
266 #endif
267 __rrd_read(rrd->rra_def, rra_def_t,
268 rrd->stat_head->rra_cnt);
270 /* handle different format for the live_head */
271 if (version < 3) {
272 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
273 if (rrd->live_head == NULL) {
274 rrd_set_error("live_head_t malloc");
275 goto out_close;
276 }
277 #if defined USE_MADVISE
278 /* the live_head will be needed soonish, so hint accordingly */
279 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
280 #endif
281 __rrd_read(rrd->legacy_last_up, time_t,
282 1);
284 rrd->live_head->last_up = *rrd->legacy_last_up;
285 rrd->live_head->last_up_usec = 0;
286 } else {
287 #if defined USE_MADVISE
288 /* the live_head will be needed soonish, so hint accordingly */
289 madvise(data + PAGE_START(offset),
290 sizeof(live_head_t), MADV_WILLNEED);
291 #endif
292 __rrd_read(rrd->live_head, live_head_t,
293 1);
294 }
295 __rrd_read(rrd->pdp_prep, pdp_prep_t,
296 rrd->stat_head->ds_cnt);
297 __rrd_read(rrd->cdp_prep, cdp_prep_t,
298 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
299 __rrd_read(rrd->rra_ptr, rra_ptr_t,
300 rrd->stat_head->rra_cnt);
302 rrd_file->header_len = offset;
303 rrd_file->pos = offset;
305 {
306 unsigned long row_cnt = 0;
307 unsigned long i;
309 for (i=0; i<rrd->stat_head->rra_cnt; i++)
310 row_cnt += rrd->rra_def[i].row_cnt;
312 off_t correct_len = rrd_file->header_len +
313 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
315 if (correct_len > rrd_file->file_len)
316 {
317 rrd_set_error("'%s' is too small (should be %ld bytes)",
318 file_name, (long long) correct_len);
319 goto out_nullify_head;
320 }
321 }
323 out_done:
324 return (rrd_file);
325 out_nullify_head:
326 rrd->stat_head = NULL;
327 out_close:
328 #ifdef HAVE_MMAP
329 if (data != MAP_FAILED)
330 munmap(data, rrd_file->file_len);
331 #endif
332 close(rrd_file->fd);
333 out_free:
334 free(rrd_file);
335 return NULL;
336 }
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * rrd_file  file whose mapping (file_start, file_len) is queried with
 *           mincore()
 * mark      label put in front of every line written to stderr
 *
 * Without HAVE_MMAP only a notice is printed.
 */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    off_t     off;
    unsigned char *vec;
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);

    off = rrd_file->file_len +
        ((rrd_file->file_len + _page_size - 1) / _page_size);
    vec = malloc(off);
    if (vec != NULL) {
        memset(vec, 0, off);
        if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
            off_t     prev;
            unsigned  is_in = 0, was_in = 0;

            /* NOTE(review): mincore() fills one vec byte per page, yet this
             * loop walks file_len entries; the entries past the page count
             * are the zeros written by memset above. Confirm whether the
             * loop should be bounded by the page count instead. */
            for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
                is_in = vec[off] & 1;   /* if lsb set then is core resident */
                if (off == 0)
                    was_in = is_in;
                if (was_in != is_in) {
                    fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                            was_in ? "" : "not ", (void *) (vec + prev),
                            (long) (off - prev));
                    was_in = is_in;
                    prev = off;
                }
            }
            fprintf(stderr,
                    "%s: %sin core: %p len %ld\n", mark,
                    was_in ? "" : "not ", (void *) (vec + prev),
                    (long) (off - prev));
        } else
            fprintf(stderr, "mincore: %s", rrd_strerror(errno));
        /* the residency vector is only needed for printing */
        free(vec);
    }
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
385 /* drop cache except for the header and the active pages */
386 void rrd_dontneed(
387 rrd_file_t *rrd_file,
388 rrd_t *rrd)
389 {
390 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
391 unsigned long dontneed_start;
392 unsigned long rra_start;
393 unsigned long active_block;
394 unsigned long i;
395 ssize_t _page_size = sysconf(_SC_PAGESIZE);
397 if (rrd_file == NULL) {
398 #if defined DEBUG && DEBUG
399 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
400 #endif
401 return;
402 }
404 #if defined DEBUG && DEBUG > 1
405 mincore_print(rrd_file, "before");
406 #endif
408 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
409 rra_start = rrd_file->header_len;
410 dontneed_start = PAGE_START(rra_start) + _page_size;
411 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
412 active_block =
413 PAGE_START(rra_start
414 + rrd->rra_ptr[i].cur_row
415 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
416 if (active_block > dontneed_start) {
417 #ifdef USE_MADVISE
418 madvise(rrd_file->file_start + dontneed_start,
419 active_block - dontneed_start - 1, MADV_DONTNEED);
420 #endif
421 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
422 #ifdef HAVE_POSIX_FADVISE
423 posix_fadvise(rrd_file->fd, dontneed_start,
424 active_block - dontneed_start - 1,
425 POSIX_FADV_DONTNEED);
426 #endif
427 }
428 dontneed_start = active_block;
429 /* do not release 'hot' block if update for this RAA will occur
430 * within 10 minutes */
431 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
432 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
433 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
434 dontneed_start += _page_size;
435 }
436 rra_start +=
437 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
438 sizeof(rrd_value_t);
439 }
441 if (dontneed_start < rrd_file->file_len) {
442 #ifdef USE_MADVISE
443 madvise(rrd_file->file_start + dontneed_start,
444 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
445 #endif
446 #ifdef HAVE_POSIX_FADVISE
447 posix_fadvise(rrd_file->fd, dontneed_start,
448 rrd_file->file_len - dontneed_start,
449 POSIX_FADV_DONTNEED);
450 #endif
451 }
453 #if defined DEBUG && DEBUG > 1
454 mincore_print(rrd_file, "after");
455 #endif
456 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
457 }
463 int rrd_close(
464 rrd_file_t *rrd_file)
465 {
466 int ret;
468 #ifdef HAVE_MMAP
469 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
470 if (ret != 0)
471 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
472 ret = munmap(rrd_file->file_start, rrd_file->file_len);
473 if (ret != 0)
474 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
475 #endif
476 ret = close(rrd_file->fd);
477 if (ret != 0)
478 rrd_set_error("closing file: %s", rrd_strerror(errno));
479 free(rrd_file);
480 rrd_file = NULL;
481 return ret;
482 }
485 /* Set position of rrd_file. */
487 off_t rrd_seek(
488 rrd_file_t *rrd_file,
489 off_t off,
490 int whence)
491 {
492 off_t ret = 0;
494 #ifdef HAVE_MMAP
495 if (whence == SEEK_SET)
496 rrd_file->pos = off;
497 else if (whence == SEEK_CUR)
498 rrd_file->pos += off;
499 else if (whence == SEEK_END)
500 rrd_file->pos = rrd_file->file_len + off;
501 #else
502 ret = lseek(rrd_file->fd, off, whence);
503 if (ret < 0)
504 rrd_set_error("lseek: %s", rrd_strerror(errno));
505 rrd_file->pos = ret;
506 #endif
507 /* mimic fseek, which returns 0 upon success */
508 return ret < 0; /*XXX: or just ret to mimic lseek */
509 }
512 /* Get current position in rrd_file. */
514 off_t rrd_tell(
515 rrd_file_t *rrd_file)
516 {
517 return rrd_file->pos;
518 }
521 /* Read count bytes into buffer buf, starting at rrd_file->pos.
522 * Returns the number of bytes read or <0 on error. */
524 ssize_t rrd_read(
525 rrd_file_t *rrd_file,
526 void *buf,
527 size_t count)
528 {
529 #ifdef HAVE_MMAP
530 size_t _cnt = count;
531 ssize_t _surplus;
533 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
534 return 0;
535 if (buf == NULL)
536 return -1; /* EINVAL */
537 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
538 if (_surplus > 0) { /* short read */
539 _cnt -= _surplus;
540 }
541 if (_cnt == 0)
542 return 0; /* EOF */
543 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
545 rrd_file->pos += _cnt; /* mimmic read() semantics */
546 return _cnt;
547 #else
548 ssize_t ret;
550 ret = read(rrd_file->fd, buf, count);
551 if (ret > 0)
552 rrd_file->pos += ret; /* mimmic read() semantics */
553 return ret;
554 #endif
555 }
558 /* Write count bytes from buffer buf to the current position
559 * rrd_file->pos of rrd_file->fd.
560 * Returns the number of bytes written or <0 on error. */
562 ssize_t rrd_write(
563 rrd_file_t *rrd_file,
564 const void *buf,
565 size_t count)
566 {
567 #ifdef HAVE_MMAP
568 if (count == 0)
569 return 0;
570 if (buf == NULL)
571 return -1; /* EINVAL */
572 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
573 rrd_file->pos += count;
574 return count; /* mimmic write() semantics */
575 #else
576 ssize_t _sz = write(rrd_file->fd, buf, count);
578 if (_sz > 0)
579 rrd_file->pos += _sz;
580 return _sz;
581 #endif
582 }
585 /* flush all data pending to be written to FD. */
587 void rrd_flush(
588 rrd_file_t *rrd_file)
589 {
590 /*
591 * Win32 can only flush files by FlushFileBuffers function,
592 * but it works with HANDLE hFile, not FILE. So skipping
593 */
594 #ifndef WIN32
595 if (fdatasync(rrd_file->fd) != 0) {
596 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
597 rrd_strerror(errno));
598 }
599 #endif
600 }
603 /* Initialize RRD header. */
605 void rrd_init(
606 rrd_t *rrd)
607 {
608 rrd->stat_head = NULL;
609 rrd->ds_def = NULL;
610 rrd->rra_def = NULL;
611 rrd->live_head = NULL;
612 rrd->legacy_last_up = NULL;
613 rrd->rra_ptr = NULL;
614 rrd->pdp_prep = NULL;
615 rrd->cdp_prep = NULL;
616 rrd->rrd_value = NULL;
617 }
620 /* free RRD header data. */
622 #ifdef HAVE_MMAP
623 void rrd_free(
624 rrd_t *rrd)
625 {
626 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
627 free(rrd->live_head);
628 }
629 }
630 #else
631 void rrd_free(
632 rrd_t *rrd)
633 {
634 free(rrd->live_head);
635 free(rrd->stat_head);
636 free(rrd->ds_def);
637 free(rrd->rra_def);
638 free(rrd->rra_ptr);
639 free(rrd->pdp_prep);
640 free(rrd->cdp_prep);
641 free(rrd->rrd_value);
642 }
643 #endif
/* Hands mem to free(). Meant for external libraries that need to release
 * memory allocated by the rrd library. */
void rrd_freemem(
    void *mem)
{
    void     *const victim = mem;

    free(victim);
}