1 /*****************************************************************************
2 * RRDtool 1.3.4 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
12 #ifdef WIN32
13 #include <stdlib.h>
14 #include <fcntl.h>
15 #include <sys/stat.h>
16 #endif
18 #define MEMBLK 8192
20 /* DEBUG 2 prints information obtained via mincore(2) */
21 #define DEBUG 1
22 /* do not calculate exact madvise hints but assume 1 page for headers and
23 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
#ifdef HAVE_MMAP
/*
 * __rrd_read(dst, dst_t, cnt) -- mmap flavour.
 *
 * Points `dst` at `cnt` consecutive objects of type `dst_t` located at
 * `data + offset` and advances `offset` past them; nothing is copied.
 * If the requested range would end beyond rrd_file->file_len, an error is
 * set and control jumps to the `out_nullify_head` label.
 *
 * The expansion site must provide `data`, `offset`, `rrd_file` and the
 * `out_nullify_head` label.  The body is wrapped in do { } while (0) so the
 * macro behaves like a single statement (safe in unbraced if/else).
 *
 * The cast to void* is there to avoid this warning seen on ia64 with certain
 * versions of gcc: 'cast increases required alignment of target type'
 */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        if (offset + wanted > rrd_file->file_len) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += wanted; \
    } while (0)
#else
/*
 * __rrd_read(dst, dst_t, cnt) -- read(2) flavour.
 *
 * Allocates room for `cnt` objects of type `dst_t`, stores the pointer in
 * `dst`, fills the buffer from rrd_file->fd and advances `offset` by the
 * number of bytes read.  On allocation failure, read error or short read
 * an error is set and control jumps to the `out_nullify_head` label.
 *
 * The expansion site must provide `offset`, `rrd_file` and the
 * `out_nullify_head` label.
 */
#define __rrd_read(dst, dst_t, cnt) do { \
        size_t wanted = sizeof(dst_t)*(cnt); \
        ssize_t got; \
        if ((dst = (dst_t*)malloc(wanted)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        got = read (rrd_file->fd, dst, wanted); \
        /* read() returns -1 on error: test the sign before going unsigned */ \
        if (got < 0 || (size_t) got != wanted) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += got; \
    } while (0)
#endif
/* Round addr down to the first byte of the page containing it.
 * A variable named `_page_size` must be in scope where the macro is used;
 * the bit mask only rounds correctly when that size is a power of two. */
#if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
#ifndef PAGE_START
#define PAGE_START(addr) ((addr) & ~(_page_size - 1))
#endif
#endif
64 /* Open a database file, return its header and an open filehandle,
65 * positioned to the first cdp in the first rra.
66 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
67 * before returning an error. Do not call rrd_close upon failure of rrd_open.
68 */
70 rrd_file_t *rrd_open(
71 const char *const file_name,
72 rrd_t *rrd,
73 unsigned rdwr)
74 {
75 int flags = 0;
77 /* Win32 can't use S_IRUSR flag */
78 #ifndef WIN32
79 mode_t mode = S_IRUSR;
80 #else
81 int mode = 0;
82 #endif
83 int version;
85 #ifdef HAVE_MMAP
86 ssize_t _page_size = sysconf(_SC_PAGESIZE);
87 int mm_prot = PROT_READ, mm_flags = 0;
88 char *data = MAP_FAILED;
89 #endif
90 off_t offset = 0;
91 struct stat statb;
92 rrd_file_t *rrd_file = NULL;
93 off_t newfile_size = 0;
95 if (rdwr & RRD_CREAT) {
96 /* yes bad inline signaling alert, we are using the
97 floatcookie to pass the size in ... only used in resize */
98 newfile_size = (off_t) rrd->stat_head->float_cookie;
99 free(rrd->stat_head);
100 }
101 rrd_init(rrd);
102 rrd_file = (rrd_file_t*)malloc(sizeof(rrd_file_t));
103 if (rrd_file == NULL) {
104 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
105 return NULL;
106 }
107 memset(rrd_file, 0, sizeof(rrd_file_t));
109 #ifdef DEBUG
110 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
111 (RRD_READONLY | RRD_READWRITE)) {
112 /* Both READONLY and READWRITE were given, which is invalid. */
113 rrd_set_error("in read/write request mask");
114 exit(-1);
115 }
116 #endif
117 if (rdwr & RRD_READONLY) {
118 flags |= O_RDONLY;
119 #ifdef HAVE_MMAP
120 mm_flags = MAP_PRIVATE;
121 # ifdef MAP_NORESERVE
122 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
123 # endif
124 #endif
125 } else {
126 if (rdwr & RRD_READWRITE) {
127 #ifndef WIN32 // Win32 can't use this mode
128 mode |= S_IWUSR;
129 #endif
130 flags |= O_RDWR;
131 #ifdef HAVE_MMAP
132 mm_flags = MAP_SHARED;
133 mm_prot |= PROT_WRITE;
134 #endif
135 }
136 if (rdwr & RRD_CREAT) {
137 flags |= (O_CREAT | O_TRUNC);
138 }
139 }
140 if (rdwr & RRD_READAHEAD) {
141 #ifdef MAP_POPULATE
142 mm_flags |= MAP_POPULATE; /* populate ptes and data */
143 #endif
144 #if defined MAP_NONBLOCK
145 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
146 #endif
147 }
148 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
149 flags |= O_BINARY;
150 #endif
152 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
153 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
154 goto out_free;
155 }
157 /* Better try to avoid seeks as much as possible. stat may be heavy but
158 * many concurrent seeks are even worse. */
159 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
160 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
161 goto out_close;
162 }
163 if (newfile_size == 0) {
164 rrd_file->file_len = statb.st_size;
165 } else {
166 rrd_file->file_len = newfile_size;
167 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
168 write(rrd_file->fd, "\0", 1); /* poke */
169 lseek(rrd_file->fd, 0, SEEK_SET);
170 }
171 #ifdef HAVE_POSIX_FADVISE
172 /* In general we need no read-ahead when dealing with rrd_files.
173 When we stop reading, it is highly unlikely that we start up again.
174 In this manner we actually save time and diskaccess (and buffer cache).
175 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
176 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
177 #endif
179 /*
180 if (rdwr & RRD_READWRITE)
181 {
182 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
183 rrd_set_error("failed to disable the stream buffer\n");
184 return (-1);
185 }
186 }
187 */
188 #ifdef HAVE_MMAP
189 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
190 rrd_file->fd, offset);
192 /* lets see if the first read worked */
193 if (data == MAP_FAILED) {
194 rrd_set_error("mmaping file '%s': %s", file_name,
195 rrd_strerror(errno));
196 goto out_close;
197 }
198 rrd_file->file_start = data;
199 if (rdwr & RRD_CREAT) {
200 memset(data, DNAN, newfile_size - 1);
201 goto out_done;
202 }
203 #endif
204 if (rdwr & RRD_CREAT)
205 goto out_done;
206 #ifdef USE_MADVISE
207 if (rdwr & RRD_COPY) {
208 /* We will read everything in a moment (copying) */
209 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
210 } else {
211 /* We do not need to read anything in for the moment */
212 madvise(data, rrd_file->file_len, MADV_RANDOM);
213 /* the stat_head will be needed soonish, so hint accordingly */
214 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
215 }
216 #endif
218 __rrd_read(rrd->stat_head, stat_head_t,
219 1);
221 /* lets do some test if we are on track ... */
222 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
223 rrd_set_error("'%s' is not an RRD file", file_name);
224 goto out_nullify_head;
225 }
227 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
228 rrd_set_error("This RRD was created on another architecture");
229 goto out_nullify_head;
230 }
232 version = atoi(rrd->stat_head->version);
234 if (version > atoi(RRD_VERSION)) {
235 rrd_set_error("can't handle RRD file version %s",
236 rrd->stat_head->version);
237 goto out_nullify_head;
238 }
239 #if defined USE_MADVISE
240 /* the ds_def will be needed soonish, so hint accordingly */
241 madvise(data + PAGE_START(offset),
242 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
243 #endif
244 __rrd_read(rrd->ds_def, ds_def_t,
245 rrd->stat_head->ds_cnt);
247 #if defined USE_MADVISE
248 /* the rra_def will be needed soonish, so hint accordingly */
249 madvise(data + PAGE_START(offset),
250 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
251 #endif
252 __rrd_read(rrd->rra_def, rra_def_t,
253 rrd->stat_head->rra_cnt);
255 /* handle different format for the live_head */
256 if (version < 3) {
257 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
258 if (rrd->live_head == NULL) {
259 rrd_set_error("live_head_t malloc");
260 goto out_close;
261 }
262 #if defined USE_MADVISE
263 /* the live_head will be needed soonish, so hint accordingly */
264 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
265 #endif
266 __rrd_read(rrd->legacy_last_up, time_t,
267 1);
269 rrd->live_head->last_up = *rrd->legacy_last_up;
270 rrd->live_head->last_up_usec = 0;
271 } else {
272 #if defined USE_MADVISE
273 /* the live_head will be needed soonish, so hint accordingly */
274 madvise(data + PAGE_START(offset),
275 sizeof(live_head_t), MADV_WILLNEED);
276 #endif
277 __rrd_read(rrd->live_head, live_head_t,
278 1);
279 }
280 __rrd_read(rrd->pdp_prep, pdp_prep_t,
281 rrd->stat_head->ds_cnt);
282 __rrd_read(rrd->cdp_prep, cdp_prep_t,
283 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
284 __rrd_read(rrd->rra_ptr, rra_ptr_t,
285 rrd->stat_head->rra_cnt);
287 rrd_file->header_len = offset;
288 rrd_file->pos = offset;
290 {
291 unsigned long row_cnt = 0;
292 unsigned long i;
294 for (i=0; i<rrd->stat_head->rra_cnt; i++)
295 row_cnt += rrd->rra_def[i].row_cnt;
297 off_t correct_len = rrd_file->header_len +
298 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
300 if (correct_len > rrd_file->file_len)
301 {
302 rrd_set_error("'%s' is too small (should be %ld bytes)",
303 file_name, (long long) correct_len);
304 goto out_nullify_head;
305 }
306 }
308 out_done:
309 return (rrd_file);
310 out_nullify_head:
311 rrd->stat_head = NULL;
312 out_close:
313 #ifdef HAVE_MMAP
314 if (data != MAP_FAILED)
315 munmap(data, rrd_file->file_len);
316 #endif
317 close(rrd_file->fd);
318 out_free:
319 free(rrd_file);
320 return NULL;
321 }
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * rrd_file  open file whose mapping (file_start, file_len) is inspected
 * mark      label printed in front of every output line
 *
 * Output goes to stderr.  Without HAVE_MMAP only a notice is printed.
 */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    off_t     off;
    unsigned char *vec;
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);

    /* residency vector; sized generously (file length plus page count) */
    off = rrd_file->file_len +
        ((rrd_file->file_len + _page_size - 1) / _page_size);
    vec = malloc(off);
    if (vec != NULL) {
        memset(vec, 0, off);
        if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
            int       prev;
            unsigned  is_in = 0, was_in = 0;

            /* NOTE(review): the loop walks vec once per byte of the file
             * and prints addresses inside vec -- confirm whether per-page
             * indexing and file addresses were intended. */
            for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
                is_in = vec[off] & 1;   /* if lsb set then is core resident */
                if (off == 0)
                    was_in = is_in;
                if (was_in != is_in) {
                    /* %p requires void*, %ld requires long */
                    fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                            was_in ? "" : "not ", (void *) (vec + prev),
                            (long) (off - prev));
                    was_in = is_in;
                    prev = off;
                }
            }
            fprintf(stderr,
                    "%s: %sin core: %p len %ld\n", mark,
                    was_in ? "" : "not ", (void *) (vec + prev),
                    (long) (off - prev));
        } else
            fprintf(stderr, "mincore: %s", rrd_strerror(errno));
        /* release the residency vector on every path */
        free(vec);
    }
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
370 /* drop cache except for the header and the active pages */
371 void rrd_dontneed(
372 rrd_file_t *rrd_file,
373 rrd_t *rrd)
374 {
375 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
376 unsigned long dontneed_start;
377 unsigned long rra_start;
378 unsigned long active_block;
379 unsigned long i;
380 ssize_t _page_size = sysconf(_SC_PAGESIZE);
382 if (rrd_file == NULL) {
383 #if defined DEBUG && DEBUG
384 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
385 #endif
386 return;
387 }
389 #if defined DEBUG && DEBUG > 1
390 mincore_print(rrd_file, "before");
391 #endif
393 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
394 rra_start = rrd_file->header_len;
395 dontneed_start = PAGE_START(rra_start) + _page_size;
396 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
397 active_block =
398 PAGE_START(rra_start
399 + rrd->rra_ptr[i].cur_row
400 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
401 if (active_block > dontneed_start) {
402 #ifdef USE_MADVISE
403 madvise(rrd_file->file_start + dontneed_start,
404 active_block - dontneed_start - 1, MADV_DONTNEED);
405 #endif
406 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
407 #ifdef HAVE_POSIX_FADVISE
408 posix_fadvise(rrd_file->fd, dontneed_start,
409 active_block - dontneed_start - 1,
410 POSIX_FADV_DONTNEED);
411 #endif
412 }
413 dontneed_start = active_block;
414 /* do not release 'hot' block if update for this RAA will occur
415 * within 10 minutes */
416 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
417 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
418 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
419 dontneed_start += _page_size;
420 }
421 rra_start +=
422 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
423 sizeof(rrd_value_t);
424 }
426 if (dontneed_start < rrd_file->file_len) {
427 #ifdef USE_MADVISE
428 madvise(rrd_file->file_start + dontneed_start,
429 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
430 #endif
431 #ifdef HAVE_POSIX_FADVISE
432 posix_fadvise(rrd_file->fd, dontneed_start,
433 rrd_file->file_len - dontneed_start,
434 POSIX_FADV_DONTNEED);
435 #endif
436 }
438 #if defined DEBUG && DEBUG > 1
439 mincore_print(rrd_file, "after");
440 #endif
441 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
442 }
448 int rrd_close(
449 rrd_file_t *rrd_file)
450 {
451 int ret;
453 #ifdef HAVE_MMAP
454 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
455 if (ret != 0)
456 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
457 ret = munmap(rrd_file->file_start, rrd_file->file_len);
458 if (ret != 0)
459 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
460 #endif
461 ret = close(rrd_file->fd);
462 if (ret != 0)
463 rrd_set_error("closing file: %s", rrd_strerror(errno));
464 free(rrd_file);
465 rrd_file = NULL;
466 return ret;
467 }
470 /* Set position of rrd_file. */
472 off_t rrd_seek(
473 rrd_file_t *rrd_file,
474 off_t off,
475 int whence)
476 {
477 off_t ret = 0;
479 #ifdef HAVE_MMAP
480 if (whence == SEEK_SET)
481 rrd_file->pos = off;
482 else if (whence == SEEK_CUR)
483 rrd_file->pos += off;
484 else if (whence == SEEK_END)
485 rrd_file->pos = rrd_file->file_len + off;
486 #else
487 ret = lseek(rrd_file->fd, off, whence);
488 if (ret < 0)
489 rrd_set_error("lseek: %s", rrd_strerror(errno));
490 rrd_file->pos = ret;
491 #endif
492 /* mimic fseek, which returns 0 upon success */
493 return ret < 0; /*XXX: or just ret to mimic lseek */
494 }
497 /* Get current position in rrd_file. */
499 off_t rrd_tell(
500 rrd_file_t *rrd_file)
501 {
502 return rrd_file->pos;
503 }
506 /* Read count bytes into buffer buf, starting at rrd_file->pos.
507 * Returns the number of bytes read or <0 on error. */
509 ssize_t rrd_read(
510 rrd_file_t *rrd_file,
511 void *buf,
512 size_t count)
513 {
514 #ifdef HAVE_MMAP
515 size_t _cnt = count;
516 ssize_t _surplus;
518 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
519 return 0;
520 if (buf == NULL)
521 return -1; /* EINVAL */
522 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
523 if (_surplus > 0) { /* short read */
524 _cnt -= _surplus;
525 }
526 if (_cnt == 0)
527 return 0; /* EOF */
528 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
530 rrd_file->pos += _cnt; /* mimmic read() semantics */
531 return _cnt;
532 #else
533 ssize_t ret;
535 ret = read(rrd_file->fd, buf, count);
536 if (ret > 0)
537 rrd_file->pos += ret; /* mimmic read() semantics */
538 return ret;
539 #endif
540 }
543 /* Write count bytes from buffer buf to the current position
544 * rrd_file->pos of rrd_file->fd.
545 * Returns the number of bytes written or <0 on error. */
547 ssize_t rrd_write(
548 rrd_file_t *rrd_file,
549 const void *buf,
550 size_t count)
551 {
552 #ifdef HAVE_MMAP
553 if (count == 0)
554 return 0;
555 if (buf == NULL)
556 return -1; /* EINVAL */
557 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
558 rrd_file->pos += count;
559 return count; /* mimmic write() semantics */
560 #else
561 ssize_t _sz = write(rrd_file->fd, buf, count);
563 if (_sz > 0)
564 rrd_file->pos += _sz;
565 return _sz;
566 #endif
567 }
570 /* flush all data pending to be written to FD. */
572 void rrd_flush(
573 rrd_file_t *rrd_file)
574 {
575 /*
576 * Win32 can only flush files by FlushFileBuffers function,
577 * but it works with HANDLE hFile, not FILE. So skipping
578 */
579 #ifndef WIN32
580 if (fdatasync(rrd_file->fd) != 0) {
581 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
582 rrd_strerror(errno));
583 }
584 #endif
585 }
588 /* Initialize RRD header. */
590 void rrd_init(
591 rrd_t *rrd)
592 {
593 rrd->stat_head = NULL;
594 rrd->ds_def = NULL;
595 rrd->rra_def = NULL;
596 rrd->live_head = NULL;
597 rrd->legacy_last_up = NULL;
598 rrd->rra_ptr = NULL;
599 rrd->pdp_prep = NULL;
600 rrd->cdp_prep = NULL;
601 rrd->rrd_value = NULL;
602 }
605 /* free RRD header data. */
607 #ifdef HAVE_MMAP
608 void rrd_free(
609 rrd_t *rrd)
610 {
611 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
612 free(rrd->live_head);
613 }
614 }
615 #else
616 void rrd_free(
617 rrd_t *rrd)
618 {
619 free(rrd->live_head);
620 free(rrd->stat_head);
621 free(rrd->ds_def);
622 free(rrd->rra_def);
623 free(rrd->rra_ptr);
624 free(rrd->pdp_prep);
625 free(rrd->cdp_prep);
626 free(rrd->rrd_value);
627 }
628 #endif
/* Release a block of memory through free().  Provided so that external
 * libraries can hand back memory that was allocated by the rrd library.
 * Passing NULL is allowed and does nothing. */

void rrd_freemem(
    void *mem)
{
    void     *block = mem;

    free(block);
}