1 /*****************************************************************************
2 * RRDtool 1.3.2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 #define DEBUG 1
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial
18 * if your syscalls are very slow */
/* __rrd_read(dst, dst_t, cnt): load `cnt` consecutive objects of type
 * `dst_t` from the current header position into `dst`.
 *
 * The macro expects the following names in the calling scope: `rrd_file`,
 * `offset`, the label `out_nullify_head` and, in the mmap variant, `data`
 * (start of the mapping). On failure it records an error with
 * rrd_set_error() and jumps to `out_nullify_head`; on success `offset` is
 * advanced past the bytes consumed.
 *
 * The element count is validated BEFORE it is multiplied by the element
 * size, so a bogus count cannot wrap the byte count around and slip past the
 * bounds check.
 */
#ifdef HAVE_MMAP
/* the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) do { \
        const size_t rrd_read_cnt_ = (size_t) (cnt); \
        size_t wanted; \
        if (offset < 0 || offset > rrd_file->file_len || \
            rrd_read_cnt_ > \
            (size_t) (rrd_file->file_len - offset) / sizeof(dst_t)) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        wanted = sizeof(dst_t) * rrd_read_cnt_; \
        (dst) = (dst_t *) (void *) (data + offset); \
        offset += wanted; \
    } while (0)
#else
#define __rrd_read(dst, dst_t, cnt) do { \
        const size_t rrd_read_cnt_ = (size_t) (cnt); \
        size_t wanted; \
        ssize_t got; \
        if (rrd_read_cnt_ > ((size_t) -1) / sizeof(dst_t)) { \
            /* byte count would overflow size_t */ \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        wanted = sizeof(dst_t) * rrd_read_cnt_; \
        if ((dst = malloc(wanted)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        got = read(rrd_file->fd, dst, wanted); \
        if (got < 0 || (size_t) got != wanted) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += got; \
    } while (0)
#endif
50 /* get the address of the start of this page */
51 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
52 #ifndef PAGE_START
53 #define PAGE_START(addr) ((addr)&(~(_page_size-1)))
54 #endif
55 #endif
57 /* Open a database file, return its header and an open filehandle,
58 * positioned to the first cdp in the first rra.
59 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
60 * before returning an error. Do not call rrd_close upon failure of rrd_open.
61 */
63 rrd_file_t *rrd_open(
64 const char *const file_name,
65 rrd_t *rrd,
66 unsigned rdwr)
67 {
68 int flags = 0;
69 mode_t mode = S_IRUSR;
70 int version;
72 #ifdef HAVE_MMAP
73 ssize_t _page_size = sysconf(_SC_PAGESIZE);
74 int mm_prot = PROT_READ, mm_flags = 0;
75 char *data = MAP_FAILED;
76 #endif
77 off_t offset = 0;
78 struct stat statb;
79 rrd_file_t *rrd_file = NULL;
80 off_t newfile_size = 0;
82 if (rdwr & RRD_CREAT) {
83 /* yes bad inline signaling alert, we are using the
84 floatcookie to pass the size in ... only used in resize */
85 newfile_size = (off_t) rrd->stat_head->float_cookie;
86 free(rrd->stat_head);
87 }
88 rrd_init(rrd);
89 rrd_file = malloc(sizeof(rrd_file_t));
90 if (rrd_file == NULL) {
91 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
92 return NULL;
93 }
94 memset(rrd_file, 0, sizeof(rrd_file_t));
96 #ifdef DEBUG
97 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
98 (RRD_READONLY | RRD_READWRITE)) {
99 /* Both READONLY and READWRITE were given, which is invalid. */
100 rrd_set_error("in read/write request mask");
101 exit(-1);
102 }
103 #endif
104 if (rdwr & RRD_READONLY) {
105 flags |= O_RDONLY;
106 #ifdef HAVE_MMAP
107 mm_flags = MAP_PRIVATE;
108 # ifdef MAP_NORESERVE
109 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
110 # endif
111 #endif
112 } else {
113 if (rdwr & RRD_READWRITE) {
114 mode |= S_IWUSR;
115 flags |= O_RDWR;
116 #ifdef HAVE_MMAP
117 mm_flags = MAP_SHARED;
118 mm_prot |= PROT_WRITE;
119 #endif
120 }
121 if (rdwr & RRD_CREAT) {
122 flags |= (O_CREAT | O_TRUNC);
123 }
124 }
125 if (rdwr & RRD_READAHEAD) {
126 #ifdef MAP_POPULATE
127 mm_flags |= MAP_POPULATE; /* populate ptes and data */
128 #endif
129 #if defined MAP_NONBLOCK
130 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
131 #endif
132 }
133 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
134 flags |= O_BINARY;
135 #endif
137 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
138 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
139 goto out_free;
140 }
142 /* Better try to avoid seeks as much as possible. stat may be heavy but
143 * many concurrent seeks are even worse. */
144 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
145 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
146 goto out_close;
147 }
148 if (newfile_size == 0) {
149 rrd_file->file_len = statb.st_size;
150 } else {
151 rrd_file->file_len = newfile_size;
152 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
153 write(rrd_file->fd, "\0", 1); /* poke */
154 lseek(rrd_file->fd, 0, SEEK_SET);
155 }
156 #ifdef HAVE_POSIX_FADVISE
157 /* In general we need no read-ahead when dealing with rrd_files.
158 When we stop reading, it is highly unlikely that we start up again.
159 In this manner we actually save time and diskaccess (and buffer cache).
160 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
161 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
162 #endif
164 /*
165 if (rdwr & RRD_READWRITE)
166 {
167 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
168 rrd_set_error("failed to disable the stream buffer\n");
169 return (-1);
170 }
171 }
172 */
173 #ifdef HAVE_MMAP
174 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
175 rrd_file->fd, offset);
177 /* lets see if the first read worked */
178 if (data == MAP_FAILED) {
179 rrd_set_error("mmaping file '%s': %s", file_name,
180 rrd_strerror(errno));
181 goto out_close;
182 }
183 rrd_file->file_start = data;
184 if (rdwr & RRD_CREAT) {
185 memset(data, DNAN, newfile_size - 1);
186 goto out_done;
187 }
188 #endif
189 if (rdwr & RRD_CREAT)
190 goto out_done;
191 #ifdef USE_MADVISE
192 if (rdwr & RRD_COPY) {
193 /* We will read everything in a moment (copying) */
194 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
195 } else {
196 /* We do not need to read anything in for the moment */
197 madvise(data, rrd_file->file_len, MADV_RANDOM);
198 /* the stat_head will be needed soonish, so hint accordingly */
199 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
200 }
201 #endif
203 __rrd_read(rrd->stat_head, stat_head_t,
204 1);
206 /* lets do some test if we are on track ... */
207 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
208 rrd_set_error("'%s' is not an RRD file", file_name);
209 goto out_nullify_head;
210 }
212 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
213 rrd_set_error("This RRD was created on another architecture");
214 goto out_nullify_head;
215 }
217 version = atoi(rrd->stat_head->version);
219 if (version > atoi(RRD_VERSION)) {
220 rrd_set_error("can't handle RRD file version %s",
221 rrd->stat_head->version);
222 goto out_nullify_head;
223 }
224 #if defined USE_MADVISE
225 /* the ds_def will be needed soonish, so hint accordingly */
226 madvise(data + PAGE_START(offset),
227 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
228 #endif
229 __rrd_read(rrd->ds_def, ds_def_t,
230 rrd->stat_head->ds_cnt);
232 #if defined USE_MADVISE
233 /* the rra_def will be needed soonish, so hint accordingly */
234 madvise(data + PAGE_START(offset),
235 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
236 #endif
237 __rrd_read(rrd->rra_def, rra_def_t,
238 rrd->stat_head->rra_cnt);
240 /* handle different format for the live_head */
241 if (version < 3) {
242 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
243 if (rrd->live_head == NULL) {
244 rrd_set_error("live_head_t malloc");
245 goto out_close;
246 }
247 #if defined USE_MADVISE
248 /* the live_head will be needed soonish, so hint accordingly */
249 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
250 #endif
251 __rrd_read(rrd->legacy_last_up, time_t,
252 1);
254 rrd->live_head->last_up = *rrd->legacy_last_up;
255 rrd->live_head->last_up_usec = 0;
256 } else {
257 #if defined USE_MADVISE
258 /* the live_head will be needed soonish, so hint accordingly */
259 madvise(data + PAGE_START(offset),
260 sizeof(live_head_t), MADV_WILLNEED);
261 #endif
262 __rrd_read(rrd->live_head, live_head_t,
263 1);
264 }
265 __rrd_read(rrd->pdp_prep, pdp_prep_t,
266 rrd->stat_head->ds_cnt);
267 __rrd_read(rrd->cdp_prep, cdp_prep_t,
268 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
269 __rrd_read(rrd->rra_ptr, rra_ptr_t,
270 rrd->stat_head->rra_cnt);
272 rrd_file->header_len = offset;
273 rrd_file->pos = offset;
275 {
276 unsigned long row_cnt = 0;
277 unsigned long i;
279 for (i=0; i<rrd->stat_head->rra_cnt; i++)
280 row_cnt += rrd->rra_def[i].row_cnt;
282 off_t correct_len = rrd_file->header_len +
283 sizeof(rrd_value_t) * row_cnt * rrd->stat_head->ds_cnt;
285 if (correct_len > rrd_file->file_len)
286 {
287 rrd_set_error("'%s' is too small (should be %ld bytes)",
288 file_name, (long long) correct_len);
289 goto out_nullify_head;
290 }
291 }
293 out_done:
294 return (rrd_file);
295 out_nullify_head:
296 rrd->stat_head = NULL;
297 out_close:
298 #ifdef HAVE_MMAP
299 if (data != MAP_FAILED)
300 munmap(data, rrd_file->file_len);
301 #endif
302 close(rrd_file->fd);
303 out_free:
304 free(rrd_file);
305 return NULL;
306 }
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * rrd_file  file whose mapping is inspected with mincore()
 * mark      label put in front of every line written to stderr
 *
 * Consecutive pages with the same residency state are reported as one run;
 * the reported length is a number of pages.
 */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    unsigned char *vec;
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    off_t     pages;
    off_t     page;
    off_t     run_start = 0;
    unsigned  is_in = 0, was_in = 0;

    /* mincore() fills in one status byte per page of the mapping */
    pages = (rrd_file->file_len + _page_size - 1) / _page_size;
    vec = calloc(pages > 0 ? (size_t) pages : 1, 1);
    if (vec == NULL)
        return;

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
        for (page = 0; page < pages; ++page) {
            is_in = vec[page] & 1;  /* if lsb set then is core resident */
            if (page == 0)
                was_in = is_in;
            if (was_in != is_in) {
                fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                        was_in ? "" : "not ", (void *) (vec + run_start),
                        (long) (page - run_start));
                was_in = is_in;
                run_start = page;
            }
        }
        fprintf(stderr,
                "%s: %sin core: %p len %ld\n", mark,
                was_in ? "" : "not ", (void *) (vec + run_start),
                (long) (page - run_start));
    } else
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));

    free(vec);          /* the status vector is only needed for printing */
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
355 /* drop cache except for the header and the active pages */
356 void rrd_dontneed(
357 rrd_file_t *rrd_file,
358 rrd_t *rrd)
359 {
360 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
361 off_t dontneed_start;
362 off_t rra_start;
363 off_t active_block;
364 unsigned long i;
365 ssize_t _page_size = sysconf(_SC_PAGESIZE);
367 if (rrd_file == NULL) {
368 #if defined DEBUG && DEBUG
369 fprintf (stderr, "rrd_dontneed: Argument 'rrd_file' is NULL.\n");
370 #endif
371 return;
372 }
374 #if defined DEBUG && DEBUG > 1
375 mincore_print(rrd_file, "before");
376 #endif
378 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
379 rra_start = rrd_file->header_len;
380 dontneed_start = PAGE_START(rra_start) + _page_size;
381 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
382 active_block =
383 PAGE_START(rra_start
384 + rrd->rra_ptr[i].cur_row
385 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
386 if (active_block > dontneed_start) {
387 #ifdef USE_MADVISE
388 madvise(rrd_file->file_start + dontneed_start,
389 active_block - dontneed_start - 1, MADV_DONTNEED);
390 #endif
391 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
392 #ifdef HAVE_POSIX_FADVISE
393 posix_fadvise(rrd_file->fd, dontneed_start,
394 active_block - dontneed_start - 1,
395 POSIX_FADV_DONTNEED);
396 #endif
397 }
398 dontneed_start = active_block;
399 /* do not release 'hot' block if update for this RAA will occur
400 * within 10 minutes */
401 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
402 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
403 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
404 dontneed_start += _page_size;
405 }
406 rra_start +=
407 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
408 sizeof(rrd_value_t);
409 }
411 if (dontneed_start < rrd_file->file_len) {
412 #ifdef USE_MADVISE
413 madvise(rrd_file->file_start + dontneed_start,
414 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
415 #endif
416 #ifdef HAVE_POSIX_FADVISE
417 posix_fadvise(rrd_file->fd, dontneed_start,
418 rrd_file->file_len - dontneed_start,
419 POSIX_FADV_DONTNEED);
420 #endif
421 }
423 #if defined DEBUG && DEBUG > 1
424 mincore_print(rrd_file, "after");
425 #endif
426 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
427 }
433 int rrd_close(
434 rrd_file_t *rrd_file)
435 {
436 int ret;
438 #ifdef HAVE_MMAP
439 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
440 if (ret != 0)
441 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
442 ret = munmap(rrd_file->file_start, rrd_file->file_len);
443 if (ret != 0)
444 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
445 #endif
446 ret = close(rrd_file->fd);
447 if (ret != 0)
448 rrd_set_error("closing file: %s", rrd_strerror(errno));
449 free(rrd_file);
450 rrd_file = NULL;
451 return ret;
452 }
455 /* Set position of rrd_file. */
457 off_t rrd_seek(
458 rrd_file_t *rrd_file,
459 off_t off,
460 int whence)
461 {
462 off_t ret = 0;
464 #ifdef HAVE_MMAP
465 if (whence == SEEK_SET)
466 rrd_file->pos = off;
467 else if (whence == SEEK_CUR)
468 rrd_file->pos += off;
469 else if (whence == SEEK_END)
470 rrd_file->pos = rrd_file->file_len + off;
471 #else
472 ret = lseek(rrd_file->fd, off, whence);
473 if (ret < 0)
474 rrd_set_error("lseek: %s", rrd_strerror(errno));
475 rrd_file->pos = ret;
476 #endif
477 /* mimic fseek, which returns 0 upon success */
478 return ret < 0; /*XXX: or just ret to mimic lseek */
479 }
482 /* Get current position in rrd_file. */
484 off_t rrd_tell(
485 rrd_file_t *rrd_file)
486 {
487 return rrd_file->pos;
488 }
491 /* Read count bytes into buffer buf, starting at rrd_file->pos.
492 * Returns the number of bytes read or <0 on error. */
494 ssize_t rrd_read(
495 rrd_file_t *rrd_file,
496 void *buf,
497 size_t count)
498 {
499 #ifdef HAVE_MMAP
500 size_t _cnt = count;
501 ssize_t _surplus;
503 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
504 return 0;
505 if (buf == NULL)
506 return -1; /* EINVAL */
507 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
508 if (_surplus > 0) { /* short read */
509 _cnt -= _surplus;
510 }
511 if (_cnt == 0)
512 return 0; /* EOF */
513 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
515 rrd_file->pos += _cnt; /* mimmic read() semantics */
516 return _cnt;
517 #else
518 ssize_t ret;
520 ret = read(rrd_file->fd, buf, count);
521 if (ret > 0)
522 rrd_file->pos += ret; /* mimmic read() semantics */
523 return ret;
524 #endif
525 }
528 /* Write count bytes from buffer buf to the current position
529 * rrd_file->pos of rrd_file->fd.
530 * Returns the number of bytes written or <0 on error. */
532 ssize_t rrd_write(
533 rrd_file_t *rrd_file,
534 const void *buf,
535 size_t count)
536 {
537 #ifdef HAVE_MMAP
538 if (count == 0)
539 return 0;
540 if (buf == NULL)
541 return -1; /* EINVAL */
542 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
543 rrd_file->pos += count;
544 return count; /* mimmic write() semantics */
545 #else
546 ssize_t _sz = write(rrd_file->fd, buf, count);
548 if (_sz > 0)
549 rrd_file->pos += _sz;
550 return _sz;
551 #endif
552 }
555 /* flush all data pending to be written to FD. */
557 void rrd_flush(
558 rrd_file_t *rrd_file)
559 {
560 if (fdatasync(rrd_file->fd) != 0) {
561 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
562 rrd_strerror(errno));
563 }
564 }
567 /* Initialize RRD header. */
569 void rrd_init(
570 rrd_t *rrd)
571 {
572 rrd->stat_head = NULL;
573 rrd->ds_def = NULL;
574 rrd->rra_def = NULL;
575 rrd->live_head = NULL;
576 rrd->legacy_last_up = NULL;
577 rrd->rra_ptr = NULL;
578 rrd->pdp_prep = NULL;
579 rrd->cdp_prep = NULL;
580 rrd->rrd_value = NULL;
581 }
584 /* free RRD header data. */
586 #ifdef HAVE_MMAP
587 void rrd_free(
588 rrd_t *rrd)
589 {
590 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
591 free(rrd->live_head);
592 }
593 }
594 #else
595 void rrd_free(
596 rrd_t *rrd)
597 {
598 free(rrd->live_head);
599 free(rrd->stat_head);
600 free(rrd->ds_def);
601 free(rrd->rra_def);
602 free(rrd->rra_ptr);
603 free(rrd->pdp_prep);
604 free(rrd->cdp_prep);
605 free(rrd->rrd_value);
606 }
607 #endif
/* Hand a block of memory back to the allocator. Meant for external
 * libraries that received memory allocated by the rrd library; mem is
 * passed straight to free(). */
void rrd_freemem(
    void *mem)
{
    void     *block = mem;

    free(block);
}