1 /*****************************************************************************
2 * RRDtool 1.3rc7 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 #define DEBUG 1
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
/*
 * __rrd_read(dst, dst_t, cnt)
 *
 * Fetch the next cnt objects of type dst_t from the RRD header into dst and
 * advance the local variable `offset` by the number of bytes consumed.
 *
 * The macro is meant to be expanded inside rrd_open() only: it relies on the
 * locals `offset` and `rrd_file` (plus `data` when HAVE_MMAP is defined) and
 * jumps to the label `out_nullify_head` after calling rrd_set_error() when
 * the header cannot be read.  It is wrapped in do { } while (0) so that it
 * expands to exactly one statement, and cnt is evaluated only once.
 */
#ifdef HAVE_MMAP
/* With mmap, dst is simply pointed into the mapping; the request is checked
 * against the file length first so that a truncated or corrupt file cannot
 * yield pointers beyond the mapping.
 * The cast to void* is there to avoid this warning seen on ia64 with certain
 * versions of gcc: 'cast increases required alignment of target type'
 */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        size_t rrd_read_cnt_ = (size_t) (cnt); \
        size_t rrd_read_want_ = sizeof(dst_t) * rrd_read_cnt_; \
        if (rrd_read_cnt_ > ((size_t) -1) / sizeof(dst_t) \
            || offset > rrd_file->file_len \
            || (size_t) (rrd_file->file_len - offset) < rrd_read_want_) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t *) (void *) (data + offset); \
        offset += rrd_read_want_; \
    } while (0)
#else
/* Without mmap, dst is allocated with malloc() and filled with read(2);
 * a failed or short read is reported instead of being added to offset. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        size_t rrd_read_cnt_ = (size_t) (cnt); \
        size_t rrd_read_want_ = sizeof(dst_t) * rrd_read_cnt_; \
        ssize_t rrd_read_got_; \
        if (rrd_read_cnt_ > ((size_t) -1) / sizeof(dst_t) \
            || ((dst) = malloc(rrd_read_want_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_read_got_ = read(rrd_file->fd, (dst), rrd_read_want_); \
        if (rrd_read_got_ < 0 \
            || (size_t) rrd_read_got_ < rrd_read_want_) { \
            rrd_set_error("short read while reading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += rrd_read_got_; \
    } while (0)
#endif

/* Round addr down to the start of the page it lies in.
 * Expects a local variable `_page_size` holding the system page size. */
#if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
#ifndef PAGE_START
#define PAGE_START(addr) ((addr) & (~(_page_size - 1)))
#endif
#endif
43 /* Open a database file, return its header and an open filehandle,
44 * positioned to the first cdp in the first rra.
45 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
46 * before returning an error. Do not call rrd_close upon failure of rrd_open.
47 */
49 rrd_file_t *rrd_open(
50 const char *const file_name,
51 rrd_t *rrd,
52 unsigned rdwr)
53 {
54 int flags = 0;
55 mode_t mode = S_IRUSR;
56 int version;
58 #ifdef HAVE_MMAP
59 ssize_t _page_size = sysconf(_SC_PAGESIZE);
60 int mm_prot = PROT_READ, mm_flags = 0;
61 char *data;
62 #endif
63 off_t offset = 0;
64 struct stat statb;
65 rrd_file_t *rrd_file = NULL;
66 off_t newfile_size = 0;
68 if (rdwr & RRD_CREAT) {
69 /* yes bad inline signaling alert, we are using the
70 floatcookie to pass the size in ... only used in resize */
71 newfile_size = (off_t) rrd->stat_head->float_cookie;
72 free(rrd->stat_head);
73 }
74 rrd_init(rrd);
75 rrd_file = malloc(sizeof(rrd_file_t));
76 if (rrd_file == NULL) {
77 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
78 return NULL;
79 }
80 memset(rrd_file, 0, sizeof(rrd_file_t));
82 #ifdef DEBUG
83 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
84 (RRD_READONLY | RRD_READWRITE)) {
85 /* Both READONLY and READWRITE were given, which is invalid. */
86 rrd_set_error("in read/write request mask");
87 exit(-1);
88 }
89 #endif
90 if (rdwr & RRD_READONLY) {
91 flags |= O_RDONLY;
92 #ifdef HAVE_MMAP
93 mm_flags = MAP_PRIVATE;
94 # ifdef MAP_NORESERVE
95 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
96 # endif
97 #endif
98 } else {
99 if (rdwr & RRD_READWRITE) {
100 mode |= S_IWUSR;
101 flags |= O_RDWR;
102 #ifdef HAVE_MMAP
103 mm_flags = MAP_SHARED;
104 mm_prot |= PROT_WRITE;
105 #endif
106 }
107 if (rdwr & RRD_CREAT) {
108 flags |= (O_CREAT | O_TRUNC);
109 }
110 }
111 if (rdwr & RRD_READAHEAD) {
112 #ifdef MAP_POPULATE
113 mm_flags |= MAP_POPULATE; /* populate ptes and data */
114 #endif
115 #if defined MAP_NONBLOCK
116 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
117 #endif
118 }
120 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
121 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
122 goto out_free;
123 }
125 /* Better try to avoid seeks as much as possible. stat may be heavy but
126 * many concurrent seeks are even worse. */
127 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
128 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
129 goto out_close;
130 }
131 if (newfile_size == 0) {
132 rrd_file->file_len = statb.st_size;
133 } else {
134 rrd_file->file_len = newfile_size;
135 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
136 write(rrd_file->fd, "\0", 1); /* poke */
137 lseek(rrd_file->fd, 0, SEEK_SET);
138 }
139 #ifdef HAVE_POSIX_FADVISE
140 /* In general we need no read-ahead when dealing with rrd_files.
141 When we stop reading, it is highly unlikely that we start up again.
142 In this manner we actually save time and diskaccess (and buffer cache).
143 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
144 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
145 #endif
147 /*
148 if (rdwr & RRD_READWRITE)
149 {
150 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
151 rrd_set_error("failed to disable the stream buffer\n");
152 return (-1);
153 }
154 }
155 */
156 #ifdef HAVE_MMAP
157 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
158 rrd_file->fd, offset);
160 /* lets see if the first read worked */
161 if (data == MAP_FAILED) {
162 rrd_set_error("mmaping file '%s': %s", file_name,
163 rrd_strerror(errno));
164 goto out_close;
165 }
166 rrd_file->file_start = data;
167 if (rdwr & RRD_CREAT) {
168 memset(data, DNAN, newfile_size - 1);
169 goto out_done;
170 }
171 #endif
172 if (rdwr & RRD_CREAT)
173 goto out_done;
174 #ifdef USE_MADVISE
175 if (rdwr & RRD_COPY) {
176 /* We will read everything in a moment (copying) */
177 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
178 } else {
179 /* We do not need to read anything in for the moment */
180 madvise(data, rrd_file->file_len, MADV_RANDOM);
181 /* the stat_head will be needed soonish, so hint accordingly */
182 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
183 }
184 #endif
186 __rrd_read(rrd->stat_head, stat_head_t,
187 1);
189 /* lets do some test if we are on track ... */
190 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
191 rrd_set_error("'%s' is not an RRD file", file_name);
192 goto out_nullify_head;
193 }
195 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
196 rrd_set_error("This RRD was created on another architecture");
197 goto out_nullify_head;
198 }
200 version = atoi(rrd->stat_head->version);
202 if (version > atoi(RRD_VERSION)) {
203 rrd_set_error("can't handle RRD file version %s",
204 rrd->stat_head->version);
205 goto out_nullify_head;
206 }
207 #if defined USE_MADVISE
208 /* the ds_def will be needed soonish, so hint accordingly */
209 madvise(data + PAGE_START(offset),
210 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
211 #endif
212 __rrd_read(rrd->ds_def, ds_def_t,
213 rrd->stat_head->ds_cnt);
215 #if defined USE_MADVISE
216 /* the rra_def will be needed soonish, so hint accordingly */
217 madvise(data + PAGE_START(offset),
218 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
219 #endif
220 __rrd_read(rrd->rra_def, rra_def_t,
221 rrd->stat_head->rra_cnt);
223 /* handle different format for the live_head */
224 if (version < 3) {
225 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
226 if (rrd->live_head == NULL) {
227 rrd_set_error("live_head_t malloc");
228 goto out_close;
229 }
230 #if defined USE_MADVISE
231 /* the live_head will be needed soonish, so hint accordingly */
232 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
233 #endif
234 __rrd_read(rrd->legacy_last_up, time_t,
235 1);
237 rrd->live_head->last_up = *rrd->legacy_last_up;
238 rrd->live_head->last_up_usec = 0;
239 } else {
240 #if defined USE_MADVISE
241 /* the live_head will be needed soonish, so hint accordingly */
242 madvise(data + PAGE_START(offset),
243 sizeof(live_head_t), MADV_WILLNEED);
244 #endif
245 __rrd_read(rrd->live_head, live_head_t,
246 1);
247 }
248 __rrd_read(rrd->pdp_prep, pdp_prep_t,
249 rrd->stat_head->ds_cnt);
250 __rrd_read(rrd->cdp_prep, cdp_prep_t,
251 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
252 __rrd_read(rrd->rra_ptr, rra_ptr_t,
253 rrd->stat_head->rra_cnt);
255 rrd_file->header_len = offset;
256 rrd_file->pos = offset;
257 out_done:
258 return (rrd_file);
259 out_nullify_head:
260 rrd->stat_head = NULL;
261 out_close:
262 close(rrd_file->fd);
263 out_free:
264 free(rrd_file);
265 return NULL;
266 }
#if defined DEBUG && DEBUG > 1
/* Print the runs of in-core / not-in-core pages of the current rrd_file.
 *
 * rrd_file  open RRD file whose mapping (file_start, file_len) is inspected
 * mark      label that is prefixed to every output line
 *
 * One line is written to stderr per run of pages sharing the same residency
 * state: the address of the run inside the mapping and its length in bytes
 * (page granular, so the last run may extend past file_len).
 */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    size_t    page_bytes;
    size_t    page_cnt;
    size_t    page;
    size_t    run_start = 0;
    unsigned  was_in = 0;
    unsigned char *vec;

    if (_page_size <= 0 || rrd_file->file_len <= 0) {
        return;
    }
    page_bytes = (size_t) _page_size;
    /* mincore() reports one byte per page of the queried range */
    page_cnt = ((size_t) rrd_file->file_len + page_bytes - 1) / page_bytes;
    vec = calloc(page_cnt, 1);
    if (vec == NULL) {
        return;
    }
    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) != 0) {
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));
        free(vec);
        return;
    }
    for (page = 0; page < page_cnt; ++page) {
        unsigned  is_in = vec[page] & 1;    /* if lsb set then is core resident */

        if (page == 0) {
            was_in = is_in;
        }
        if (was_in != is_in) {
            fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                    was_in ? "" : "not ",
                    (void *) (rrd_file->file_start + run_start * page_bytes),
                    (long) ((page - run_start) * page_bytes));
            was_in = is_in;
            run_start = page;
        }
    }
    fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
            was_in ? "" : "not ",
            (void *) (rrd_file->file_start + run_start * page_bytes),
            (long) ((page - run_start) * page_bytes));
    free(vec);
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
315 /* drop cache except for the header and the active pages */
316 void rrd_dontneed(
317 rrd_file_t *rrd_file,
318 rrd_t *rrd)
319 {
320 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
321 unsigned long dontneed_start;
322 unsigned long rra_start;
323 unsigned long active_block;
324 unsigned long i;
325 ssize_t _page_size = sysconf(_SC_PAGESIZE);
327 #if defined DEBUG && DEBUG > 1
328 mincore_print(rrd_file, "before");
329 #endif
331 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
332 rra_start = rrd_file->header_len;
333 dontneed_start = PAGE_START(rra_start) + _page_size;
334 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
335 active_block =
336 PAGE_START(rra_start
337 + rrd->rra_ptr[i].cur_row
338 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
339 if (active_block > dontneed_start) {
340 #ifdef USE_MADVISE
341 madvise(rrd_file->file_start + dontneed_start,
342 active_block - dontneed_start - 1, MADV_DONTNEED);
343 #endif
344 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
345 #ifdef HAVE_POSIX_FADVISE
346 posix_fadvise(rrd_file->fd, dontneed_start,
347 active_block - dontneed_start - 1,
348 POSIX_FADV_DONTNEED);
349 #endif
350 }
351 dontneed_start = active_block;
352 /* do not release 'hot' block if update for this RAA will occur
353 * within 10 minutes */
354 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
355 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
356 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
357 dontneed_start += _page_size;
358 }
359 rra_start +=
360 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
361 sizeof(rrd_value_t);
362 }
363 #ifdef USE_MADVISE
364 madvise(rrd_file->file_start + dontneed_start,
365 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
366 #endif
367 #ifdef HAVE_POSIX_FADVISE
368 posix_fadvise(rrd_file->fd, dontneed_start,
369 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
370 #endif
371 #if defined DEBUG && DEBUG > 1
372 mincore_print(rrd_file, "after");
373 #endif
374 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
375 }
381 int rrd_close(
382 rrd_file_t *rrd_file)
383 {
384 int ret;
386 #ifdef HAVE_MMAP
387 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
388 if (ret != 0)
389 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
390 ret = munmap(rrd_file->file_start, rrd_file->file_len);
391 if (ret != 0)
392 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
393 #endif
394 ret = close(rrd_file->fd);
395 if (ret != 0)
396 rrd_set_error("closing file: %s", rrd_strerror(errno));
397 free(rrd_file);
398 rrd_file = NULL;
399 return ret;
400 }
403 /* Set position of rrd_file. */
405 off_t rrd_seek(
406 rrd_file_t *rrd_file,
407 off_t off,
408 int whence)
409 {
410 off_t ret = 0;
412 #ifdef HAVE_MMAP
413 if (whence == SEEK_SET)
414 rrd_file->pos = off;
415 else if (whence == SEEK_CUR)
416 rrd_file->pos += off;
417 else if (whence == SEEK_END)
418 rrd_file->pos = rrd_file->file_len + off;
419 #else
420 ret = lseek(rrd_file->fd, off, whence);
421 if (ret < 0)
422 rrd_set_error("lseek: %s", rrd_strerror(errno));
423 rrd_file->pos = ret;
424 #endif
425 /* mimic fseek, which returns 0 upon success */
426 return ret < 0; /*XXX: or just ret to mimic lseek */
427 }
430 /* Get current position in rrd_file. */
432 inline off_t rrd_tell(
433 rrd_file_t *rrd_file)
434 {
435 return rrd_file->pos;
436 }
439 /* Read count bytes into buffer buf, starting at rrd_file->pos.
440 * Returns the number of bytes read or <0 on error. */
442 inline ssize_t rrd_read(
443 rrd_file_t *rrd_file,
444 void *buf,
445 size_t count)
446 {
447 #ifdef HAVE_MMAP
448 size_t _cnt = count;
449 ssize_t _surplus;
451 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
452 return 0;
453 if (buf == NULL)
454 return -1; /* EINVAL */
455 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
456 if (_surplus > 0) { /* short read */
457 _cnt -= _surplus;
458 }
459 if (_cnt == 0)
460 return 0; /* EOF */
461 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
463 rrd_file->pos += _cnt; /* mimmic read() semantics */
464 return _cnt;
465 #else
466 ssize_t ret;
468 ret = read(rrd_file->fd, buf, count);
469 if (ret > 0)
470 rrd_file->pos += ret; /* mimmic read() semantics */
471 return ret;
472 #endif
473 }
476 /* Write count bytes from buffer buf to the current position
477 * rrd_file->pos of rrd_file->fd.
478 * Returns the number of bytes written or <0 on error. */
480 inline ssize_t rrd_write(
481 rrd_file_t *rrd_file,
482 const void *buf,
483 size_t count)
484 {
485 #ifdef HAVE_MMAP
486 if (count == 0)
487 return 0;
488 if (buf == NULL)
489 return -1; /* EINVAL */
490 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
491 rrd_file->pos += count;
492 return count; /* mimmic write() semantics */
493 #else
494 ssize_t _sz = write(rrd_file->fd, buf, count);
496 if (_sz > 0)
497 rrd_file->pos += _sz;
498 return _sz;
499 #endif
500 }
503 /* flush all data pending to be written to FD. */
505 inline void rrd_flush(
506 rrd_file_t *rrd_file)
507 {
508 if (fdatasync(rrd_file->fd) != 0) {
509 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
510 rrd_strerror(errno));
511 }
512 }
515 /* Initialize RRD header. */
517 void rrd_init(
518 rrd_t *rrd)
519 {
520 rrd->stat_head = NULL;
521 rrd->ds_def = NULL;
522 rrd->rra_def = NULL;
523 rrd->live_head = NULL;
524 rrd->legacy_last_up = NULL;
525 rrd->rra_ptr = NULL;
526 rrd->pdp_prep = NULL;
527 rrd->cdp_prep = NULL;
528 rrd->rrd_value = NULL;
529 }
532 /* free RRD header data. */
534 #ifdef HAVE_MMAP
535 void rrd_free(
536 rrd_t *rrd)
537 {
538 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
539 free(rrd->live_head);
540 }
541 }
542 #else
543 void rrd_free(
544 rrd_t *rrd)
545 {
546 free(rrd->live_head);
547 free(rrd->stat_head);
548 free(rrd->ds_def);
549 free(rrd->rra_def);
550 free(rrd->rra_ptr);
551 free(rrd->pdp_prep);
552 free(rrd->cdp_prep);
553 free(rrd->rrd_value);
554 }
555 #endif
/* Release memory that was handed out by the rrd library.
 *
 * External callers use this routine so that the block is returned through
 * the same free() the library itself uses.  mem is passed to free()
 * unchanged.
 */
void rrd_freemem(void *mem)
{
    free(mem);
}
568 /* XXX: FIXME: missing documentation. */
569 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
571 int /*_rrd_*/ readfile(
572 const char *file_name,
573 char **buffer,
574 int skipfirst)
575 {
576 long writecnt = 0, totalcnt = MEMBLK;
577 long offset = 0;
578 FILE *input = NULL;
579 char c;
581 if ((strcmp("-", file_name) == 0)) {
582 input = stdin;
583 } else {
584 if ((input = fopen(file_name, "rb")) == NULL) {
585 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
586 return (-1);
587 }
588 }
589 if (skipfirst) {
590 do {
591 c = getc(input);
592 offset++;
593 } while (c != '\n' && !feof(input));
594 }
595 if (strcmp("-", file_name)) {
596 fseek(input, 0, SEEK_END);
597 /* have extra space for detecting EOF without realloc */
598 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
599 if (totalcnt < MEMBLK)
600 totalcnt = MEMBLK; /* sanitize */
601 fseek(input, offset * sizeof(char), SEEK_SET);
602 }
603 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
604 perror("Allocate Buffer:");
605 exit(1);
606 };
607 do {
608 writecnt +=
609 fread((*buffer) + writecnt, 1,
610 (totalcnt - writecnt) * sizeof(char), input);
611 if (writecnt >= totalcnt) {
612 totalcnt += MEMBLK;
613 if (((*buffer) =
614 rrd_realloc((*buffer),
615 (totalcnt + 4) * sizeof(char))) == NULL) {
616 perror("Realloc Buffer:");
617 exit(1);
618 };
619 }
620 } while (!feof(input));
621 (*buffer)[writecnt] = '\0';
622 if (strcmp("-", file_name) != 0) {
623 fclose(input);
624 };
625 return writecnt;
626 }