e1685d2924aa8cb33be18ddc73dda5753692d560
1 /*****************************************************************************
2 * RRDtool 1.3rc7 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 #define DEBUG 1
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
18 * your syscalls are very slow */
/*
 * __rrd_read(dst, dst_t, cnt): make `dst` refer to the next `cnt` objects of
 * type `dst_t` in the file header and advance `offset` past them.
 *
 * Both variants expand inside rrd_open() and rely on its locals (`offset`,
 * plus `data` or `rrd_file`).  They are wrapped in do { } while (0) so that
 * an invocation behaves as one statement, even under an unbraced if/else.
 */
#ifdef HAVE_MMAP
/* the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += sizeof(dst_t) * (cnt); \
    } while (0)
#else
/* No mmap: allocate a private copy and fill it with read(2).  On allocation
 * failure this jumps to the caller's `out_nullify_head` label. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        if (((dst) = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt)); \
    } while (0)
#endif

/* get the address of the start of this page
 * (expects a local variable `_page_size` to be in scope at the point of use;
 * the mask arithmetic presumably assumes the page size is a power of two) */
#ifndef PAGE_START
#define PAGE_START(addr) ((addr)&(~(_page_size-1)))
#endif
42 /* Open a database file, return its header and an open filehandle,
43 * positioned to the first cdp in the first rra.
44 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
45 * before returning an error. Do not call rrd_close upon failure of rrd_open.
46 */
48 rrd_file_t *rrd_open(
49 const char *const file_name,
50 rrd_t *rrd,
51 unsigned rdwr)
52 {
53 int flags = 0;
54 mode_t mode = S_IRUSR;
55 int version;
57 #ifdef HAVE_MMAP
58 ssize_t _page_size = sysconf(_SC_PAGESIZE);
59 int mm_prot = PROT_READ, mm_flags = 0;
60 char *data;
61 #endif
62 off_t offset = 0;
63 struct stat statb;
64 rrd_file_t *rrd_file = NULL;
65 off_t newfile_size = 0;
67 if (rdwr & RRD_CREAT) {
68 /* yes bad inline signaling alert, we are using the
69 floatcookie to pass the size in ... only used in resize */
70 newfile_size = (off_t) rrd->stat_head->float_cookie;
71 free(rrd->stat_head);
72 }
73 rrd_init(rrd);
74 rrd_file = malloc(sizeof(rrd_file_t));
75 if (rrd_file == NULL) {
76 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
77 return NULL;
78 }
79 memset(rrd_file, 0, sizeof(rrd_file_t));
81 #ifdef DEBUG
82 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
83 (RRD_READONLY | RRD_READWRITE)) {
84 /* Both READONLY and READWRITE were given, which is invalid. */
85 rrd_set_error("in read/write request mask");
86 exit(-1);
87 }
88 #endif
89 if (rdwr & RRD_READONLY) {
90 flags |= O_RDONLY;
91 #ifdef HAVE_MMAP
92 mm_flags = MAP_PRIVATE;
93 # ifdef MAP_NORESERVE
94 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
95 # endif
96 #endif
97 } else {
98 if (rdwr & RRD_READWRITE) {
99 mode |= S_IWUSR;
100 flags |= O_RDWR;
101 #ifdef HAVE_MMAP
102 mm_flags = MAP_SHARED;
103 mm_prot |= PROT_WRITE;
104 #endif
105 }
106 if (rdwr & RRD_CREAT) {
107 flags |= (O_CREAT | O_TRUNC);
108 }
109 }
110 if (rdwr & RRD_READAHEAD) {
111 #ifdef MAP_POPULATE
112 mm_flags |= MAP_POPULATE; /* populate ptes and data */
113 #endif
114 #if defined MAP_NONBLOCK
115 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
116 #endif
117 }
119 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
120 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
121 goto out_free;
122 }
124 /* Better try to avoid seeks as much as possible. stat may be heavy but
125 * many concurrent seeks are even worse. */
126 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
127 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
128 goto out_close;
129 }
130 if (newfile_size == 0) {
131 rrd_file->file_len = statb.st_size;
132 } else {
133 rrd_file->file_len = newfile_size;
134 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
135 write(rrd_file->fd, "\0", 1); /* poke */
136 lseek(rrd_file->fd, 0, SEEK_SET);
137 }
138 #ifdef HAVE_POSIX_FADVISE
139 /* In general we need no read-ahead when dealing with rrd_files.
140 When we stop reading, it is highly unlikely that we start up again.
141 In this manner we actually save time and diskaccess (and buffer cache).
142 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
143 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
144 #endif
146 /*
147 if (rdwr & RRD_READWRITE)
148 {
149 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
150 rrd_set_error("failed to disable the stream buffer\n");
151 return (-1);
152 }
153 }
154 */
155 #ifdef HAVE_MMAP
156 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
157 rrd_file->fd, offset);
159 /* lets see if the first read worked */
160 if (data == MAP_FAILED) {
161 rrd_set_error("mmaping file '%s': %s", file_name,
162 rrd_strerror(errno));
163 goto out_close;
164 }
165 rrd_file->file_start = data;
166 if (rdwr & RRD_CREAT) {
167 memset(data, DNAN, newfile_size - 1);
168 goto out_done;
169 }
170 #endif
171 if (rdwr & RRD_CREAT)
172 goto out_done;
173 #ifdef USE_MADVISE
174 if (rdwr & RRD_COPY) {
175 /* We will read everything in a moment (copying) */
176 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
177 } else {
178 /* We do not need to read anything in for the moment */
179 madvise(data, rrd_file->file_len, MADV_RANDOM);
180 /* the stat_head will be needed soonish, so hint accordingly */
181 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
182 }
183 #endif
185 __rrd_read(rrd->stat_head, stat_head_t,
186 1);
188 /* lets do some test if we are on track ... */
189 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
190 rrd_set_error("'%s' is not an RRD file", file_name);
191 goto out_nullify_head;
192 }
194 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
195 rrd_set_error("This RRD was created on another architecture");
196 goto out_nullify_head;
197 }
199 version = atoi(rrd->stat_head->version);
201 if (version > atoi(RRD_VERSION)) {
202 rrd_set_error("can't handle RRD file version %s",
203 rrd->stat_head->version);
204 goto out_nullify_head;
205 }
206 #if defined USE_MADVISE
207 /* the ds_def will be needed soonish, so hint accordingly */
208 madvise(data + PAGE_START(offset),
209 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
210 #endif
211 __rrd_read(rrd->ds_def, ds_def_t,
212 rrd->stat_head->ds_cnt);
214 #if defined USE_MADVISE
215 /* the rra_def will be needed soonish, so hint accordingly */
216 madvise(data + PAGE_START(offset),
217 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
218 #endif
219 __rrd_read(rrd->rra_def, rra_def_t,
220 rrd->stat_head->rra_cnt);
222 /* handle different format for the live_head */
223 if (version < 3) {
224 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
225 if (rrd->live_head == NULL) {
226 rrd_set_error("live_head_t malloc");
227 goto out_close;
228 }
230 #if defined USE_MADVISE
231 /* the live_head will be needed soonish, so hint accordingly */
232 madvise(data + PAGE_START(offset),
233 sizeof(time_t), MADV_WILLNEED);
234 #endif
235 __rrd_read(rrd->legacy_last_up,time_t,1);
236 rrd->live_head->last_up = *rrd->legacy_last_up;
237 rrd->live_head->last_up_usec = 0;
238 } else {
239 #if defined USE_MADVISE
240 /* the live_head will be needed soonish, so hint accordingly */
241 madvise(data + PAGE_START(offset),
242 sizeof(live_head_t), MADV_WILLNEED);
243 #endif
244 __rrd_read(rrd->live_head, live_head_t,
245 1);
246 }
247 __rrd_read(rrd->pdp_prep, pdp_prep_t,
248 rrd->stat_head->ds_cnt);
249 __rrd_read(rrd->cdp_prep, cdp_prep_t,
250 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
251 __rrd_read(rrd->rra_ptr, rra_ptr_t,
252 rrd->stat_head->rra_cnt);
254 rrd_file->header_len = offset;
255 rrd_file->pos = offset;
256 out_done:
257 return (rrd_file);
258 out_nullify_head:
259 rrd->stat_head = NULL;
260 out_close:
261 close(rrd_file->fd);
262 out_free:
263 free(rrd_file);
264 return NULL;
265 }
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * rrd_file  open file whose mapping is inspected with mincore(2)
 * mark      label printed in front of every line (e.g. "before"/"after")
 *
 * Each run of consecutive pages with the same residency is reported on
 * stderr with its start address inside the mapping and its length in bytes.
 */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    size_t    npages;
    size_t    page;
    size_t    run_start = 0;
    unsigned  is_in = 0, was_in = 0;
    unsigned char *vec;

    /* mincore fills in one status byte per page of the mapping */
    npages = (rrd_file->file_len + _page_size - 1) / _page_size;
    vec = malloc(npages);
    if (vec == NULL)
        return;
    memset(vec, 0, npages);

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
        for (page = 0; page < npages; ++page) {
            is_in = vec[page] & 1;  /* if lsb set then is core resident */
            if (page == 0)
                was_in = is_in;
            if (was_in != is_in) {
                fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                        was_in ? "" : "not ",
                        (void *) ((char *) rrd_file->file_start +
                                  run_start * _page_size),
                        (long) ((page - run_start) * _page_size));
                was_in = is_in;
                run_start = page;
            }
        }
        /* report the run that was still open when the loop ended */
        fprintf(stderr,
                "%s: %sin core: %p len %ld\n", mark,
                was_in ? "" : "not ",
                (void *) ((char *) rrd_file->file_start +
                          run_start * _page_size),
                (long) ((page - run_start) * _page_size));
    } else
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));

    free(vec);          /* scratch vector is only needed for this report */
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
314 /* drop cache except for the header and the active pages */
315 void rrd_dontneed(
316 rrd_file_t *rrd_file,
317 rrd_t *rrd)
318 {
319 unsigned long dontneed_start;
320 unsigned long rra_start;
321 unsigned long active_block;
322 unsigned long i;
323 ssize_t _page_size = sysconf(_SC_PAGESIZE);
325 #if defined DEBUG && DEBUG > 1
326 mincore_print(rrd_file, "before");
327 #endif
329 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
330 rra_start = rrd_file->header_len;
331 dontneed_start = PAGE_START(rra_start) + _page_size;
332 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
333 active_block =
334 PAGE_START(rra_start
335 + rrd->rra_ptr[i].cur_row
336 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
337 if (active_block > dontneed_start) {
338 #ifdef USE_MADVISE
339 madvise(rrd_file->file_start + dontneed_start,
340 active_block - dontneed_start - 1, MADV_DONTNEED);
341 #endif
342 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
343 #ifdef HAVE_POSIX_FADVISE
344 posix_fadvise(rrd_file->fd, dontneed_start,
345 active_block - dontneed_start - 1,
346 POSIX_FADV_DONTNEED);
347 #endif
348 }
349 dontneed_start = active_block;
350 /* do not release 'hot' block if update for this RAA will occur
351 * within 10 minutes */
352 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
353 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
354 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
355 dontneed_start += _page_size;
356 }
357 rra_start +=
358 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
359 sizeof(rrd_value_t);
360 }
361 #ifdef USE_MADVISE
362 madvise(rrd_file->file_start + dontneed_start,
363 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
364 #endif
365 #ifdef HAVE_POSIX_FADVISE
366 posix_fadvise(rrd_file->fd, dontneed_start,
367 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
368 #endif
369 #if defined DEBUG && DEBUG > 1
370 mincore_print(rrd_file, "after");
371 #endif
372 }
374 int rrd_close(
375 rrd_file_t *rrd_file)
376 {
377 int ret;
379 #ifdef HAVE_MMAP
380 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
381 if (ret != 0)
382 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
383 ret = munmap(rrd_file->file_start, rrd_file->file_len);
384 if (ret != 0)
385 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
386 #endif
387 ret = close(rrd_file->fd);
388 if (ret != 0)
389 rrd_set_error("closing file: %s", rrd_strerror(errno));
390 free(rrd_file);
391 rrd_file = NULL;
392 return ret;
393 }
396 /* Set position of rrd_file. */
398 off_t rrd_seek(
399 rrd_file_t *rrd_file,
400 off_t off,
401 int whence)
402 {
403 off_t ret = 0;
405 #ifdef HAVE_MMAP
406 if (whence == SEEK_SET)
407 rrd_file->pos = off;
408 else if (whence == SEEK_CUR)
409 rrd_file->pos += off;
410 else if (whence == SEEK_END)
411 rrd_file->pos = rrd_file->file_len + off;
412 #else
413 ret = lseek(rrd_file->fd, off, whence);
414 if (ret < 0)
415 rrd_set_error("lseek: %s", rrd_strerror(errno));
416 rrd_file->pos = ret;
417 #endif
418 /* mimic fseek, which returns 0 upon success */
419 return ret < 0; /*XXX: or just ret to mimic lseek */
420 }
423 /* Get current position in rrd_file. */
425 inline off_t rrd_tell(
426 rrd_file_t *rrd_file)
427 {
428 return rrd_file->pos;
429 }
432 /* Read count bytes into buffer buf, starting at rrd_file->pos.
433 * Returns the number of bytes read or <0 on error. */
435 inline ssize_t rrd_read(
436 rrd_file_t *rrd_file,
437 void *buf,
438 size_t count)
439 {
440 #ifdef HAVE_MMAP
441 size_t _cnt = count;
442 ssize_t _surplus;
444 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
445 return 0;
446 if (buf == NULL)
447 return -1; /* EINVAL */
448 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
449 if (_surplus > 0) { /* short read */
450 _cnt -= _surplus;
451 }
452 if (_cnt == 0)
453 return 0; /* EOF */
454 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
456 rrd_file->pos += _cnt; /* mimmic read() semantics */
457 return _cnt;
458 #else
459 ssize_t ret;
461 ret = read(rrd_file->fd, buf, count);
462 if (ret > 0)
463 rrd_file->pos += ret; /* mimmic read() semantics */
464 return ret;
465 #endif
466 }
469 /* Write count bytes from buffer buf to the current position
470 * rrd_file->pos of rrd_file->fd.
471 * Returns the number of bytes written or <0 on error. */
473 inline ssize_t rrd_write(
474 rrd_file_t *rrd_file,
475 const void *buf,
476 size_t count)
477 {
478 #ifdef HAVE_MMAP
479 if (count == 0)
480 return 0;
481 if (buf == NULL)
482 return -1; /* EINVAL */
483 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
484 rrd_file->pos += count;
485 return count; /* mimmic write() semantics */
486 #else
487 ssize_t _sz = write(rrd_file->fd, buf, count);
489 if (_sz > 0)
490 rrd_file->pos += _sz;
491 return _sz;
492 #endif
493 }
496 /* flush all data pending to be written to FD. */
498 inline void rrd_flush(
499 rrd_file_t *rrd_file)
500 {
501 if (fdatasync(rrd_file->fd) != 0) {
502 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
503 rrd_strerror(errno));
504 }
505 }
508 /* Initialize RRD header. */
510 void rrd_init(
511 rrd_t *rrd)
512 {
513 rrd->stat_head = NULL;
514 rrd->ds_def = NULL;
515 rrd->rra_def = NULL;
516 rrd->live_head = NULL;
517 rrd->legacy_last_up = NULL;
518 rrd->rra_ptr = NULL;
519 rrd->pdp_prep = NULL;
520 rrd->cdp_prep = NULL;
521 rrd->rrd_value = NULL;
522 }
525 /* free RRD header data. */
527 #ifdef HAVE_MMAP
528 void rrd_free(
529 rrd_t *rrd)
530 {
531 if (rrd->legacy_last_up){ /* this gets set for version < 3 only */
532 free(rrd->live_head);
533 }
534 }
535 #else
536 void rrd_free(
537 rrd_t *rrd)
538 {
539 free(rrd->live_head);
540 free(rrd->stat_head);
541 free(rrd->ds_def);
542 free(rrd->rra_def);
543 free(rrd->rra_ptr);
544 free(rrd->pdp_prep);
545 free(rrd->cdp_prep);
546 free(rrd->rrd_value);
547 }
548 #endif
/* Routine used by external libraries to free memory allocated by the
 * rrd library.  Hands mem straight to free(), so NULL is accepted. */
void rrd_freemem(void *mem)
{
    free(mem);
}
561 /* XXX: FIXME: missing documentation. */
562 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
564 int /*_rrd_*/ readfile(
565 const char *file_name,
566 char **buffer,
567 int skipfirst)
568 {
569 long writecnt = 0, totalcnt = MEMBLK;
570 long offset = 0;
571 FILE *input = NULL;
572 char c;
574 if ((strcmp("-", file_name) == 0)) {
575 input = stdin;
576 } else {
577 if ((input = fopen(file_name, "rb")) == NULL) {
578 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
579 return (-1);
580 }
581 }
582 if (skipfirst) {
583 do {
584 c = getc(input);
585 offset++;
586 } while (c != '\n' && !feof(input));
587 }
588 if (strcmp("-", file_name)) {
589 fseek(input, 0, SEEK_END);
590 /* have extra space for detecting EOF without realloc */
591 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
592 if (totalcnt < MEMBLK)
593 totalcnt = MEMBLK; /* sanitize */
594 fseek(input, offset * sizeof(char), SEEK_SET);
595 }
596 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
597 perror("Allocate Buffer:");
598 exit(1);
599 };
600 do {
601 writecnt +=
602 fread((*buffer) + writecnt, 1,
603 (totalcnt - writecnt) * sizeof(char), input);
604 if (writecnt >= totalcnt) {
605 totalcnt += MEMBLK;
606 if (((*buffer) =
607 rrd_realloc((*buffer),
608 (totalcnt + 4) * sizeof(char))) == NULL) {
609 perror("Realloc Buffer:");
610 exit(1);
611 };
612 }
613 } while (!feof(input));
614 (*buffer)[writecnt] = '\0';
615 if (strcmp("-", file_name) != 0) {
616 fclose(input);
617 };
618 return writecnt;
619 }