1 /*****************************************************************************
2 * RRDtool 1.2.99907080300 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
/* Size in bytes of one allocation step: readfile() starts with a buffer of
 * at least this size and grows it by this amount whenever it fills up. */
#define MEMBLK 8192

/* Debug level.  Being defined at all enables the read/write request mask
 * check in rrd_open(); DEBUG 2 prints information obtained via mincore(2)
 * before and after rrd_dontneed() does its work. */
#define DEBUG 1

/* NOTE(review): the two comments below describe optional compile-time
 * switches, but no matching #define is visible in this part of the file. */
/* do not calculate exact madvise hints but assume 1 page for headers and
 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial
 * if your syscalls are very slow */
/* __rrd_read(dst, dst_t, cnt): fetch the next cnt items of type dst_t of the
 * RRD header into dst and advance the local variable 'offset' accordingly.
 *
 * Both variants expand inside rrd_open() and rely on its locals ('offset',
 * 'rrd_file', and 'data' for the mmap variant) as well as on its
 * 'out_nullify_head' label, which is jumped to on failure after the error
 * has been recorded with rrd_set_error().
 *
 * The expansion is wrapped in do { } while (0) so that it behaves like one
 * statement, even as the body of an unbraced if/else. */
#ifdef HAVE_MMAP
/* mmap variant: nothing is copied, dst is pointed straight into the mapping.
 * The request is refused when it does not fit between 'offset' and the end
 * of the file, so dst never points past the mapped area.
 *
 * the cast to void* is there to avoid this warning seen on ia64 with certain
 * versions of gcc: 'cast increases required alignment of target type'
 */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        if (offset > rrd_file->file_len || \
            (size_t) (cnt) > \
            (size_t) (rrd_file->file_len - offset) / sizeof(dst_t)) { \
            rrd_set_error(#dst " lies beyond the end of the file"); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t *) (void *) (data + offset); \
        offset += sizeof(dst_t) * (cnt); \
    } while (0)
#else
/* read() variant: dst receives a freshly malloc'ed buffer filled from
 * rrd_file->fd.  A failed or short read releases the buffer again, leaves
 * dst NULL and does not touch 'offset'. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        size_t rrd_read_want_ = sizeof(dst_t) * (cnt); \
        ssize_t rrd_read_got_; \
        if (((dst) = malloc(rrd_read_want_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_read_got_ = read(rrd_file->fd, (dst), rrd_read_want_); \
        if (rrd_read_got_ < 0 || (size_t) rrd_read_got_ != rrd_read_want_) { \
            free(dst); \
            (dst) = NULL; \
            rrd_set_error(#dst " read"); \
            goto out_nullify_head; \
        } \
        offset += rrd_read_got_; \
    } while (0)
#endif
/* Round addr down to the start of the page that contains it.
 * The expansion uses the variable _page_size that is in scope where the
 * macro is used; the mask is only correct when that size is a power of two. */
#if !defined(PAGE_START)
# define PAGE_START(addr) ((addr) & ~(_page_size - 1))
#endif
42 /* Open a database file, return its header and an open filehandle,
43 * positioned to the first cdp in the first rra.
44 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
45 * before returning an error. Do not call rrd_close upon failure of rrd_open.
46 */
48 rrd_file_t *rrd_open(
49 const char *const file_name,
50 rrd_t *rrd,
51 unsigned rdwr)
52 {
53 int flags = 0;
54 mode_t mode = S_IRUSR;
55 int version;
57 #ifdef HAVE_MMAP
58 ssize_t _page_size = sysconf(_SC_PAGESIZE);
59 int mm_prot = PROT_READ, mm_flags = 0;
60 char *data;
61 #endif
62 off_t offset = 0;
63 struct stat statb;
64 rrd_file_t *rrd_file = NULL;
65 off_t newfile_size = 0;
67 if (rdwr & RRD_CREAT) {
68 /* yes bad inline signaling alert, we are using the
69 floatcookie to pass the size in ... only used in resize */
70 newfile_size = (off_t) rrd->stat_head->float_cookie;
71 free(rrd->stat_head);
72 }
73 rrd_init(rrd);
74 rrd_file = malloc(sizeof(rrd_file_t));
75 if (rrd_file == NULL) {
76 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
77 return NULL;
78 }
79 memset(rrd_file, 0, sizeof(rrd_file_t));
81 #ifdef DEBUG
82 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
83 (RRD_READONLY | RRD_READWRITE)) {
84 /* Both READONLY and READWRITE were given, which is invalid. */
85 rrd_set_error("in read/write request mask");
86 exit(-1);
87 }
88 #endif
89 if (rdwr & RRD_READONLY) {
90 flags |= O_RDONLY;
91 #ifdef HAVE_MMAP
92 mm_flags = MAP_PRIVATE;
93 # ifdef MAP_NORESERVE
94 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
95 # endif
96 #endif
97 } else {
98 if (rdwr & RRD_READWRITE) {
99 mode |= S_IWUSR;
100 flags |= O_RDWR;
101 #ifdef HAVE_MMAP
102 mm_flags = MAP_SHARED;
103 mm_prot |= PROT_WRITE;
104 #endif
105 }
106 if (rdwr & RRD_CREAT) {
107 flags |= (O_CREAT | O_TRUNC);
108 }
109 }
110 if (rdwr & RRD_READAHEAD) {
111 #ifdef MAP_POPULATE
112 mm_flags |= MAP_POPULATE; /* populate ptes and data */
113 #endif
114 #if defined MAP_NONBLOCK
115 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
116 #endif
117 }
119 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
120 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
121 goto out_free;
122 }
124 /* Better try to avoid seeks as much as possible. stat may be heavy but
125 * many concurrent seeks are even worse. */
126 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
127 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
128 goto out_close;
129 }
130 if (newfile_size == 0) {
131 rrd_file->file_len = statb.st_size;
132 } else {
133 rrd_file->file_len = newfile_size;
134 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
135 write(rrd_file->fd, "\0", 1); /* poke */
136 lseek(rrd_file->fd, 0, SEEK_SET);
137 }
138 #ifdef HAVE_POSIX_FADVISE
139 /* In general we need no read-ahead when dealing with rrd_files.
140 When we stop reading, it is highly unlikely that we start up again.
141 In this manner we actually save time and diskaccess (and buffer cache).
142 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
143 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
144 #endif
146 /*
147 if (rdwr & RRD_READWRITE)
148 {
149 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
150 rrd_set_error("failed to disable the stream buffer\n");
151 return (-1);
152 }
153 }
154 */
155 #ifdef HAVE_MMAP
156 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
157 rrd_file->fd, offset);
159 /* lets see if the first read worked */
160 if (data == MAP_FAILED) {
161 rrd_set_error("mmaping file '%s': %s", file_name,
162 rrd_strerror(errno));
163 goto out_close;
164 }
165 rrd_file->file_start = data;
166 if (rdwr & RRD_CREAT) {
167 memset(data, DNAN, newfile_size - 1);
168 goto out_done;
169 }
170 #endif
171 if (rdwr & RRD_CREAT)
172 goto out_done;
173 #ifdef USE_MADVISE
174 if (rdwr & RRD_COPY) {
175 /* We will read everything in a moment (copying) */
176 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
177 } else {
178 /* We do not need to read anything in for the moment */
179 madvise(data, rrd_file->file_len, MADV_RANDOM);
180 /* the stat_head will be needed soonish, so hint accordingly */
181 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
182 }
183 #endif
185 __rrd_read(rrd->stat_head, stat_head_t,
186 1);
188 /* lets do some test if we are on track ... */
189 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
190 rrd_set_error("'%s' is not an RRD file", file_name);
191 goto out_nullify_head;
192 }
194 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
195 rrd_set_error("This RRD was created on another architecture");
196 goto out_nullify_head;
197 }
199 version = atoi(rrd->stat_head->version);
201 if (version > atoi(RRD_VERSION)) {
202 rrd_set_error("can't handle RRD file version %s",
203 rrd->stat_head->version);
204 goto out_nullify_head;
205 }
206 #if defined USE_MADVISE
207 /* the ds_def will be needed soonish, so hint accordingly */
208 madvise(data + PAGE_START(offset),
209 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
210 #endif
211 __rrd_read(rrd->ds_def, ds_def_t,
212 rrd->stat_head->ds_cnt);
214 #if defined USE_MADVISE
215 /* the rra_def will be needed soonish, so hint accordingly */
216 madvise(data + PAGE_START(offset),
217 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
218 #endif
219 __rrd_read(rrd->rra_def, rra_def_t,
220 rrd->stat_head->rra_cnt);
222 /* handle different format for the live_head */
223 if (version < 3) {
224 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
225 if (rrd->live_head == NULL) {
226 rrd_set_error("live_head_t malloc");
227 goto out_close;
228 }
229 #ifdef HAVE_MMAP
230 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
231 offset += sizeof(long);
232 #else
233 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
234 #endif
235 rrd->live_head->last_up_usec = 0;
236 } else {
237 #if defined USE_MADVISE
238 /* the live_head will be needed soonish, so hint accordingly */
239 madvise(data + PAGE_START(offset),
240 sizeof(live_head_t), MADV_WILLNEED);
241 #endif
242 __rrd_read(rrd->live_head, live_head_t,
243 1);
244 }
245 //XXX: This doesn't look like it needs madvise
246 __rrd_read(rrd->pdp_prep, pdp_prep_t,
247 rrd->stat_head->ds_cnt);
249 //XXX: This could benefit from madvise()ing
250 __rrd_read(rrd->cdp_prep, cdp_prep_t,
251 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
253 //XXX: This could benefit from madvise()ing
254 __rrd_read(rrd->rra_ptr, rra_ptr_t,
255 rrd->stat_head->rra_cnt);
257 rrd_file->header_len = offset;
258 rrd_file->pos = offset;
259 out_done:
260 return (rrd_file);
261 out_nullify_head:
262 rrd->stat_head = NULL;
263 out_close:
264 close(rrd_file->fd);
265 out_free:
266 free(rrd_file);
267 return NULL;
268 }
271 /* Print list of in-core pages of a the current rrd_file. */
272 static
273 void mincore_print(
274 rrd_file_t *rrd_file,
275 char *mark)
276 {
277 #ifdef HAVE_MMAP
278 /* pretty print blocks in core */
279 off_t off;
280 unsigned char *vec;
281 ssize_t _page_size = sysconf(_SC_PAGESIZE);
283 off = rrd_file->file_len +
284 ((rrd_file->file_len + _page_size - 1) / _page_size);
285 vec = malloc(off);
286 if (vec != NULL) {
287 memset(vec, 0, off);
288 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
289 int prev;
290 unsigned is_in = 0, was_in = 0;
292 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
293 is_in = vec[off] & 1; /* if lsb set then is core resident */
294 if (off == 0)
295 was_in = is_in;
296 if (was_in != is_in) {
297 fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
298 was_in ? "" : "not ", vec + prev, off - prev);
299 was_in = is_in;
300 prev = off;
301 }
302 }
303 fprintf(stderr,
304 "%s: %sin core: %p len %ld\n", mark,
305 was_in ? "" : "not ", vec + prev, off - prev);
306 } else
307 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
308 }
309 #else
310 fprintf(stderr, "sorry mincore only works with mmap");
311 #endif
312 }
315 /* drop cache except for the header and the active pages */
316 void rrd_dontneed(
317 rrd_file_t *rrd_file,
318 rrd_t *rrd)
319 {
320 unsigned long dontneed_start;
321 unsigned long rra_start;
322 unsigned long active_block;
323 unsigned long i;
324 ssize_t _page_size = sysconf(_SC_PAGESIZE);
326 #if defined DEBUG && DEBUG > 1
327 mincore_print(rrd_file, "before");
328 #endif
330 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
331 rra_start = rrd_file->header_len;
332 dontneed_start = PAGE_START(rra_start) + _page_size;
333 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
334 active_block =
335 PAGE_START(rra_start
336 + rrd->rra_ptr[i].cur_row
337 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
338 if (active_block > dontneed_start) {
339 #ifdef USE_MADVISE
340 madvise(rrd_file->file_start + dontneed_start,
341 active_block - dontneed_start - 1, MADV_DONTNEED);
342 #endif
343 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
344 #ifdef HAVE_POSIX_FADVISE
345 posix_fadvise(rrd_file->fd, dontneed_start,
346 active_block - dontneed_start - 1,
347 POSIX_FADV_DONTNEED);
348 #endif
349 }
350 dontneed_start = active_block;
351 /* do not release 'hot' block if update for this RAA will occur
352 * within 10 minutes */
353 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
354 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
355 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
356 dontneed_start += _page_size;
357 }
358 rra_start +=
359 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
360 sizeof(rrd_value_t);
361 }
362 #ifdef USE_MADVISE
363 madvise(rrd_file->file_start + dontneed_start,
364 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
365 #endif
366 #ifdef HAVE_POSIX_FADVISE
367 posix_fadvise(rrd_file->fd, dontneed_start,
368 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
369 #endif
370 #if defined DEBUG && DEBUG > 1
371 mincore_print(rrd_file, "after");
372 #endif
373 }
375 int rrd_close(
376 rrd_file_t *rrd_file)
377 {
378 int ret;
380 #ifdef HAVE_MMAP
381 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
382 if (ret != 0)
383 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
384 ret = munmap(rrd_file->file_start, rrd_file->file_len);
385 if (ret != 0)
386 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
387 #endif
388 ret = close(rrd_file->fd);
389 if (ret != 0)
390 rrd_set_error("closing file: %s", rrd_strerror(errno));
391 free(rrd_file);
392 rrd_file = NULL;
393 return ret;
394 }
397 /* Set position of rrd_file. */
399 off_t rrd_seek(
400 rrd_file_t *rrd_file,
401 off_t off,
402 int whence)
403 {
404 off_t ret = 0;
406 #ifdef HAVE_MMAP
407 if (whence == SEEK_SET)
408 rrd_file->pos = off;
409 else if (whence == SEEK_CUR)
410 rrd_file->pos += off;
411 else if (whence == SEEK_END)
412 rrd_file->pos = rrd_file->file_len + off;
413 #else
414 ret = lseek(rrd_file->fd, off, whence);
415 if (ret < 0)
416 rrd_set_error("lseek: %s", rrd_strerror(errno));
417 rrd_file->pos = ret;
418 #endif
419 /* mimic fseek, which returns 0 upon success */
420 return ret < 0; //XXX: or just ret to mimic lseek
421 }
424 /* Get current position in rrd_file. */
426 inline off_t rrd_tell(
427 rrd_file_t *rrd_file)
428 {
429 return rrd_file->pos;
430 }
433 /* Read count bytes into buffer buf, starting at rrd_file->pos.
434 * Returns the number of bytes read or <0 on error. */
436 inline ssize_t rrd_read(
437 rrd_file_t *rrd_file,
438 void *buf,
439 size_t count)
440 {
441 #ifdef HAVE_MMAP
442 size_t _cnt = count;
443 ssize_t _surplus;
445 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
446 return 0;
447 if (buf == NULL)
448 return -1; /* EINVAL */
449 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
450 if (_surplus > 0) { /* short read */
451 _cnt -= _surplus;
452 }
453 if (_cnt == 0)
454 return 0; /* EOF */
455 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
457 rrd_file->pos += _cnt; /* mimmic read() semantics */
458 return _cnt;
459 #else
460 ssize_t ret;
462 ret = read(rrd_file->fd, buf, count);
463 if (ret > 0)
464 rrd_file->pos += ret; /* mimmic read() semantics */
465 return ret;
466 #endif
467 }
470 /* Write count bytes from buffer buf to the current position
471 * rrd_file->pos of rrd_file->fd.
472 * Returns the number of bytes written or <0 on error. */
474 inline ssize_t rrd_write(
475 rrd_file_t *rrd_file,
476 const void *buf,
477 size_t count)
478 {
479 #ifdef HAVE_MMAP
480 if (count == 0)
481 return 0;
482 if (buf == NULL)
483 return -1; /* EINVAL */
484 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
485 rrd_file->pos += count;
486 return count; /* mimmic write() semantics */
487 #else
488 ssize_t _sz = write(rrd_file->fd, buf, count);
490 if (_sz > 0)
491 rrd_file->pos += _sz;
492 return _sz;
493 #endif
494 }
497 /* flush all data pending to be written to FD. */
499 inline void rrd_flush(
500 rrd_file_t *rrd_file)
501 {
502 if (fdatasync(rrd_file->fd) != 0) {
503 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
504 rrd_strerror(errno));
505 }
506 }
509 /* Initialize RRD header. */
511 void rrd_init(
512 rrd_t *rrd)
513 {
514 rrd->stat_head = NULL;
515 rrd->ds_def = NULL;
516 rrd->rra_def = NULL;
517 rrd->live_head = NULL;
518 rrd->rra_ptr = NULL;
519 rrd->pdp_prep = NULL;
520 rrd->cdp_prep = NULL;
521 rrd->rrd_value = NULL;
522 }
525 /* free RRD header data. */
527 #ifdef HAVE_MMAP
528 inline void rrd_free(
529 rrd_t UNUSED(*rrd))
530 {
531 }
532 #else
533 void rrd_free(
534 rrd_t *rrd)
535 {
536 free(rrd->live_head);
537 free(rrd->stat_head);
538 free(rrd->ds_def);
539 free(rrd->rra_def);
540 free(rrd->rra_ptr);
541 free(rrd->pdp_prep);
542 free(rrd->cdp_prep);
543 free(rrd->rrd_value);
544 }
545 #endif
/* Release a block of memory that was handed out by the rrd library.
 * Meant for external libraries: it simply passes mem on to free(). */
void rrd_freemem(
    void *mem)
{
    free(mem);
}
558 /* XXX: FIXME: missing documentation. */
559 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
561 int /*_rrd_*/ readfile(
562 const char *file_name,
563 char **buffer,
564 int skipfirst)
565 {
566 long writecnt = 0, totalcnt = MEMBLK;
567 long offset = 0;
568 FILE *input = NULL;
569 char c;
571 if ((strcmp("-", file_name) == 0)) {
572 input = stdin;
573 } else {
574 if ((input = fopen(file_name, "rb")) == NULL) {
575 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
576 return (-1);
577 }
578 }
579 if (skipfirst) {
580 do {
581 c = getc(input);
582 offset++;
583 } while (c != '\n' && !feof(input));
584 }
585 if (strcmp("-", file_name)) {
586 fseek(input, 0, SEEK_END);
587 /* have extra space for detecting EOF without realloc */
588 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
589 if (totalcnt < MEMBLK)
590 totalcnt = MEMBLK; /* sanitize */
591 fseek(input, offset * sizeof(char), SEEK_SET);
592 }
593 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
594 perror("Allocate Buffer:");
595 exit(1);
596 };
597 do {
598 writecnt +=
599 fread((*buffer) + writecnt, 1,
600 (totalcnt - writecnt) * sizeof(char), input);
601 if (writecnt >= totalcnt) {
602 totalcnt += MEMBLK;
603 if (((*buffer) =
604 rrd_realloc((*buffer),
605 (totalcnt + 4) * sizeof(char))) == NULL) {
606 perror("Realloc Buffer:");
607 exit(1);
608 };
609 }
610 } while (!feof(input));
611 (*buffer)[writecnt] = '\0';
612 if (strcmp("-", file_name) != 0) {
613 fclose(input);
614 };
615 return writecnt;
616 }