0dab23d5a60de454e0e09d75ba83410c5a9212d6
1 /*****************************************************************************
2 * RRDtool 1.3rc2 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
/* Allocation granularity (in bytes) used by readfile() for its initial
 * buffer and for every subsequent growth step. */
#define MEMBLK 8192

/* DEBUG 2 prints information obtained via mincore(2) */
#define DEBUG 1
/* do not calculate exact madvise hints but assume 1 page for headers and
 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial
 * if your syscalls are very slow */

/*
 * __rrd_read(dst, dst_t, cnt): fetch `cnt` objects of type `dst_t` from the
 * current header position and advance `offset` accordingly.
 *
 * The macro relies on names from the expanding scope: `offset`, plus `data`
 * (mmap build) or `rrd_file` and the label `out_nullify_head` (read build).
 * Both variants are wrapped in do { } while (0) so that an invocation
 * followed by ';' is exactly one statement, even under an unbraced if/else.
 */
#ifdef HAVE_MMAP
/* the cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += sizeof(dst_t) * (cnt); \
    } while (0)
#else
/* NOTE: the return value of read() is added to offset unchecked. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        if (((dst) = malloc(sizeof(dst_t)*(cnt))) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt)); \
    } while (0)
#endif

/* get the address of the start of this page
 * (expects a variable `_page_size` in the expanding scope) */
#ifndef PAGE_START
#define PAGE_START(addr) ((addr)&(~(_page_size-1)))
#endif
42 /* Open a database file, return its header and an open filehandle,
43 * positioned to the first cdp in the first rra.
44 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
45 * before returning an error. Do not call rrd_close upon failure of rrd_open.
46 */
48 rrd_file_t *rrd_open(
49 const char *const file_name,
50 rrd_t *rrd,
51 unsigned rdwr)
52 {
53 int flags = 0;
54 mode_t mode = S_IRUSR;
55 int version;
57 #ifdef HAVE_MMAP
58 ssize_t _page_size = sysconf(_SC_PAGESIZE);
59 int mm_prot = PROT_READ, mm_flags = 0;
60 char *data;
61 #endif
62 off_t offset = 0;
63 struct stat statb;
64 rrd_file_t *rrd_file = NULL;
65 off_t newfile_size = 0;
67 if (rdwr & RRD_CREAT) {
68 /* yes bad inline signaling alert, we are using the
69 floatcookie to pass the size in ... only used in resize */
70 newfile_size = (off_t) rrd->stat_head->float_cookie;
71 free(rrd->stat_head);
72 }
73 rrd_init(rrd);
74 rrd_file = malloc(sizeof(rrd_file_t));
75 if (rrd_file == NULL) {
76 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
77 return NULL;
78 }
79 memset(rrd_file, 0, sizeof(rrd_file_t));
81 #ifdef DEBUG
82 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
83 (RRD_READONLY | RRD_READWRITE)) {
84 /* Both READONLY and READWRITE were given, which is invalid. */
85 rrd_set_error("in read/write request mask");
86 exit(-1);
87 }
88 #endif
89 if (rdwr & RRD_READONLY) {
90 flags |= O_RDONLY;
91 #ifdef HAVE_MMAP
92 mm_flags = MAP_PRIVATE;
93 # ifdef MAP_NORESERVE
94 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
95 # endif
96 #endif
97 } else {
98 if (rdwr & RRD_READWRITE) {
99 mode |= S_IWUSR;
100 flags |= O_RDWR;
101 #ifdef HAVE_MMAP
102 mm_flags = MAP_SHARED;
103 mm_prot |= PROT_WRITE;
104 #endif
105 }
106 if (rdwr & RRD_CREAT) {
107 flags |= (O_CREAT | O_TRUNC);
108 }
109 }
110 if (rdwr & RRD_READAHEAD) {
111 #ifdef MAP_POPULATE
112 mm_flags |= MAP_POPULATE; /* populate ptes and data */
113 #endif
114 #if defined MAP_NONBLOCK
115 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
116 #endif
117 }
119 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
120 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
121 goto out_free;
122 }
124 /* Better try to avoid seeks as much as possible. stat may be heavy but
125 * many concurrent seeks are even worse. */
126 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
127 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
128 goto out_close;
129 }
130 if (newfile_size == 0) {
131 rrd_file->file_len = statb.st_size;
132 } else {
133 rrd_file->file_len = newfile_size;
134 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
135 write(rrd_file->fd, "\0", 1); /* poke */
136 lseek(rrd_file->fd, 0, SEEK_SET);
137 }
138 #ifdef HAVE_POSIX_FADVISE
139 /* In general we need no read-ahead when dealing with rrd_files.
140 When we stop reading, it is highly unlikely that we start up again.
141 In this manner we actually save time and diskaccess (and buffer cache).
142 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
143 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
144 #endif
146 /*
147 if (rdwr & RRD_READWRITE)
148 {
149 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
150 rrd_set_error("failed to disable the stream buffer\n");
151 return (-1);
152 }
153 }
154 */
155 #ifdef HAVE_MMAP
156 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
157 rrd_file->fd, offset);
159 /* lets see if the first read worked */
160 if (data == MAP_FAILED) {
161 rrd_set_error("mmaping file '%s': %s", file_name,
162 rrd_strerror(errno));
163 goto out_close;
164 }
165 rrd_file->file_start = data;
166 if (rdwr & RRD_CREAT) {
167 memset(data, DNAN, newfile_size - 1);
168 goto out_done;
169 }
170 #endif
171 if (rdwr & RRD_CREAT)
172 goto out_done;
173 #ifdef USE_MADVISE
174 if (rdwr & RRD_COPY) {
175 /* We will read everything in a moment (copying) */
176 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
177 } else {
178 /* We do not need to read anything in for the moment */
179 madvise(data, rrd_file->file_len, MADV_RANDOM);
180 /* the stat_head will be needed soonish, so hint accordingly */
181 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
182 }
183 #endif
185 __rrd_read(rrd->stat_head, stat_head_t,
186 1);
188 /* lets do some test if we are on track ... */
189 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
190 rrd_set_error("'%s' is not an RRD file", file_name);
191 goto out_nullify_head;
192 }
194 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
195 rrd_set_error("This RRD was created on another architecture");
196 goto out_nullify_head;
197 }
199 version = atoi(rrd->stat_head->version);
201 if (version > atoi(RRD_VERSION)) {
202 rrd_set_error("can't handle RRD file version %s",
203 rrd->stat_head->version);
204 goto out_nullify_head;
205 }
206 #if defined USE_MADVISE
207 /* the ds_def will be needed soonish, so hint accordingly */
208 madvise(data + PAGE_START(offset),
209 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
210 #endif
211 __rrd_read(rrd->ds_def, ds_def_t,
212 rrd->stat_head->ds_cnt);
214 #if defined USE_MADVISE
215 /* the rra_def will be needed soonish, so hint accordingly */
216 madvise(data + PAGE_START(offset),
217 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
218 #endif
219 __rrd_read(rrd->rra_def, rra_def_t,
220 rrd->stat_head->rra_cnt);
222 /* handle different format for the live_head */
223 if (version < 3) {
224 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
225 if (rrd->live_head == NULL) {
226 rrd_set_error("live_head_t malloc");
227 goto out_close;
228 }
229 #ifdef HAVE_MMAP
230 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
231 offset += sizeof(long);
232 #else
233 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
234 #endif
235 rrd->live_head->last_up_usec = 0;
236 } else {
237 #if defined USE_MADVISE
238 /* the live_head will be needed soonish, so hint accordingly */
239 madvise(data + PAGE_START(offset),
240 sizeof(live_head_t), MADV_WILLNEED);
241 #endif
242 __rrd_read(rrd->live_head, live_head_t,
243 1);
244 }
245 __rrd_read(rrd->pdp_prep, pdp_prep_t,
246 rrd->stat_head->ds_cnt);
247 __rrd_read(rrd->cdp_prep, cdp_prep_t,
248 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
249 __rrd_read(rrd->rra_ptr, rra_ptr_t,
250 rrd->stat_head->rra_cnt);
252 rrd_file->header_len = offset;
253 rrd_file->pos = offset;
254 out_done:
255 return (rrd_file);
256 out_nullify_head:
257 rrd->stat_head = NULL;
258 out_close:
259 close(rrd_file->fd);
260 out_free:
261 free(rrd_file);
262 return NULL;
263 }
#if defined DEBUG && DEBUG > 1
/* Print the runs of in-core / not-in-core pages of the current rrd_file's
 * mapping to stderr, each line prefixed with `mark`.
 *
 * mincore(2) reports one status byte per page, so the vector is sized and
 * walked per page. Addresses printed are addresses inside the mapping and
 * lengths are in bytes, rounded up to whole pages. */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    size_t    page_cnt;
    size_t    page;
    size_t    run_start = 0;    /* first page of the run being tracked */
    unsigned  is_in = 0, was_in = 0;
    unsigned char *vec;

    page_cnt = (rrd_file->file_len + _page_size - 1) / _page_size;
    if (page_cnt == 0)
        return;
    vec = malloc(page_cnt);
    if (vec == NULL)
        return;
    memset(vec, 0, page_cnt);

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
        for (page = 0; page < page_cnt; ++page) {
            is_in = vec[page] & 1;  /* if lsb set then is core resident */
            if (page == 0)
                was_in = is_in;
            if (was_in != is_in) {
                fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                        was_in ? "" : "not ",
                        (void *) (rrd_file->file_start +
                                  run_start * _page_size),
                        (long) ((page - run_start) * _page_size));
                was_in = is_in;
                run_start = page;
            }
        }
        /* flush the final run */
        fprintf(stderr,
                "%s: %sin core: %p len %ld\n", mark,
                was_in ? "" : "not ",
                (void *) (rrd_file->file_start + run_start * _page_size),
                (long) ((page - run_start) * _page_size));
    } else
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));

    free(vec);
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
312 /* drop cache except for the header and the active pages */
313 void rrd_dontneed(
314 rrd_file_t *rrd_file,
315 rrd_t *rrd)
316 {
317 unsigned long dontneed_start;
318 unsigned long rra_start;
319 unsigned long active_block;
320 unsigned long i;
321 ssize_t _page_size = sysconf(_SC_PAGESIZE);
323 #if defined DEBUG && DEBUG > 1
324 mincore_print(rrd_file, "before");
325 #endif
327 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
328 rra_start = rrd_file->header_len;
329 dontneed_start = PAGE_START(rra_start) + _page_size;
330 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
331 active_block =
332 PAGE_START(rra_start
333 + rrd->rra_ptr[i].cur_row
334 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
335 if (active_block > dontneed_start) {
336 #ifdef USE_MADVISE
337 madvise(rrd_file->file_start + dontneed_start,
338 active_block - dontneed_start - 1, MADV_DONTNEED);
339 #endif
340 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
341 #ifdef HAVE_POSIX_FADVISE
342 posix_fadvise(rrd_file->fd, dontneed_start,
343 active_block - dontneed_start - 1,
344 POSIX_FADV_DONTNEED);
345 #endif
346 }
347 dontneed_start = active_block;
348 /* do not release 'hot' block if update for this RAA will occur
349 * within 10 minutes */
350 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
351 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
352 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
353 dontneed_start += _page_size;
354 }
355 rra_start +=
356 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
357 sizeof(rrd_value_t);
358 }
359 #ifdef USE_MADVISE
360 madvise(rrd_file->file_start + dontneed_start,
361 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
362 #endif
363 #ifdef HAVE_POSIX_FADVISE
364 posix_fadvise(rrd_file->fd, dontneed_start,
365 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
366 #endif
367 #if defined DEBUG && DEBUG > 1
368 mincore_print(rrd_file, "after");
369 #endif
370 }
372 int rrd_close(
373 rrd_file_t *rrd_file)
374 {
375 int ret;
377 #ifdef HAVE_MMAP
378 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
379 if (ret != 0)
380 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
381 ret = munmap(rrd_file->file_start, rrd_file->file_len);
382 if (ret != 0)
383 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
384 #endif
385 ret = close(rrd_file->fd);
386 if (ret != 0)
387 rrd_set_error("closing file: %s", rrd_strerror(errno));
388 free(rrd_file);
389 rrd_file = NULL;
390 return ret;
391 }
394 /* Set position of rrd_file. */
396 off_t rrd_seek(
397 rrd_file_t *rrd_file,
398 off_t off,
399 int whence)
400 {
401 off_t ret = 0;
403 #ifdef HAVE_MMAP
404 if (whence == SEEK_SET)
405 rrd_file->pos = off;
406 else if (whence == SEEK_CUR)
407 rrd_file->pos += off;
408 else if (whence == SEEK_END)
409 rrd_file->pos = rrd_file->file_len + off;
410 #else
411 ret = lseek(rrd_file->fd, off, whence);
412 if (ret < 0)
413 rrd_set_error("lseek: %s", rrd_strerror(errno));
414 rrd_file->pos = ret;
415 #endif
416 /* mimic fseek, which returns 0 upon success */
417 return ret < 0; /*XXX: or just ret to mimic lseek */
418 }
421 /* Get current position in rrd_file. */
423 inline off_t rrd_tell(
424 rrd_file_t *rrd_file)
425 {
426 return rrd_file->pos;
427 }
430 /* Read count bytes into buffer buf, starting at rrd_file->pos.
431 * Returns the number of bytes read or <0 on error. */
433 inline ssize_t rrd_read(
434 rrd_file_t *rrd_file,
435 void *buf,
436 size_t count)
437 {
438 #ifdef HAVE_MMAP
439 size_t _cnt = count;
440 ssize_t _surplus;
442 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
443 return 0;
444 if (buf == NULL)
445 return -1; /* EINVAL */
446 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
447 if (_surplus > 0) { /* short read */
448 _cnt -= _surplus;
449 }
450 if (_cnt == 0)
451 return 0; /* EOF */
452 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
454 rrd_file->pos += _cnt; /* mimmic read() semantics */
455 return _cnt;
456 #else
457 ssize_t ret;
459 ret = read(rrd_file->fd, buf, count);
460 if (ret > 0)
461 rrd_file->pos += ret; /* mimmic read() semantics */
462 return ret;
463 #endif
464 }
467 /* Write count bytes from buffer buf to the current position
468 * rrd_file->pos of rrd_file->fd.
469 * Returns the number of bytes written or <0 on error. */
471 inline ssize_t rrd_write(
472 rrd_file_t *rrd_file,
473 const void *buf,
474 size_t count)
475 {
476 #ifdef HAVE_MMAP
477 if (count == 0)
478 return 0;
479 if (buf == NULL)
480 return -1; /* EINVAL */
481 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
482 rrd_file->pos += count;
483 return count; /* mimmic write() semantics */
484 #else
485 ssize_t _sz = write(rrd_file->fd, buf, count);
487 if (_sz > 0)
488 rrd_file->pos += _sz;
489 return _sz;
490 #endif
491 }
494 /* flush all data pending to be written to FD. */
496 inline void rrd_flush(
497 rrd_file_t *rrd_file)
498 {
499 if (fdatasync(rrd_file->fd) != 0) {
500 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
501 rrd_strerror(errno));
502 }
503 }
506 /* Initialize RRD header. */
508 void rrd_init(
509 rrd_t *rrd)
510 {
511 rrd->stat_head = NULL;
512 rrd->ds_def = NULL;
513 rrd->rra_def = NULL;
514 rrd->live_head = NULL;
515 rrd->rra_ptr = NULL;
516 rrd->pdp_prep = NULL;
517 rrd->cdp_prep = NULL;
518 rrd->rrd_value = NULL;
519 }
522 /* free RRD header data. */
524 #ifdef HAVE_MMAP
525 inline void rrd_free(
526 rrd_t UNUSED(*rrd))
527 {
528 }
529 #else
530 void rrd_free(
531 rrd_t *rrd)
532 {
533 free(rrd->live_head);
534 free(rrd->stat_head);
535 free(rrd->ds_def);
536 free(rrd->rra_def);
537 free(rrd->rra_ptr);
538 free(rrd->pdp_prep);
539 free(rrd->cdp_prep);
540 free(rrd->rrd_value);
541 }
542 #endif
/* Entry point for external libraries: hands `mem`, a block that was
 * allocated by the rrd library, to free(). */
void rrd_freemem(void *mem)
{
    free(mem);
}
555 /* XXX: FIXME: missing documentation. */
556 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
558 int /*_rrd_*/ readfile(
559 const char *file_name,
560 char **buffer,
561 int skipfirst)
562 {
563 long writecnt = 0, totalcnt = MEMBLK;
564 long offset = 0;
565 FILE *input = NULL;
566 char c;
568 if ((strcmp("-", file_name) == 0)) {
569 input = stdin;
570 } else {
571 if ((input = fopen(file_name, "rb")) == NULL) {
572 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
573 return (-1);
574 }
575 }
576 if (skipfirst) {
577 do {
578 c = getc(input);
579 offset++;
580 } while (c != '\n' && !feof(input));
581 }
582 if (strcmp("-", file_name)) {
583 fseek(input, 0, SEEK_END);
584 /* have extra space for detecting EOF without realloc */
585 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
586 if (totalcnt < MEMBLK)
587 totalcnt = MEMBLK; /* sanitize */
588 fseek(input, offset * sizeof(char), SEEK_SET);
589 }
590 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
591 perror("Allocate Buffer:");
592 exit(1);
593 };
594 do {
595 writecnt +=
596 fread((*buffer) + writecnt, 1,
597 (totalcnt - writecnt) * sizeof(char), input);
598 if (writecnt >= totalcnt) {
599 totalcnt += MEMBLK;
600 if (((*buffer) =
601 rrd_realloc((*buffer),
602 (totalcnt + 4) * sizeof(char))) == NULL) {
603 perror("Realloc Buffer:");
604 exit(1);
605 };
606 }
607 } while (!feof(input));
608 (*buffer)[writecnt] = '\0';
609 if (strcmp("-", file_name) != 0) {
610 fclose(input);
611 };
612 return writecnt;
613 }