d701204b4a492994423a27d9a117806f567614e1
1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 //#define DEBUG 2
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
17 //#define ONE_PAGE 1
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
20 #define CHECK_MADVISE_OVERLAPS 1
/* __rrd_read(dst, dst_t, cnt): load `cnt` consecutive objects of type
 * `dst_t` from the current header position and advance `offset` past them.
 *
 * The macro is meant to be expanded inside rrd_open() only: it relies on the
 * locals `rrd_file`, `offset` (and `data` in the mmap build) and on the
 * `out_nullify_head` label, to which it jumps after calling rrd_set_error()
 * whenever the request cannot be satisfied.
 *
 * Both variants are wrapped in do { } while (0) so that an expansion behaves
 * like a single statement.
 */
#ifdef HAVE_MMAP
/* mmap build: nothing is copied, (dst) is simply pointed into the mapping.
 * The request is first checked against the length of the file so that a
 * truncated or corrupt header cannot yield pointers beyond the mapping.
 *
 * the cast to void* is there to avoid this warning seen on ia64 with certain
 * versions of gcc: 'cast increases required alignment of target type'
 */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        size_t _rd_cnt = (size_t) (cnt); \
        size_t _rd_wanted = sizeof(dst_t) * _rd_cnt; \
        if (offset < 0 \
            || (off_t) rrd_file->file_len < offset \
            || _rd_cnt > ((size_t) -1) / sizeof(dst_t) \
            || _rd_wanted > (size_t) rrd_file->file_len - (size_t) offset) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += _rd_wanted; \
    } while (0)
#else
/* read(2) build: (dst) receives a freshly malloc()ed buffer which is filled
 * from rrd_file->fd.  A failed or short read is treated as an error instead
 * of being silently added to `offset`. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        size_t _rd_cnt = (size_t) (cnt); \
        size_t _rd_wanted = sizeof(dst_t) * _rd_cnt; \
        ssize_t _rd_got; \
        if (_rd_cnt > ((size_t) -1) / sizeof(dst_t) \
            || ((dst) = malloc(_rd_wanted)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        _rd_got = read(rrd_file->fd, (dst), _rd_wanted); \
        if (_rd_got < 0 || (size_t) _rd_got != _rd_wanted) { \
            rrd_set_error("short read while loading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += _rd_got; \
    } while (0)
#endif
/* Round (addr) down to the start of the page that contains it.
 * The expansion refers to a variable named `_page_size`, which therefore has
 * to be in scope wherever the macro is used; the masking only produces a page
 * boundary when that value is a power of two. */
#if !defined PAGE_START
#  define PAGE_START(addr) ((addr) & ~(_page_size - 1))
#endif
#ifdef HAVE_MMAP
/* The most recent madvise() request issued through _madvise(): start
 * address, length and advice.  Only consulted when CHECK_MADVISE_OVERLAPS
 * is defined. */
typedef struct _madvise_vec_t {
    void     *start;
    ssize_t   length;
    int       hint;
} _madvise_vec_t;
_madvise_vec_t _madv_vec = { NULL, 0, 0 };
#endif

/* _madvise(start, length, hint): wrapper around madvise(2).
 *
 * With CHECK_MADVISE_OVERLAPS the system call is skipped only when the
 * request is identical (same start, same length, same hint) to the previous
 * one; any difference causes the call to be made and remembered.  Requiring
 * start AND length to differ, as was done before, silently dropped requests
 * that shared just one of the two with their predecessor.
 *
 * The return value of madvise() is deliberately ignored: the hints are
 * advisory.  Both variants can be used as a single statement.
 */
#if defined CHECK_MADVISE_OVERLAPS
#define _madvise(_start, _off, _hint) \
    do { \
        void   *_mv_start = (void *) (_start); \
        ssize_t _mv_length = (ssize_t) (_off); \
        int     _mv_hint = (_hint); \
        if (_mv_start != _madv_vec.start \
            || _mv_length != _madv_vec.length \
            || _mv_hint != _madv_vec.hint) { \
            _madv_vec.start = _mv_start; \
            _madv_vec.length = _mv_length; \
            _madv_vec.hint = _mv_hint; \
            madvise(_mv_start, (size_t) _mv_length, _mv_hint); \
        } \
    } while (0)
#else
#define _madvise(_start, _off, _hint) \
    madvise((_start), (_off), (_hint))
#endif
63 /* Open a database file, return its header and an open filehandle,
64 * positioned to the first cdp in the first rra.
65 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
66 * before returning an error. Do not call rrd_close upon failure of rrd_open.
67 */
69 rrd_file_t *rrd_open(
70 const char *const file_name,
71 rrd_t *rrd,
72 unsigned rdwr)
73 {
74 int flags = 0;
75 mode_t mode = S_IRUSR;
76 int version;
78 #ifdef HAVE_MMAP
79 ssize_t _page_size = sysconf(_SC_PAGESIZE);
80 int mm_prot = PROT_READ, mm_flags = 0;
81 char *data;
82 #endif
83 off_t offset = 0;
84 struct stat statb;
85 rrd_file_t *rrd_file = NULL;
86 off_t newfile_size = 0;
88 if (rdwr & RRD_CREAT) {
89 /* yes bad inline signaling alert, we are using the
90 floatcookie to pass the size in ... only used in resize */
91 newfile_size = (off_t) rrd->stat_head->float_cookie;
92 free(rrd->stat_head);
93 }
94 rrd_init(rrd);
95 rrd_file = malloc(sizeof(rrd_file_t));
96 if (rrd_file == NULL) {
97 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
98 return NULL;
99 }
100 memset(rrd_file, 0, sizeof(rrd_file_t));
102 #ifdef DEBUG
103 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
104 (RRD_READONLY | RRD_READWRITE)) {
105 /* Both READONLY and READWRITE were given, which is invalid. */
106 rrd_set_error("in read/write request mask");
107 exit(-1);
108 }
109 #endif
110 if (rdwr & RRD_READONLY) {
111 flags |= O_RDONLY;
112 #ifdef HAVE_MMAP
113 mm_flags = MAP_PRIVATE;
114 # ifdef MAP_NORESERVE
115 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
116 # endif
117 #endif
118 } else {
119 if (rdwr & RRD_READWRITE) {
120 mode |= S_IWUSR;
121 flags |= O_RDWR;
122 #ifdef HAVE_MMAP
123 mm_flags = MAP_SHARED;
124 mm_prot |= PROT_WRITE;
125 #endif
126 }
127 if (rdwr & RRD_CREAT) {
128 flags |= (O_CREAT | O_TRUNC);
129 }
130 }
131 if (rdwr & RRD_READAHEAD) {
132 #ifdef MAP_POPULATE
133 mm_flags |= MAP_POPULATE; /* populate ptes and data */
134 #endif
135 #if defined MAP_NONBLOCK
136 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
137 #endif
138 #ifdef USE_DIRECT_IO
139 } else {
140 flags |= O_DIRECT;
141 #endif
142 }
143 #ifdef O_NONBLOCK
144 flags |= O_NONBLOCK;
145 #endif
147 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
148 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
149 goto out_free;
150 }
152 /* Better try to avoid seeks as much as possible. stat may be heavy but
153 * many concurrent seeks are even worse. */
154 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
155 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
156 goto out_close;
157 }
158 if (newfile_size == 0) {
159 rrd_file->file_len = statb.st_size;
160 } else {
161 rrd_file->file_len = newfile_size;
162 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
163 write(rrd_file->fd, "\0", 1); /* poke */
164 lseek(rrd_file->fd, 0, SEEK_SET);
165 }
166 #ifdef HAVE_POSIX_FADVISE
167 /* In general we need no read-ahead when dealing with rrd_files.
168 When we stop reading, it is highly unlikely that we start up again.
169 In this manner we actually save time and diskaccess (and buffer cache).
170 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
171 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
172 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
173 rrd_strerror(errno));
174 goto out_close;
175 }
176 #endif
178 /*
179 if (rdwr & RRD_READWRITE)
180 {
181 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
182 rrd_set_error("failed to disable the stream buffer\n");
183 return (-1);
184 }
185 }
186 */
187 #ifdef HAVE_MMAP
188 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
189 rrd_file->fd, offset);
191 /* lets see if the first read worked */
192 if (data == MAP_FAILED) {
193 rrd_set_error("mmaping file '%s': %s", file_name,
194 rrd_strerror(errno));
195 goto out_close;
196 }
197 rrd_file->file_start = data;
198 if (rdwr & RRD_CREAT) {
199 memset(data, DNAN, newfile_size - 1);
200 goto out_done;
201 }
202 #endif
203 if (rdwr & RRD_CREAT)
204 goto out_done;
205 #ifdef USE_MADVISE
206 if (rdwr & RRD_COPY) {
207 /* We will read everything in a moment (copying) */
208 _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
209 } else {
210 # ifndef ONE_PAGE
211 /* We do not need to read anything in for the moment */
212 _madvise(data, rrd_file->file_len, MADV_RANDOM);
213 /* the stat_head will be needed soonish, so hint accordingly */
214 _madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
216 # else
217 /* alternatively: keep 1 page worth of data, likely headers,
218 * don't need the rest. */
219 _madvise(data, _page_size, MADV_WILLNEED | MADV_SEQUENTIAL);
220 _madvise(data + _page_size, (rrd_file->file_len >= _page_size)
221 ? rrd_file->file_len - _page_size : 0, MADV_DONTNEED);
222 # endif
223 }
224 #endif
226 __rrd_read(rrd->stat_head, stat_head_t,
227 1);
229 /* lets do some test if we are on track ... */
230 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
231 rrd_set_error("'%s' is not an RRD file", file_name);
232 goto out_nullify_head;
233 }
235 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
236 rrd_set_error("This RRD was created on another architecture");
237 goto out_nullify_head;
238 }
240 version = atoi(rrd->stat_head->version);
242 if (version > atoi(RRD_VERSION)) {
243 rrd_set_error("can't handle RRD file version %s",
244 rrd->stat_head->version);
245 goto out_nullify_head;
246 }
247 #if defined USE_MADVISE && !defined ONE_PAGE
248 /* the ds_def will be needed soonish, so hint accordingly */
249 _madvise(data + PAGE_START(offset),
250 sizeof(ds_def_t) * rrd->stat_head->ds_cnt,
251 MADV_WILLNEED);
252 #endif
253 __rrd_read(rrd->ds_def, ds_def_t,
254 rrd->stat_head->ds_cnt);
256 #if defined USE_MADVISE && !defined ONE_PAGE
257 /* the rra_def will be needed soonish, so hint accordingly */
258 _madvise(data + PAGE_START(offset),
259 sizeof(rra_def_t) * rrd->stat_head->rra_cnt,
260 MADV_WILLNEED);
261 #endif
262 __rrd_read(rrd->rra_def, rra_def_t,
263 rrd->stat_head->rra_cnt);
265 /* handle different format for the live_head */
266 if (version < 3) {
267 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
268 if (rrd->live_head == NULL) {
269 rrd_set_error("live_head_t malloc");
270 goto out_close;
271 }
272 #ifdef HAVE_MMAP
273 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
274 offset += sizeof(long);
275 #else
276 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
277 #endif
278 rrd->live_head->last_up_usec = 0;
279 } else {
280 #if defined USE_MADVISE && !defined ONE_PAGE
281 /* the live_head will be needed soonish, so hint accordingly */
282 _madvise(data + PAGE_START(offset),
283 sizeof(live_head_t), MADV_WILLNEED);
284 #endif
285 __rrd_read(rrd->live_head, live_head_t,
286 1);
287 }
288 //XXX: This doesn't look like it needs madvise
289 __rrd_read(rrd->pdp_prep, pdp_prep_t,
290 rrd->stat_head->ds_cnt);
292 //XXX: This could benefit from madvise()ing
293 __rrd_read(rrd->cdp_prep, cdp_prep_t,
294 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
296 //XXX: This could benefit from madvise()ing
297 __rrd_read(rrd->rra_ptr, rra_ptr_t,
298 rrd->stat_head->rra_cnt);
300 rrd_file->header_len = offset;
301 rrd_file->pos = offset;
302 out_done:
303 return (rrd_file);
304 out_nullify_head:
305 rrd->stat_head = NULL;
306 out_close:
307 close(rrd_file->fd);
308 out_free:
309 free(rrd_file);
310 return NULL;
311 }
314 /* Close a reference to an rrd_file. */
315 static
316 void mincore_print(rrd_file_t *rrd_file,char * mark){
317 #ifdef HAVE_MMAP
318 /* pretty print blocks in core */
319 off_t off;
320 unsigned char *vec;
321 ssize_t _page_size = sysconf(_SC_PAGESIZE);
323 off = rrd_file->file_len +
324 ((rrd_file->file_len + _page_size - 1) / _page_size);
325 vec = malloc(off);
326 if (vec != NULL) {
327 memset(vec, 0, off);
328 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
329 int prev;
330 unsigned is_in = 0, was_in = 0;
332 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
333 is_in = vec[off] & 1; /* if lsb set then is core resident */
334 if (off == 0)
335 was_in = is_in;
336 if (was_in != is_in) {
337 fprintf(stderr, "%s: %sin core: %p len %ld\n",mark,
338 was_in ? "" : "not ", vec + prev, off - prev);
339 was_in = is_in;
340 prev = off;
341 }
342 }
343 fprintf(stderr,
344 "%s: %sin core: %p len %ld\n", mark,
345 was_in ? "" : "not ", vec + prev, off - prev);
346 } else
347 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
348 }
349 #else
350 fprintf(stderr, "sorry mincore only works with mmap");
351 #endif
352 }
355 /* drop cache except for the header and the active pages */
356 void
357 rrd_dontneed (
358 rrd_file_t *rrd_file,
359 rrd_t *rrd){
360 unsigned long dontneed_start;
361 unsigned long rra_start;
362 unsigned long active_block;
363 unsigned long i;
364 ssize_t _page_size = sysconf(_SC_PAGESIZE);
366 #if defined DEBUG && DEBUG > 1
367 mincore_print(rrd_file,"before");
368 #endif
370 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
371 rra_start = rrd_file->header_len;
372 dontneed_start = PAGE_START(rra_start)+_page_size;
373 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
374 active_block =
375 PAGE_START(rra_start
376 + rrd->rra_ptr[i].cur_row
377 * rrd->stat_head->ds_cnt
378 * sizeof(rrd_value_t));
379 if (active_block > dontneed_start){
380 #ifdef USE_MADVISE
381 _madvise(rrd_file->file_start + dontneed_start,
382 active_block-dontneed_start-1,
383 MADV_DONTNEED);
384 #endif
385 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
386 #ifdef HAVE_POSIX_FADVISE
387 posix_fadvise(rrd_file->fd, dontneed_start, active_block-dontneed_start-1, POSIX_FADV_DONTNEED);
388 #endif
389 }
390 dontneed_start = active_block + _page_size;
391 rra_start += rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt * sizeof(rrd_value_t);
392 }
393 #ifdef USE_MADVISE
394 _madvise(rrd_file->file_start + dontneed_start,
395 rrd_file->file_len - dontneed_start,
396 MADV_DONTNEED);
397 #endif
398 #ifdef HAVE_POSIX_FADVISE
399 posix_fadvise(rrd_file->fd, dontneed_start, rrd_file->file_len-dontneed_start, POSIX_FADV_DONTNEED);
400 #endif
401 #if defined DEBUG && DEBUG > 1
402 mincore_print(rrd_file,"after");
403 #endif
404 }
406 int rrd_close(
407 rrd_file_t *rrd_file)
408 {
409 int ret;
410 #ifdef HAVE_MMAP
411 ret = munmap(rrd_file->file_start, rrd_file->file_len);
412 if (ret != 0)
413 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
414 #endif
415 ret = close(rrd_file->fd);
416 if (ret != 0)
417 rrd_set_error("closing file: %s", rrd_strerror(errno));
418 free(rrd_file);
419 rrd_file = NULL;
420 return ret;
421 }
424 /* Set position of rrd_file. */
426 off_t rrd_seek(
427 rrd_file_t *rrd_file,
428 off_t off,
429 int whence)
430 {
431 off_t ret = 0;
433 #ifdef HAVE_MMAP
434 if (whence == SEEK_SET)
435 rrd_file->pos = off;
436 else if (whence == SEEK_CUR)
437 rrd_file->pos += off;
438 else if (whence == SEEK_END)
439 rrd_file->pos = rrd_file->file_len + off;
440 #else
441 ret = lseek(rrd_file->fd, off, whence);
442 if (ret < 0)
443 rrd_set_error("lseek: %s", rrd_strerror(errno));
444 rrd_file->pos = ret;
445 #endif
446 //XXX: mimic fseek, which returns 0 upon success
447 return ret == -1; //XXX: or just ret to mimic lseek
448 }
451 /* Get current position in rrd_file. */
453 inline off_t rrd_tell(
454 rrd_file_t *rrd_file)
455 {
456 return rrd_file->pos;
457 }
460 /* read count bytes into buffer buf, starting at rrd_file->pos.
461 * Returns the number of bytes read or <0 on error. */
463 inline ssize_t rrd_read(
464 rrd_file_t *rrd_file,
465 void *buf,
466 size_t count)
467 {
468 #ifdef HAVE_MMAP
469 size_t _cnt = count;
470 ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
472 if (_surplus > 0) { /* short read */
473 _cnt -= _surplus;
474 }
475 if (_cnt == 0)
476 return 0; /* EOF */
477 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
479 rrd_file->pos += _cnt; /* mimmic read() semantics */
480 return _cnt;
481 #else
482 ssize_t ret;
484 ret = read(rrd_file->fd, buf, count);
485 if (ret > 0)
486 rrd_file->pos += ret; /* mimmic read() semantics */
487 return ret;
488 #endif
489 }
492 /* write count bytes from buffer buf to the current position
493 * rrd_file->pos of rrd_file->fd.
494 * Returns the number of bytes written. */
496 inline ssize_t rrd_write(
497 rrd_file_t *rrd_file,
498 const void *buf,
499 size_t count)
500 {
501 #ifdef HAVE_MMAP
502 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
503 rrd_file->pos += count;
504 return count; /* mimmic write() semantics */
505 #else
506 ssize_t _sz = write(rrd_file->fd, buf, count);
508 if (_sz > 0)
509 rrd_file->pos += _sz;
510 return _sz;
511 #endif
512 }
515 /* flush all data pending to be written to FD. */
517 inline void rrd_flush(
518 rrd_file_t *rrd_file)
519 {
520 if (fdatasync(rrd_file->fd) != 0) {
521 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
522 rrd_strerror(errno));
523 }
524 }
527 /* Initialize RRD header. */
529 void rrd_init(
530 rrd_t *rrd)
531 {
532 rrd->stat_head = NULL;
533 rrd->ds_def = NULL;
534 rrd->rra_def = NULL;
535 rrd->live_head = NULL;
536 rrd->rra_ptr = NULL;
537 rrd->pdp_prep = NULL;
538 rrd->cdp_prep = NULL;
539 rrd->rrd_value = NULL;
540 }
543 /* free RRD header data. */
545 #ifdef HAVE_MMAP
546 inline void rrd_free(
547 rrd_t UNUSED(*rrd))
548 {
549 }
550 #else
551 void rrd_free(
552 rrd_t *rrd)
553 {
554 free(rrd->live_head);
555 free(rrd->stat_head);
556 free(rrd->ds_def);
557 free(rrd->rra_def);
558 free(rrd->rra_ptr);
559 free(rrd->pdp_prep);
560 free(rrd->cdp_prep);
561 free(rrd->rrd_value);
562 }
563 #endif
/* Release a block of memory that was handed out by the rrd library.
 * External users call this instead of free() directly; passing NULL is
 * harmless because free(NULL) does nothing. */
void rrd_freemem(void *mem)
{
    free(mem);
}
576 /* XXX: FIXME: missing documentation. */
577 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
579 int /*_rrd_*/ readfile(
580 const char *file_name,
581 char **buffer,
582 int skipfirst)
583 {
584 long writecnt = 0, totalcnt = MEMBLK;
585 long offset = 0;
586 FILE *input = NULL;
587 char c;
589 if ((strcmp("-", file_name) == 0)) {
590 input = stdin;
591 } else {
592 if ((input = fopen(file_name, "rb")) == NULL) {
593 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
594 return (-1);
595 }
596 }
597 if (skipfirst) {
598 do {
599 c = getc(input);
600 offset++;
601 } while (c != '\n' && !feof(input));
602 }
603 if (strcmp("-", file_name)) {
604 fseek(input, 0, SEEK_END);
605 /* have extra space for detecting EOF without realloc */
606 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
607 if (totalcnt < MEMBLK)
608 totalcnt = MEMBLK; /* sanitize */
609 fseek(input, offset * sizeof(char), SEEK_SET);
610 }
611 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
612 perror("Allocate Buffer:");
613 exit(1);
614 };
615 do {
616 writecnt +=
617 fread((*buffer) + writecnt, 1,
618 (totalcnt - writecnt) * sizeof(char), input);
619 if (writecnt >= totalcnt) {
620 totalcnt += MEMBLK;
621 if (((*buffer) =
622 rrd_realloc((*buffer),
623 (totalcnt + 4) * sizeof(char))) == NULL) {
624 perror("Realloc Buffer:");
625 exit(1);
626 };
627 }
628 } while (!feof(input));
629 (*buffer)[writecnt] = '\0';
630 if (strcmp("-", file_name) != 0) {
631 fclose(input);
632 };
633 return writecnt;
634 }