1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 #define DEBUG 1
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
#ifdef HAVE_MMAP
/* Point dst at the next cnt objects of type dst_t inside the mapped file and
 * advance 'offset' past them.  Expects 'data' (start of the mapping) and
 * 'offset' to be in scope at the point of use.
 *
 * the cast to void* is there to avoid this warning seen on ia64 with certain
 * versions of gcc: 'cast increases required alignment of target type'
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement (e.g. as the body of an unbraced if).
 */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        (dst) = (dst_t *)(void *) (data + offset); \
        offset += sizeof(dst_t) * (cnt); \
    } while (0)
#else
/* Allocate room for cnt objects of type dst_t, read them from rrd_file->fd
 * into dst and advance 'offset' by the number of bytes read.  Expects
 * 'rrd_file', 'offset' and a label 'out_nullify_head' to be in scope at the
 * point of use.  On allocation failure, read error or short read an error is
 * recorded with rrd_set_error() and control jumps to out_nullify_head.
 */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        const size_t rrd_read_want_ = sizeof(dst_t) * (cnt); \
        ssize_t rrd_read_got_; \
        if (((dst) = malloc(rrd_read_want_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_read_got_ = read(rrd_file->fd, (dst), rrd_read_want_); \
        if (rrd_read_got_ < 0 || (size_t) rrd_read_got_ != rrd_read_want_) { \
            rrd_set_error(#dst " read"); \
            goto out_nullify_head; \
        } \
        offset += rrd_read_got_; \
    } while (0)
#endif
/* Round addr down to the start of the page that contains it.
 * Relies on a variable named _page_size (the page size, a power of two)
 * being in scope wherever the macro is expanded. */
#if !defined(PAGE_START)
#define PAGE_START(addr) ((addr) & ~(_page_size - 1))
#endif
42 /* Open a database file, return its header and an open filehandle,
43 * positioned to the first cdp in the first rra.
44 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
45 * before returning an error. Do not call rrd_close upon failure of rrd_open.
46 */
48 rrd_file_t *rrd_open(
49 const char *const file_name,
50 rrd_t *rrd,
51 unsigned rdwr)
52 {
53 int flags = 0;
54 mode_t mode = S_IRUSR;
55 int version;
57 #ifdef HAVE_MMAP
58 ssize_t _page_size = sysconf(_SC_PAGESIZE);
59 int mm_prot = PROT_READ, mm_flags = 0;
60 char *data;
61 #endif
62 off_t offset = 0;
63 struct stat statb;
64 rrd_file_t *rrd_file = NULL;
65 off_t newfile_size = 0;
67 if (rdwr & RRD_CREAT) {
68 /* yes bad inline signaling alert, we are using the
69 floatcookie to pass the size in ... only used in resize */
70 newfile_size = (off_t) rrd->stat_head->float_cookie;
71 free(rrd->stat_head);
72 }
73 rrd_init(rrd);
74 rrd_file = malloc(sizeof(rrd_file_t));
75 if (rrd_file == NULL) {
76 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
77 return NULL;
78 }
79 memset(rrd_file, 0, sizeof(rrd_file_t));
81 #ifdef DEBUG
82 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
83 (RRD_READONLY | RRD_READWRITE)) {
84 /* Both READONLY and READWRITE were given, which is invalid. */
85 rrd_set_error("in read/write request mask");
86 exit(-1);
87 }
88 #endif
89 if (rdwr & RRD_READONLY) {
90 flags |= O_RDONLY;
91 #ifdef HAVE_MMAP
92 mm_flags = MAP_PRIVATE;
93 # ifdef MAP_NORESERVE
94 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
95 # endif
96 #endif
97 } else {
98 if (rdwr & RRD_READWRITE) {
99 mode |= S_IWUSR;
100 flags |= O_RDWR;
101 #ifdef HAVE_MMAP
102 mm_flags = MAP_SHARED;
103 mm_prot |= PROT_WRITE;
104 #endif
105 }
106 if (rdwr & RRD_CREAT) {
107 flags |= (O_CREAT | O_TRUNC);
108 }
109 }
110 if (rdwr & RRD_READAHEAD) {
111 #ifdef MAP_POPULATE
112 mm_flags |= MAP_POPULATE; /* populate ptes and data */
113 #endif
114 #if defined MAP_NONBLOCK
115 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
116 #endif
117 }
119 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
120 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
121 goto out_free;
122 }
124 /* Better try to avoid seeks as much as possible. stat may be heavy but
125 * many concurrent seeks are even worse. */
126 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
127 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
128 goto out_close;
129 }
130 if (newfile_size == 0) {
131 rrd_file->file_len = statb.st_size;
132 } else {
133 rrd_file->file_len = newfile_size;
134 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
135 write(rrd_file->fd, "\0", 1); /* poke */
136 lseek(rrd_file->fd, 0, SEEK_SET);
137 }
138 #ifdef HAVE_POSIX_FADVISE
139 /* In general we need no read-ahead when dealing with rrd_files.
140 When we stop reading, it is highly unlikely that we start up again.
141 In this manner we actually save time and diskaccess (and buffer cache).
142 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
143 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
144 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
145 rrd_strerror(errno));
146 goto out_close;
147 }
148 #endif
150 /*
151 if (rdwr & RRD_READWRITE)
152 {
153 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
154 rrd_set_error("failed to disable the stream buffer\n");
155 return (-1);
156 }
157 }
158 */
159 #ifdef HAVE_MMAP
160 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
161 rrd_file->fd, offset);
163 /* lets see if the first read worked */
164 if (data == MAP_FAILED) {
165 rrd_set_error("mmaping file '%s': %s", file_name,
166 rrd_strerror(errno));
167 goto out_close;
168 }
169 rrd_file->file_start = data;
170 if (rdwr & RRD_CREAT) {
171 memset(data, DNAN, newfile_size - 1);
172 goto out_done;
173 }
174 #endif
175 if (rdwr & RRD_CREAT)
176 goto out_done;
177 #ifdef USE_MADVISE
178 if (rdwr & RRD_COPY) {
179 /* We will read everything in a moment (copying) */
180 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
181 } else {
182 /* We do not need to read anything in for the moment */
183 madvise(data, rrd_file->file_len, MADV_RANDOM);
184 /* the stat_head will be needed soonish, so hint accordingly */
185 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
186 }
187 #endif
189 __rrd_read(rrd->stat_head, stat_head_t,
190 1);
192 /* lets do some test if we are on track ... */
193 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
194 rrd_set_error("'%s' is not an RRD file", file_name);
195 goto out_nullify_head;
196 }
198 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
199 rrd_set_error("This RRD was created on another architecture");
200 goto out_nullify_head;
201 }
203 version = atoi(rrd->stat_head->version);
205 if (version > atoi(RRD_VERSION)) {
206 rrd_set_error("can't handle RRD file version %s",
207 rrd->stat_head->version);
208 goto out_nullify_head;
209 }
210 #if defined USE_MADVISE
211 /* the ds_def will be needed soonish, so hint accordingly */
212 madvise(data + PAGE_START(offset),
213 sizeof(ds_def_t) * rrd->stat_head->ds_cnt,
214 MADV_WILLNEED);
215 #endif
216 __rrd_read(rrd->ds_def, ds_def_t,
217 rrd->stat_head->ds_cnt);
219 #if defined USE_MADVISE
220 /* the rra_def will be needed soonish, so hint accordingly */
221 madvise(data + PAGE_START(offset),
222 sizeof(rra_def_t) * rrd->stat_head->rra_cnt,
223 MADV_WILLNEED);
224 #endif
225 __rrd_read(rrd->rra_def, rra_def_t,
226 rrd->stat_head->rra_cnt);
228 /* handle different format for the live_head */
229 if (version < 3) {
230 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
231 if (rrd->live_head == NULL) {
232 rrd_set_error("live_head_t malloc");
233 goto out_close;
234 }
235 #ifdef HAVE_MMAP
236 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
237 offset += sizeof(long);
238 #else
239 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
240 #endif
241 rrd->live_head->last_up_usec = 0;
242 } else {
243 #if defined USE_MADVISE
244 /* the live_head will be needed soonish, so hint accordingly */
245 madvise(data + PAGE_START(offset),
246 sizeof(live_head_t), MADV_WILLNEED);
247 #endif
248 __rrd_read(rrd->live_head, live_head_t,
249 1);
250 }
251 //XXX: This doesn't look like it needs madvise
252 __rrd_read(rrd->pdp_prep, pdp_prep_t,
253 rrd->stat_head->ds_cnt);
255 //XXX: This could benefit from madvise()ing
256 __rrd_read(rrd->cdp_prep, cdp_prep_t,
257 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
259 //XXX: This could benefit from madvise()ing
260 __rrd_read(rrd->rra_ptr, rra_ptr_t,
261 rrd->stat_head->rra_cnt);
263 rrd_file->header_len = offset;
264 rrd_file->pos = offset;
265 out_done:
266 return (rrd_file);
267 out_nullify_head:
268 rrd->stat_head = NULL;
269 out_close:
270 close(rrd_file->fd);
271 out_free:
272 free(rrd_file);
273 return NULL;
274 }
277 /* Close a reference to an rrd_file. */
278 static
279 void mincore_print(rrd_file_t *rrd_file,char * mark){
280 #ifdef HAVE_MMAP
281 /* pretty print blocks in core */
282 off_t off;
283 unsigned char *vec;
284 ssize_t _page_size = sysconf(_SC_PAGESIZE);
286 off = rrd_file->file_len +
287 ((rrd_file->file_len + _page_size - 1) / _page_size);
288 vec = malloc(off);
289 if (vec != NULL) {
290 memset(vec, 0, off);
291 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
292 int prev;
293 unsigned is_in = 0, was_in = 0;
295 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
296 is_in = vec[off] & 1; /* if lsb set then is core resident */
297 if (off == 0)
298 was_in = is_in;
299 if (was_in != is_in) {
300 fprintf(stderr, "%s: %sin core: %p len %ld\n",mark,
301 was_in ? "" : "not ", vec + prev, off - prev);
302 was_in = is_in;
303 prev = off;
304 }
305 }
306 fprintf(stderr,
307 "%s: %sin core: %p len %ld\n", mark,
308 was_in ? "" : "not ", vec + prev, off - prev);
309 } else
310 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
311 }
312 #else
313 fprintf(stderr, "sorry mincore only works with mmap");
314 #endif
315 }
318 /* drop cache except for the header and the active pages */
319 void
320 rrd_dontneed (
321 rrd_file_t *rrd_file,
322 rrd_t *rrd){
323 unsigned long dontneed_start;
324 unsigned long rra_start;
325 unsigned long active_block;
326 unsigned long i;
327 ssize_t _page_size = sysconf(_SC_PAGESIZE);
329 #if defined DEBUG && DEBUG > 1
330 mincore_print(rrd_file,"before");
331 #endif
333 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
334 rra_start = rrd_file->header_len;
335 dontneed_start = PAGE_START(rra_start)+_page_size;
336 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
337 active_block =
338 PAGE_START(rra_start
339 + rrd->rra_ptr[i].cur_row
340 * rrd->stat_head->ds_cnt
341 * sizeof(rrd_value_t));
342 if (active_block > dontneed_start) {
343 #ifdef USE_MADVISE
344 madvise(rrd_file->file_start + dontneed_start,
345 active_block-dontneed_start-1,
346 MADV_DONTNEED);
347 #endif
348 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
349 #ifdef HAVE_POSIX_FADVISE
350 posix_fadvise(rrd_file->fd, dontneed_start, active_block-dontneed_start-1, POSIX_FADV_DONTNEED);
351 #endif
352 }
353 dontneed_start = active_block;
354 /* do not relase 'hot' block if update for this RAA will occure within 10 minutes */
355 if ( rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
356 rrd->live_head->last_up % (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt) < 10*60 ){
357 dontneed_start += _page_size;
358 }
359 rra_start += rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt * sizeof(rrd_value_t);
360 }
361 #ifdef USE_MADVISE
362 madvise(rrd_file->file_start + dontneed_start,
363 rrd_file->file_len - dontneed_start,
364 MADV_DONTNEED);
365 #endif
366 #ifdef HAVE_POSIX_FADVISE
367 posix_fadvise(rrd_file->fd, dontneed_start, rrd_file->file_len-dontneed_start, POSIX_FADV_DONTNEED);
368 #endif
369 #if defined DEBUG && DEBUG > 1
370 mincore_print(rrd_file,"after");
371 #endif
372 }
374 int rrd_close(
375 rrd_file_t *rrd_file)
376 {
377 int ret;
378 #ifdef HAVE_MMAP
379 ret = munmap(rrd_file->file_start, rrd_file->file_len);
380 if (ret != 0)
381 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
382 #endif
383 ret = close(rrd_file->fd);
384 if (ret != 0)
385 rrd_set_error("closing file: %s", rrd_strerror(errno));
386 free(rrd_file);
387 rrd_file = NULL;
388 return ret;
389 }
392 /* Set position of rrd_file. */
394 off_t rrd_seek(
395 rrd_file_t *rrd_file,
396 off_t off,
397 int whence)
398 {
399 off_t ret = 0;
401 #ifdef HAVE_MMAP
402 if (whence == SEEK_SET)
403 rrd_file->pos = off;
404 else if (whence == SEEK_CUR)
405 rrd_file->pos += off;
406 else if (whence == SEEK_END)
407 rrd_file->pos = rrd_file->file_len + off;
408 #else
409 ret = lseek(rrd_file->fd, off, whence);
410 if (ret < 0)
411 rrd_set_error("lseek: %s", rrd_strerror(errno));
412 rrd_file->pos = ret;
413 #endif
414 //XXX: mimic fseek, which returns 0 upon success
415 return ret == -1; //XXX: or just ret to mimic lseek
416 }
419 /* Get current position in rrd_file. */
421 inline off_t rrd_tell(
422 rrd_file_t *rrd_file)
423 {
424 return rrd_file->pos;
425 }
428 /* read count bytes into buffer buf, starting at rrd_file->pos.
429 * Returns the number of bytes read or <0 on error. */
431 inline ssize_t rrd_read(
432 rrd_file_t *rrd_file,
433 void *buf,
434 size_t count)
435 {
436 #ifdef HAVE_MMAP
437 size_t _cnt = count;
438 ssize_t _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
440 if (_surplus > 0) { /* short read */
441 _cnt -= _surplus;
442 }
443 if (_cnt == 0)
444 return 0; /* EOF */
445 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
447 rrd_file->pos += _cnt; /* mimmic read() semantics */
448 return _cnt;
449 #else
450 ssize_t ret;
452 ret = read(rrd_file->fd, buf, count);
453 if (ret > 0)
454 rrd_file->pos += ret; /* mimmic read() semantics */
455 return ret;
456 #endif
457 }
460 /* write count bytes from buffer buf to the current position
461 * rrd_file->pos of rrd_file->fd.
462 * Returns the number of bytes written. */
464 inline ssize_t rrd_write(
465 rrd_file_t *rrd_file,
466 const void *buf,
467 size_t count)
468 {
469 #ifdef HAVE_MMAP
470 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
471 rrd_file->pos += count;
472 return count; /* mimmic write() semantics */
473 #else
474 ssize_t _sz = write(rrd_file->fd, buf, count);
476 if (_sz > 0)
477 rrd_file->pos += _sz;
478 return _sz;
479 #endif
480 }
483 /* flush all data pending to be written to FD. */
485 inline void rrd_flush(
486 rrd_file_t *rrd_file)
487 {
488 if (fdatasync(rrd_file->fd) != 0) {
489 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
490 rrd_strerror(errno));
491 }
492 }
495 /* Initialize RRD header. */
497 void rrd_init(
498 rrd_t *rrd)
499 {
500 rrd->stat_head = NULL;
501 rrd->ds_def = NULL;
502 rrd->rra_def = NULL;
503 rrd->live_head = NULL;
504 rrd->rra_ptr = NULL;
505 rrd->pdp_prep = NULL;
506 rrd->cdp_prep = NULL;
507 rrd->rrd_value = NULL;
508 }
511 /* free RRD header data. */
513 #ifdef HAVE_MMAP
514 inline void rrd_free(
515 rrd_t UNUSED(*rrd))
516 {
517 }
518 #else
519 void rrd_free(
520 rrd_t *rrd)
521 {
522 free(rrd->live_head);
523 free(rrd->stat_head);
524 free(rrd->ds_def);
525 free(rrd->rra_def);
526 free(rrd->rra_ptr);
527 free(rrd->pdp_prep);
528 free(rrd->cdp_prep);
529 free(rrd->rrd_value);
530 }
531 #endif
/* Release a block of memory that was handed out by the rrd library.
 * Meant for external callers; the block is passed straight to free(). */
void rrd_freemem(void *mem)
{
    free(mem);
}
544 /* XXX: FIXME: missing documentation. */
545 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
547 int /*_rrd_*/ readfile(
548 const char *file_name,
549 char **buffer,
550 int skipfirst)
551 {
552 long writecnt = 0, totalcnt = MEMBLK;
553 long offset = 0;
554 FILE *input = NULL;
555 char c;
557 if ((strcmp("-", file_name) == 0)) {
558 input = stdin;
559 } else {
560 if ((input = fopen(file_name, "rb")) == NULL) {
561 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
562 return (-1);
563 }
564 }
565 if (skipfirst) {
566 do {
567 c = getc(input);
568 offset++;
569 } while (c != '\n' && !feof(input));
570 }
571 if (strcmp("-", file_name)) {
572 fseek(input, 0, SEEK_END);
573 /* have extra space for detecting EOF without realloc */
574 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
575 if (totalcnt < MEMBLK)
576 totalcnt = MEMBLK; /* sanitize */
577 fseek(input, offset * sizeof(char), SEEK_SET);
578 }
579 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
580 perror("Allocate Buffer:");
581 exit(1);
582 };
583 do {
584 writecnt +=
585 fread((*buffer) + writecnt, 1,
586 (totalcnt - writecnt) * sizeof(char), input);
587 if (writecnt >= totalcnt) {
588 totalcnt += MEMBLK;
589 if (((*buffer) =
590 rrd_realloc((*buffer),
591 (totalcnt + 4) * sizeof(char))) == NULL) {
592 perror("Realloc Buffer:");
593 exit(1);
594 };
595 }
596 } while (!feof(input));
597 (*buffer)[writecnt] = '\0';
598 if (strcmp("-", file_name) != 0) {
599 fclose(input);
600 };
601 return writecnt;
602 }