1 /*****************************************************************************
2 * RRDtool 1.3.1 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 #define DEBUG 1
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial
 * if your syscalls are very slow */
/* __rrd_read(dst, dst_t, cnt): load `cnt` consecutive records of type
 * `dst_t` from the current header position into `dst`.
 *
 * The macro is meant to be used as a single statement inside rrd_open() and
 * relies on these names from the enclosing function: `rrd_file`, `offset`,
 * the label `out_nullify_head` and (mmap build only) `data`.
 *
 * On any failure it records a message with rrd_set_error() and jumps to
 * `out_nullify_head`. On success `offset` is advanced by the number of
 * bytes consumed.
 *
 * Both variants are wrapped in do { } while (0) so that the expansion is
 * exactly one statement, and both reject a record count whose byte size
 * would overflow size_t.
 */
#ifdef HAVE_MMAP
/* mmap build: `dst` is pointed straight into the mapping, after checking
 * that the requested range lies completely inside the file.
 * The cast to void* is there to avoid this warning seen on ia64 with certain
 * versions of gcc: 'cast increases required alignment of target type'
 */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        const size_t rrd_rd_cnt_ = (size_t) (cnt); \
        const size_t rrd_rd_len_ = sizeof(dst_t) * rrd_rd_cnt_; \
        if (rrd_rd_cnt_ > ((size_t) -1) / sizeof(dst_t) \
            || offset < 0 \
            || offset > rrd_file->file_len \
            || (size_t) (rrd_file->file_len - offset) < rrd_rd_len_) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t *) (void *) (data + offset); \
        offset += (off_t) rrd_rd_len_; \
    } while (0)
#else
/* read() build: `dst` becomes a freshly malloc'ed buffer that is filled from
 * the file descriptor. A failed or short read is an error; the buffer stays
 * assigned to `dst` so that the caller's cleanup can release it.
 */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        const size_t rrd_rd_cnt_ = (size_t) (cnt); \
        const size_t rrd_rd_len_ = sizeof(dst_t) * rrd_rd_cnt_; \
        ssize_t   rrd_rd_got_; \
        if (rrd_rd_cnt_ > ((size_t) -1) / sizeof(dst_t)) { \
            rrd_set_error("header size overflow while loading " #dst); \
            goto out_nullify_head; \
        } \
        if (((dst) = malloc(rrd_rd_len_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_rd_got_ = read(rrd_file->fd, (dst), rrd_rd_len_); \
        if (rrd_rd_got_ < 0 || (size_t) rrd_rd_got_ != rrd_rd_len_) { \
            rrd_set_error("short read while loading header " #dst); \
            goto out_nullify_head; \
        } \
        offset += rrd_rd_got_; \
    } while (0)
#endif
/* PAGE_START(addr): clear the low bits of `addr` selected by
 * (_page_size - 1), i.e. round it down to the start of its page.
 * A variable named `_page_size` must be in scope wherever the macro is
 * expanded. An earlier definition of PAGE_START is left untouched. */
#if defined(USE_MADVISE) || defined(HAVE_POSIX_FADVISE)
# if !defined(PAGE_START)
#  define PAGE_START(addr) ((addr) & (~(_page_size - 1)))
# endif
#endif
43 /* Open a database file, return its header and an open filehandle,
44 * positioned to the first cdp in the first rra.
45 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
46 * before returning an error. Do not call rrd_close upon failure of rrd_open.
47 */
49 rrd_file_t *rrd_open(
50 const char *const file_name,
51 rrd_t *rrd,
52 unsigned rdwr)
53 {
54 int flags = 0;
55 mode_t mode = S_IRUSR;
56 int version;
58 #ifdef HAVE_MMAP
59 ssize_t _page_size = sysconf(_SC_PAGESIZE);
60 int mm_prot = PROT_READ, mm_flags = 0;
61 char *data;
62 #endif
63 off_t offset = 0;
64 struct stat statb;
65 rrd_file_t *rrd_file = NULL;
66 off_t newfile_size = 0;
68 if (rdwr & RRD_CREAT) {
69 /* yes bad inline signaling alert, we are using the
70 floatcookie to pass the size in ... only used in resize */
71 newfile_size = (off_t) rrd->stat_head->float_cookie;
72 free(rrd->stat_head);
73 }
74 rrd_init(rrd);
75 rrd_file = malloc(sizeof(rrd_file_t));
76 if (rrd_file == NULL) {
77 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
78 return NULL;
79 }
80 memset(rrd_file, 0, sizeof(rrd_file_t));
82 #ifdef DEBUG
83 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
84 (RRD_READONLY | RRD_READWRITE)) {
85 /* Both READONLY and READWRITE were given, which is invalid. */
86 rrd_set_error("in read/write request mask");
87 exit(-1);
88 }
89 #endif
90 if (rdwr & RRD_READONLY) {
91 flags |= O_RDONLY;
92 #ifdef HAVE_MMAP
93 mm_flags = MAP_PRIVATE;
94 # ifdef MAP_NORESERVE
95 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
96 # endif
97 #endif
98 } else {
99 if (rdwr & RRD_READWRITE) {
100 mode |= S_IWUSR;
101 flags |= O_RDWR;
102 #ifdef HAVE_MMAP
103 mm_flags = MAP_SHARED;
104 mm_prot |= PROT_WRITE;
105 #endif
106 }
107 if (rdwr & RRD_CREAT) {
108 flags |= (O_CREAT | O_TRUNC);
109 }
110 }
111 if (rdwr & RRD_READAHEAD) {
112 #ifdef MAP_POPULATE
113 mm_flags |= MAP_POPULATE; /* populate ptes and data */
114 #endif
115 #if defined MAP_NONBLOCK
116 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
117 #endif
118 }
119 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
120 flags |= O_BINARY;
121 #endif
123 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
124 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
125 goto out_free;
126 }
128 /* Better try to avoid seeks as much as possible. stat may be heavy but
129 * many concurrent seeks are even worse. */
130 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
131 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
132 goto out_close;
133 }
134 if (newfile_size == 0) {
135 rrd_file->file_len = statb.st_size;
136 } else {
137 rrd_file->file_len = newfile_size;
138 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
139 write(rrd_file->fd, "\0", 1); /* poke */
140 lseek(rrd_file->fd, 0, SEEK_SET);
141 }
142 #ifdef HAVE_POSIX_FADVISE
143 /* In general we need no read-ahead when dealing with rrd_files.
144 When we stop reading, it is highly unlikely that we start up again.
145 In this manner we actually save time and diskaccess (and buffer cache).
146 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
147 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
148 #endif
150 /*
151 if (rdwr & RRD_READWRITE)
152 {
153 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
154 rrd_set_error("failed to disable the stream buffer\n");
155 return (-1);
156 }
157 }
158 */
159 #ifdef HAVE_MMAP
160 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
161 rrd_file->fd, offset);
163 /* lets see if the first read worked */
164 if (data == MAP_FAILED) {
165 rrd_set_error("mmaping file '%s': %s", file_name,
166 rrd_strerror(errno));
167 goto out_close;
168 }
169 rrd_file->file_start = data;
170 if (rdwr & RRD_CREAT) {
171 memset(data, DNAN, newfile_size - 1);
172 goto out_done;
173 }
174 #endif
175 if (rdwr & RRD_CREAT)
176 goto out_done;
177 #ifdef USE_MADVISE
178 if (rdwr & RRD_COPY) {
179 /* We will read everything in a moment (copying) */
180 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
181 } else {
182 /* We do not need to read anything in for the moment */
183 madvise(data, rrd_file->file_len, MADV_RANDOM);
184 /* the stat_head will be needed soonish, so hint accordingly */
185 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
186 }
187 #endif
189 __rrd_read(rrd->stat_head, stat_head_t,
190 1);
192 /* lets do some test if we are on track ... */
193 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
194 rrd_set_error("'%s' is not an RRD file", file_name);
195 goto out_nullify_head;
196 }
198 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
199 rrd_set_error("This RRD was created on another architecture");
200 goto out_nullify_head;
201 }
203 version = atoi(rrd->stat_head->version);
205 if (version > atoi(RRD_VERSION)) {
206 rrd_set_error("can't handle RRD file version %s",
207 rrd->stat_head->version);
208 goto out_nullify_head;
209 }
210 #if defined USE_MADVISE
211 /* the ds_def will be needed soonish, so hint accordingly */
212 madvise(data + PAGE_START(offset),
213 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
214 #endif
215 __rrd_read(rrd->ds_def, ds_def_t,
216 rrd->stat_head->ds_cnt);
218 #if defined USE_MADVISE
219 /* the rra_def will be needed soonish, so hint accordingly */
220 madvise(data + PAGE_START(offset),
221 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
222 #endif
223 __rrd_read(rrd->rra_def, rra_def_t,
224 rrd->stat_head->rra_cnt);
226 /* handle different format for the live_head */
227 if (version < 3) {
228 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
229 if (rrd->live_head == NULL) {
230 rrd_set_error("live_head_t malloc");
231 goto out_close;
232 }
233 #if defined USE_MADVISE
234 /* the live_head will be needed soonish, so hint accordingly */
235 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
236 #endif
237 __rrd_read(rrd->legacy_last_up, time_t,
238 1);
240 rrd->live_head->last_up = *rrd->legacy_last_up;
241 rrd->live_head->last_up_usec = 0;
242 } else {
243 #if defined USE_MADVISE
244 /* the live_head will be needed soonish, so hint accordingly */
245 madvise(data + PAGE_START(offset),
246 sizeof(live_head_t), MADV_WILLNEED);
247 #endif
248 __rrd_read(rrd->live_head, live_head_t,
249 1);
250 }
251 __rrd_read(rrd->pdp_prep, pdp_prep_t,
252 rrd->stat_head->ds_cnt);
253 __rrd_read(rrd->cdp_prep, cdp_prep_t,
254 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
255 __rrd_read(rrd->rra_ptr, rra_ptr_t,
256 rrd->stat_head->rra_cnt);
258 rrd_file->header_len = offset;
259 rrd_file->pos = offset;
260 out_done:
261 return (rrd_file);
262 out_nullify_head:
263 rrd->stat_head = NULL;
264 out_close:
265 close(rrd_file->fd);
266 out_free:
267 free(rrd_file);
268 return NULL;
269 }
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * rrd_file  open RRD whose mapping is inspected with mincore(2)
 * mark      prefix printed in front of every output line
 *
 * One line is written to stderr per run of consecutive pages sharing the
 * same residency state, giving the run's start address inside the mapping
 * and its length in bytes. Only compiled when DEBUG > 1.
 */
static void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    const ssize_t page_size = sysconf(_SC_PAGESIZE);
    size_t    page_cnt;
    size_t    page;
    size_t    run_start = 0;
    unsigned  is_in = 0, was_in = 0;
    unsigned char *vec;

    if (page_size <= 0 || rrd_file->file_len <= 0)
        return;

    /* mincore() fills in one status byte per page of the mapping */
    page_cnt =
        ((size_t) rrd_file->file_len + (size_t) page_size - 1) /
        (size_t) page_size;
    vec = calloc(page_cnt, 1);
    if (vec == NULL)
        return;

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) != 0) {
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));
        free(vec);
        return;
    }

    was_in = vec[0] & 1;    /* if lsb set then is core resident */
    for (page = 1; page < page_cnt; ++page) {
        is_in = vec[page] & 1;
        if (was_in != is_in) {
            fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                    was_in ? "" : "not ",
                    (void *) (rrd_file->file_start +
                              run_start * (size_t) page_size),
                    (long) ((page - run_start) * (size_t) page_size));
            was_in = is_in;
            run_start = page;
        }
    }
    /* the last run is still pending */
    fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
            was_in ? "" : "not ",
            (void *) (rrd_file->file_start + run_start * (size_t) page_size),
            (long) ((page - run_start) * (size_t) page_size));
    free(vec);
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
318 /* drop cache except for the header and the active pages */
319 void rrd_dontneed(
320 rrd_file_t *rrd_file,
321 rrd_t *rrd)
322 {
323 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
324 unsigned long dontneed_start;
325 unsigned long rra_start;
326 unsigned long active_block;
327 unsigned long i;
328 ssize_t _page_size = sysconf(_SC_PAGESIZE);
330 #if defined DEBUG && DEBUG > 1
331 mincore_print(rrd_file, "before");
332 #endif
334 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
335 rra_start = rrd_file->header_len;
336 dontneed_start = PAGE_START(rra_start) + _page_size;
337 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
338 active_block =
339 PAGE_START(rra_start
340 + rrd->rra_ptr[i].cur_row
341 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
342 if (active_block > dontneed_start) {
343 #ifdef USE_MADVISE
344 madvise(rrd_file->file_start + dontneed_start,
345 active_block - dontneed_start - 1, MADV_DONTNEED);
346 #endif
347 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
348 #ifdef HAVE_POSIX_FADVISE
349 posix_fadvise(rrd_file->fd, dontneed_start,
350 active_block - dontneed_start - 1,
351 POSIX_FADV_DONTNEED);
352 #endif
353 }
354 dontneed_start = active_block;
355 /* do not release 'hot' block if update for this RAA will occur
356 * within 10 minutes */
357 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
358 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
359 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
360 dontneed_start += _page_size;
361 }
362 rra_start +=
363 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
364 sizeof(rrd_value_t);
365 }
366 #ifdef USE_MADVISE
367 madvise(rrd_file->file_start + dontneed_start,
368 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
369 #endif
370 #ifdef HAVE_POSIX_FADVISE
371 posix_fadvise(rrd_file->fd, dontneed_start,
372 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
373 #endif
374 #if defined DEBUG && DEBUG > 1
375 mincore_print(rrd_file, "after");
376 #endif
377 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
378 }
384 int rrd_close(
385 rrd_file_t *rrd_file)
386 {
387 int ret;
389 #ifdef HAVE_MMAP
390 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
391 if (ret != 0)
392 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
393 ret = munmap(rrd_file->file_start, rrd_file->file_len);
394 if (ret != 0)
395 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
396 #endif
397 ret = close(rrd_file->fd);
398 if (ret != 0)
399 rrd_set_error("closing file: %s", rrd_strerror(errno));
400 free(rrd_file);
401 rrd_file = NULL;
402 return ret;
403 }
406 /* Set position of rrd_file. */
408 off_t rrd_seek(
409 rrd_file_t *rrd_file,
410 off_t off,
411 int whence)
412 {
413 off_t ret = 0;
415 #ifdef HAVE_MMAP
416 if (whence == SEEK_SET)
417 rrd_file->pos = off;
418 else if (whence == SEEK_CUR)
419 rrd_file->pos += off;
420 else if (whence == SEEK_END)
421 rrd_file->pos = rrd_file->file_len + off;
422 #else
423 ret = lseek(rrd_file->fd, off, whence);
424 if (ret < 0)
425 rrd_set_error("lseek: %s", rrd_strerror(errno));
426 rrd_file->pos = ret;
427 #endif
428 /* mimic fseek, which returns 0 upon success */
429 return ret < 0; /*XXX: or just ret to mimic lseek */
430 }
433 /* Get current position in rrd_file. */
435 off_t rrd_tell(
436 rrd_file_t *rrd_file)
437 {
438 return rrd_file->pos;
439 }
442 /* Read count bytes into buffer buf, starting at rrd_file->pos.
443 * Returns the number of bytes read or <0 on error. */
445 ssize_t rrd_read(
446 rrd_file_t *rrd_file,
447 void *buf,
448 size_t count)
449 {
450 #ifdef HAVE_MMAP
451 size_t _cnt = count;
452 ssize_t _surplus;
454 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
455 return 0;
456 if (buf == NULL)
457 return -1; /* EINVAL */
458 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
459 if (_surplus > 0) { /* short read */
460 _cnt -= _surplus;
461 }
462 if (_cnt == 0)
463 return 0; /* EOF */
464 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
466 rrd_file->pos += _cnt; /* mimmic read() semantics */
467 return _cnt;
468 #else
469 ssize_t ret;
471 ret = read(rrd_file->fd, buf, count);
472 if (ret > 0)
473 rrd_file->pos += ret; /* mimmic read() semantics */
474 return ret;
475 #endif
476 }
479 /* Write count bytes from buffer buf to the current position
480 * rrd_file->pos of rrd_file->fd.
481 * Returns the number of bytes written or <0 on error. */
483 ssize_t rrd_write(
484 rrd_file_t *rrd_file,
485 const void *buf,
486 size_t count)
487 {
488 #ifdef HAVE_MMAP
489 if (count == 0)
490 return 0;
491 if (buf == NULL)
492 return -1; /* EINVAL */
493 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
494 rrd_file->pos += count;
495 return count; /* mimmic write() semantics */
496 #else
497 ssize_t _sz = write(rrd_file->fd, buf, count);
499 if (_sz > 0)
500 rrd_file->pos += _sz;
501 return _sz;
502 #endif
503 }
506 /* flush all data pending to be written to FD. */
508 void rrd_flush(
509 rrd_file_t *rrd_file)
510 {
511 if (fdatasync(rrd_file->fd) != 0) {
512 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
513 rrd_strerror(errno));
514 }
515 }
518 /* Initialize RRD header. */
520 void rrd_init(
521 rrd_t *rrd)
522 {
523 rrd->stat_head = NULL;
524 rrd->ds_def = NULL;
525 rrd->rra_def = NULL;
526 rrd->live_head = NULL;
527 rrd->legacy_last_up = NULL;
528 rrd->rra_ptr = NULL;
529 rrd->pdp_prep = NULL;
530 rrd->cdp_prep = NULL;
531 rrd->rrd_value = NULL;
532 }
535 /* free RRD header data. */
537 #ifdef HAVE_MMAP
538 void rrd_free(
539 rrd_t *rrd)
540 {
541 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
542 free(rrd->live_head);
543 }
544 }
545 #else
546 void rrd_free(
547 rrd_t *rrd)
548 {
549 free(rrd->live_head);
550 free(rrd->stat_head);
551 free(rrd->ds_def);
552 free(rrd->rra_def);
553 free(rrd->rra_ptr);
554 free(rrd->pdp_prep);
555 free(rrd->cdp_prep);
556 free(rrd->rrd_value);
557 }
558 #endif
/* Hand a block back to free(). The original note describes this as the
 * routine external libraries use to release memory allocated by the rrd
 * library. */
void rrd_freemem(void *mem)
{
    free(mem);
}