1 /*****************************************************************************
2 * RRDtool 1.3rc9 Copyright by Tobi Oetiker, 1997-2008
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 *****************************************************************************/
9 #include "rrd_tool.h"
10 #include "unused.h"
11 #define MEMBLK 8192
13 /* DEBUG 2 prints information obtained via mincore(2) */
14 #define DEBUG 1
15 /* do not calculate exact madvise hints but assume 1 page for headers and
16 * set DONTNEED for the rest, which is assumed to be data */
/* Avoid calling madvise on areas that were already hinted. May be beneficial if
 * your syscalls are very slow */
/* __rrd_read(dst, dst_t, cnt): load the next cnt objects of type dst_t of
 * the file header into dst and advance offset past them.
 *
 * The macro expands inside rrd_open() and relies on these names there:
 * offset, rrd_file, the label out_nullify_head and (mmap build only) data.
 * On any failure it calls rrd_set_error() and jumps to out_nullify_head.
 * It is wrapped in do { } while (0) so that it behaves as one statement.
 */
#ifdef HAVE_MMAP
/* mmap build: dst simply points into the mapping, nothing is copied.
   The cast to void* is there to avoid this warning seen on ia64 with certain
   versions of gcc: 'cast increases required alignment of target type'
*/
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        size_t rrd_read_wanted_ = sizeof(dst_t) * (cnt); \
        /* refuse to hand out pointers that reach past the mapped file */ \
        if ((size_t) offset + rrd_read_wanted_ > \
            (size_t) rrd_file->file_len) { \
            rrd_set_error("reached EOF while loading header " #dst); \
            goto out_nullify_head; \
        } \
        (dst) = (dst_t*)(void*) (data + offset); \
        offset += rrd_read_wanted_; \
    } while (0)
#else
/* read() build: dst becomes a malloc'ed copy that the caller has to free.
   A failed or short read releases the buffer again and resets dst to NULL. */
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        size_t rrd_read_wanted_ = sizeof(dst_t) * (cnt); \
        ssize_t rrd_read_got_; \
        if (((dst) = malloc(rrd_read_wanted_)) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_read_got_ = read(rrd_file->fd, (dst), rrd_read_wanted_); \
        if (rrd_read_got_ < 0 || \
            (size_t) rrd_read_got_ != rrd_read_wanted_) { \
            rrd_set_error("short read while reading header " #dst); \
            free(dst); \
            (dst) = NULL; \
            goto out_nullify_head; \
        } \
        offset += rrd_read_got_; \
    } while (0)
#endif
/* Round addr down to the first byte of the page that contains it.
 * The expansion uses a variable named _page_size that must be in scope at
 * the point of use; the mask only yields a page boundary when that value is
 * a power of two. */
#if defined(USE_MADVISE) || defined(HAVE_POSIX_FADVISE)
# ifndef PAGE_START
#  define PAGE_START(addr) ((addr) & ~(_page_size - 1))
# endif
#endif
43 /* Open a database file, return its header and an open filehandle,
44 * positioned to the first cdp in the first rra.
45 * In the error path of rrd_open, only rrd_free(&rrd) has to be called
46 * before returning an error. Do not call rrd_close upon failure of rrd_open.
47 */
49 rrd_file_t *rrd_open(
50 const char *const file_name,
51 rrd_t *rrd,
52 unsigned rdwr)
53 {
54 int flags = 0;
55 mode_t mode = S_IRUSR;
56 int version;
58 #ifdef HAVE_MMAP
59 ssize_t _page_size = sysconf(_SC_PAGESIZE);
60 int mm_prot = PROT_READ, mm_flags = 0;
61 char *data;
62 #endif
63 off_t offset = 0;
64 struct stat statb;
65 rrd_file_t *rrd_file = NULL;
66 off_t newfile_size = 0;
68 if (rdwr & RRD_CREAT) {
69 /* yes bad inline signaling alert, we are using the
70 floatcookie to pass the size in ... only used in resize */
71 newfile_size = (off_t) rrd->stat_head->float_cookie;
72 free(rrd->stat_head);
73 }
74 rrd_init(rrd);
75 rrd_file = malloc(sizeof(rrd_file_t));
76 if (rrd_file == NULL) {
77 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
78 return NULL;
79 }
80 memset(rrd_file, 0, sizeof(rrd_file_t));
82 #ifdef DEBUG
83 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
84 (RRD_READONLY | RRD_READWRITE)) {
85 /* Both READONLY and READWRITE were given, which is invalid. */
86 rrd_set_error("in read/write request mask");
87 exit(-1);
88 }
89 #endif
90 if (rdwr & RRD_READONLY) {
91 flags |= O_RDONLY;
92 #ifdef HAVE_MMAP
93 mm_flags = MAP_PRIVATE;
94 # ifdef MAP_NORESERVE
95 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
96 # endif
97 #endif
98 } else {
99 if (rdwr & RRD_READWRITE) {
100 mode |= S_IWUSR;
101 flags |= O_RDWR;
102 #ifdef HAVE_MMAP
103 mm_flags = MAP_SHARED;
104 mm_prot |= PROT_WRITE;
105 #endif
106 }
107 if (rdwr & RRD_CREAT) {
108 flags |= (O_CREAT | O_TRUNC);
109 }
110 }
111 if (rdwr & RRD_READAHEAD) {
112 #ifdef MAP_POPULATE
113 mm_flags |= MAP_POPULATE; /* populate ptes and data */
114 #endif
115 #if defined MAP_NONBLOCK
116 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
117 #endif
118 }
120 #if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)
121 flags |= O_BINARY;
122 #endif
124 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
125 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
126 goto out_free;
127 }
129 /* Better try to avoid seeks as much as possible. stat may be heavy but
130 * many concurrent seeks are even worse. */
131 if (newfile_size == 0 && ((fstat(rrd_file->fd, &statb)) < 0)) {
132 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
133 goto out_close;
134 }
135 if (newfile_size == 0) {
136 rrd_file->file_len = statb.st_size;
137 } else {
138 rrd_file->file_len = newfile_size;
139 lseek(rrd_file->fd, newfile_size - 1, SEEK_SET);
140 write(rrd_file->fd, "\0", 1); /* poke */
141 lseek(rrd_file->fd, 0, SEEK_SET);
142 }
143 #ifdef HAVE_POSIX_FADVISE
144 /* In general we need no read-ahead when dealing with rrd_files.
145 When we stop reading, it is highly unlikely that we start up again.
146 In this manner we actually save time and diskaccess (and buffer cache).
147 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
148 posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM);
149 #endif
151 /*
152 if (rdwr & RRD_READWRITE)
153 {
154 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
155 rrd_set_error("failed to disable the stream buffer\n");
156 return (-1);
157 }
158 }
159 */
160 #ifdef HAVE_MMAP
161 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
162 rrd_file->fd, offset);
164 /* lets see if the first read worked */
165 if (data == MAP_FAILED) {
166 rrd_set_error("mmaping file '%s': %s", file_name,
167 rrd_strerror(errno));
168 goto out_close;
169 }
170 rrd_file->file_start = data;
171 if (rdwr & RRD_CREAT) {
172 memset(data, DNAN, newfile_size - 1);
173 goto out_done;
174 }
175 #endif
176 if (rdwr & RRD_CREAT)
177 goto out_done;
178 #ifdef USE_MADVISE
179 if (rdwr & RRD_COPY) {
180 /* We will read everything in a moment (copying) */
181 madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
182 } else {
183 /* We do not need to read anything in for the moment */
184 madvise(data, rrd_file->file_len, MADV_RANDOM);
185 /* the stat_head will be needed soonish, so hint accordingly */
186 madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM);
187 }
188 #endif
190 __rrd_read(rrd->stat_head, stat_head_t,
191 1);
193 /* lets do some test if we are on track ... */
194 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
195 rrd_set_error("'%s' is not an RRD file", file_name);
196 goto out_nullify_head;
197 }
199 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
200 rrd_set_error("This RRD was created on another architecture");
201 goto out_nullify_head;
202 }
204 version = atoi(rrd->stat_head->version);
206 if (version > atoi(RRD_VERSION)) {
207 rrd_set_error("can't handle RRD file version %s",
208 rrd->stat_head->version);
209 goto out_nullify_head;
210 }
211 #if defined USE_MADVISE
212 /* the ds_def will be needed soonish, so hint accordingly */
213 madvise(data + PAGE_START(offset),
214 sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED);
215 #endif
216 __rrd_read(rrd->ds_def, ds_def_t,
217 rrd->stat_head->ds_cnt);
219 #if defined USE_MADVISE
220 /* the rra_def will be needed soonish, so hint accordingly */
221 madvise(data + PAGE_START(offset),
222 sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED);
223 #endif
224 __rrd_read(rrd->rra_def, rra_def_t,
225 rrd->stat_head->rra_cnt);
227 /* handle different format for the live_head */
228 if (version < 3) {
229 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
230 if (rrd->live_head == NULL) {
231 rrd_set_error("live_head_t malloc");
232 goto out_close;
233 }
234 #if defined USE_MADVISE
235 /* the live_head will be needed soonish, so hint accordingly */
236 madvise(data + PAGE_START(offset), sizeof(time_t), MADV_WILLNEED);
237 #endif
238 __rrd_read(rrd->legacy_last_up, time_t,
239 1);
241 rrd->live_head->last_up = *rrd->legacy_last_up;
242 rrd->live_head->last_up_usec = 0;
243 } else {
244 #if defined USE_MADVISE
245 /* the live_head will be needed soonish, so hint accordingly */
246 madvise(data + PAGE_START(offset),
247 sizeof(live_head_t), MADV_WILLNEED);
248 #endif
249 __rrd_read(rrd->live_head, live_head_t,
250 1);
251 }
252 __rrd_read(rrd->pdp_prep, pdp_prep_t,
253 rrd->stat_head->ds_cnt);
254 __rrd_read(rrd->cdp_prep, cdp_prep_t,
255 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
256 __rrd_read(rrd->rra_ptr, rra_ptr_t,
257 rrd->stat_head->rra_cnt);
259 rrd_file->header_len = offset;
260 rrd_file->pos = offset;
261 out_done:
262 return (rrd_file);
263 out_nullify_head:
264 rrd->stat_head = NULL;
265 out_close:
266 close(rrd_file->fd);
267 out_free:
268 free(rrd_file);
269 return NULL;
270 }
#if defined DEBUG && DEBUG > 1
/* Print list of in-core pages of the current rrd_file.
 *
 * rrd_file  open file whose mapping (file_start, file_len) is inspected.
 * mark      label printed in front of every output line.
 *
 * Writes one line to stderr for each run of consecutive pages sharing the
 * same residency state; "len" counts pages. Nothing is printed if the
 * residency vector cannot be allocated.
 */
static
void mincore_print(
    rrd_file_t *rrd_file,
    char *mark)
{
#ifdef HAVE_MMAP
    /* pretty print blocks in core */
    unsigned char *vec;
    ssize_t   _page_size = sysconf(_SC_PAGESIZE);
    off_t     n_pages;
    off_t     page;
    off_t     prev = 0;
    unsigned  is_in = 0, was_in = 0;

    /* mincore() fills in exactly one byte per page of the range */
    n_pages = (rrd_file->file_len + _page_size - 1) / _page_size;
    vec = calloc((size_t) n_pages, 1);
    if (vec == NULL)
        return;

    if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
        for (page = 0; page < n_pages; ++page) {
            is_in = vec[page] & 1;  /* if lsb set then is core resident */
            if (page == 0)
                was_in = is_in;
            if (was_in != is_in) {
                /* NOTE(review): the address printed points into the
                 * residency vector, not into the mapping; confirm that
                 * this is what is wanted. */
                fprintf(stderr, "%s: %sin core: %p len %ld\n", mark,
                        was_in ? "" : "not ", (void *) (vec + prev),
                        (long) (page - prev));
                was_in = is_in;
                prev = page;
            }
        }
        fprintf(stderr,
                "%s: %sin core: %p len %ld\n", mark,
                was_in ? "" : "not ", (void *) (vec + prev),
                (long) (page - prev));
    } else
        fprintf(stderr, "mincore: %s", rrd_strerror(errno));

    free(vec);          /* the vector is only needed for this report */
#else
    fprintf(stderr, "sorry mincore only works with mmap");
#endif
}
#endif                          /* defined DEBUG && DEBUG > 1 */
319 /* drop cache except for the header and the active pages */
320 void rrd_dontneed(
321 rrd_file_t *rrd_file,
322 rrd_t *rrd)
323 {
324 #if defined USE_MADVISE || defined HAVE_POSIX_FADVISE
325 unsigned long dontneed_start;
326 unsigned long rra_start;
327 unsigned long active_block;
328 unsigned long i;
329 ssize_t _page_size = sysconf(_SC_PAGESIZE);
331 #if defined DEBUG && DEBUG > 1
332 mincore_print(rrd_file, "before");
333 #endif
335 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
336 rra_start = rrd_file->header_len;
337 dontneed_start = PAGE_START(rra_start) + _page_size;
338 for (i = 0; i < rrd->stat_head->rra_cnt; ++i) {
339 active_block =
340 PAGE_START(rra_start
341 + rrd->rra_ptr[i].cur_row
342 * rrd->stat_head->ds_cnt * sizeof(rrd_value_t));
343 if (active_block > dontneed_start) {
344 #ifdef USE_MADVISE
345 madvise(rrd_file->file_start + dontneed_start,
346 active_block - dontneed_start - 1, MADV_DONTNEED);
347 #endif
348 /* in linux at least only fadvise DONTNEED seems to purge pages from cache */
349 #ifdef HAVE_POSIX_FADVISE
350 posix_fadvise(rrd_file->fd, dontneed_start,
351 active_block - dontneed_start - 1,
352 POSIX_FADV_DONTNEED);
353 #endif
354 }
355 dontneed_start = active_block;
356 /* do not release 'hot' block if update for this RAA will occur
357 * within 10 minutes */
358 if (rrd->stat_head->pdp_step * rrd->rra_def[i].pdp_cnt -
359 rrd->live_head->last_up % (rrd->stat_head->pdp_step *
360 rrd->rra_def[i].pdp_cnt) < 10 * 60) {
361 dontneed_start += _page_size;
362 }
363 rra_start +=
364 rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt *
365 sizeof(rrd_value_t);
366 }
367 #ifdef USE_MADVISE
368 madvise(rrd_file->file_start + dontneed_start,
369 rrd_file->file_len - dontneed_start, MADV_DONTNEED);
370 #endif
371 #ifdef HAVE_POSIX_FADVISE
372 posix_fadvise(rrd_file->fd, dontneed_start,
373 rrd_file->file_len - dontneed_start, POSIX_FADV_DONTNEED);
374 #endif
375 #if defined DEBUG && DEBUG > 1
376 mincore_print(rrd_file, "after");
377 #endif
378 #endif /* without madvise and posix_fadvise ist does not make much sense todo anything */
379 }
385 int rrd_close(
386 rrd_file_t *rrd_file)
387 {
388 int ret;
390 #ifdef HAVE_MMAP
391 ret = msync(rrd_file->file_start, rrd_file->file_len, MS_ASYNC);
392 if (ret != 0)
393 rrd_set_error("msync rrd_file: %s", rrd_strerror(errno));
394 ret = munmap(rrd_file->file_start, rrd_file->file_len);
395 if (ret != 0)
396 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
397 #endif
398 ret = close(rrd_file->fd);
399 if (ret != 0)
400 rrd_set_error("closing file: %s", rrd_strerror(errno));
401 free(rrd_file);
402 rrd_file = NULL;
403 return ret;
404 }
407 /* Set position of rrd_file. */
409 off_t rrd_seek(
410 rrd_file_t *rrd_file,
411 off_t off,
412 int whence)
413 {
414 off_t ret = 0;
416 #ifdef HAVE_MMAP
417 if (whence == SEEK_SET)
418 rrd_file->pos = off;
419 else if (whence == SEEK_CUR)
420 rrd_file->pos += off;
421 else if (whence == SEEK_END)
422 rrd_file->pos = rrd_file->file_len + off;
423 #else
424 ret = lseek(rrd_file->fd, off, whence);
425 if (ret < 0)
426 rrd_set_error("lseek: %s", rrd_strerror(errno));
427 rrd_file->pos = ret;
428 #endif
429 /* mimic fseek, which returns 0 upon success */
430 return ret < 0; /*XXX: or just ret to mimic lseek */
431 }
434 /* Get current position in rrd_file. */
436 inline off_t rrd_tell(
437 rrd_file_t *rrd_file)
438 {
439 return rrd_file->pos;
440 }
443 /* Read count bytes into buffer buf, starting at rrd_file->pos.
444 * Returns the number of bytes read or <0 on error. */
446 inline ssize_t rrd_read(
447 rrd_file_t *rrd_file,
448 void *buf,
449 size_t count)
450 {
451 #ifdef HAVE_MMAP
452 size_t _cnt = count;
453 ssize_t _surplus;
455 if (rrd_file->pos > rrd_file->file_len || _cnt == 0) /* EOF */
456 return 0;
457 if (buf == NULL)
458 return -1; /* EINVAL */
459 _surplus = rrd_file->pos + _cnt - rrd_file->file_len;
460 if (_surplus > 0) { /* short read */
461 _cnt -= _surplus;
462 }
463 if (_cnt == 0)
464 return 0; /* EOF */
465 buf = memcpy(buf, rrd_file->file_start + rrd_file->pos, _cnt);
467 rrd_file->pos += _cnt; /* mimmic read() semantics */
468 return _cnt;
469 #else
470 ssize_t ret;
472 ret = read(rrd_file->fd, buf, count);
473 if (ret > 0)
474 rrd_file->pos += ret; /* mimmic read() semantics */
475 return ret;
476 #endif
477 }
480 /* Write count bytes from buffer buf to the current position
481 * rrd_file->pos of rrd_file->fd.
482 * Returns the number of bytes written or <0 on error. */
484 inline ssize_t rrd_write(
485 rrd_file_t *rrd_file,
486 const void *buf,
487 size_t count)
488 {
489 #ifdef HAVE_MMAP
490 if (count == 0)
491 return 0;
492 if (buf == NULL)
493 return -1; /* EINVAL */
494 memcpy(rrd_file->file_start + rrd_file->pos, buf, count);
495 rrd_file->pos += count;
496 return count; /* mimmic write() semantics */
497 #else
498 ssize_t _sz = write(rrd_file->fd, buf, count);
500 if (_sz > 0)
501 rrd_file->pos += _sz;
502 return _sz;
503 #endif
504 }
507 /* flush all data pending to be written to FD. */
509 inline void rrd_flush(
510 rrd_file_t *rrd_file)
511 {
512 if (fdatasync(rrd_file->fd) != 0) {
513 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
514 rrd_strerror(errno));
515 }
516 }
519 /* Initialize RRD header. */
521 void rrd_init(
522 rrd_t *rrd)
523 {
524 rrd->stat_head = NULL;
525 rrd->ds_def = NULL;
526 rrd->rra_def = NULL;
527 rrd->live_head = NULL;
528 rrd->legacy_last_up = NULL;
529 rrd->rra_ptr = NULL;
530 rrd->pdp_prep = NULL;
531 rrd->cdp_prep = NULL;
532 rrd->rrd_value = NULL;
533 }
536 /* free RRD header data. */
538 #ifdef HAVE_MMAP
539 void rrd_free(
540 rrd_t *rrd)
541 {
542 if (rrd->legacy_last_up) { /* this gets set for version < 3 only */
543 free(rrd->live_head);
544 }
545 }
546 #else
547 void rrd_free(
548 rrd_t *rrd)
549 {
550 free(rrd->live_head);
551 free(rrd->stat_head);
552 free(rrd->ds_def);
553 free(rrd->rra_def);
554 free(rrd->rra_ptr);
555 free(rrd->pdp_prep);
556 free(rrd->cdp_prep);
557 free(rrd->rrd_value);
558 }
559 #endif
/* Release memory that was handed out by the rrd library.
 * Exists so that external code frees such memory through the library
 * itself; mem is passed straight to free(). */
void rrd_freemem(void *mem)
{
    free(mem);
}