c986f750beca7bec9fb3f0cba5c5a488cfda17d2
1 /*****************************************************************************
2 * RRDtool 1.2.23 Copyright by Tobi Oetiker, 1997-2007
3 *****************************************************************************
4 * rrd_open.c Open an RRD File
5 *****************************************************************************
6 * $Id$
7 * $Log$
8 * Revision 1.10 2004/05/26 22:11:12 oetiker
9 * reduce compiler warnings. Many small fixes. -- Mike Slifcak <slif@bellsouth.net>
10 *
11 * Revision 1.9 2003/04/29 21:56:49 oetiker
12 * readline in rrd_open.c reads the file in 8 KB blocks, and calls realloc for
13 * each block. realloc is very slow in Mac OS X for huge blocks, e.g. when
14 * restoring databases from huge xml files. This patch finds the size of the
15 * file, and starts out with malloc'ing the full size.
16 * -- Peter Speck <speck@ruc.dk>
17 *
18 * Revision 1.8 2003/04/11 19:43:44 oetiker
19 * New special value COUNT which allows calculations based on the position of a
20 * value within a data set. Bug fix in rrd_rpncalc.c. PREV returned erroneous
21 * value for the second value. Bug fix in rrd_restore.c. Bug causing seek error
22 * when accessing an RRD restored from an xml that holds an RRD version <3.
23 * -- Ruben Justo <ruben@ainek.com>
24 *
25 * Revision 1.7 2003/03/31 21:22:12 oetiker
26 * enables RRDtool updates with microsecond or in case of windows millisecond
27 * precision. This is needed to reduce time measurement error when archive step
28 * is small. (<30s) -- Sasha Mikheev <sasha@avalon-net.co.il>
29 *
30 * Revision 1.6 2003/02/13 07:05:27 oetiker
31 * Find attached the patch I promised to send to you. Please note that there
32 * are three new source files (src/rrd_is_thread_safe.h, src/rrd_thread_safe.c
33 * and src/rrd_not_thread_safe.c) and the introduction of librrd_th. This
34 * library is identical to librrd, but it contains support code for per-thread
35 * global variables currently used for error information only. This is similar
36 * to how errno per-thread variables are implemented. librrd_th must be linked
37 * alongside of libpthread
38 *
39 * There is also a new file "THREADS", holding some documentation.
40 *
41 * -- Peter Stamfest <peter@stamfest.at>
42 *
43 * Revision 1.5 2002/06/20 00:21:03 jake
44 * More Win32 build changes; thanks to Kerry Calvert.
45 *
46 * Revision 1.4 2002/02/01 20:34:49 oetiker
47 * fixed version number and date/time
48 *
49 * Revision 1.3 2001/03/04 13:01:55 oetiker
50 * Aberrant Behavior Detection support. A brief overview added to rrdtool.pod.
51 * Major updates to rrd_update.c, rrd_create.c. Minor update to other core files.
52 * This is backwards compatible! But new files using the Aberrant stuff are not readable
53 * by old rrdtool versions. See http://cricket.sourceforge.net/aberrant/rrd_hw.htm
54 * -- Jake Brutlag <jakeb@corp.webtv.net>
55 *
56 * Revision 1.2 2001/03/04 10:29:20 oetiker
57 * fixed filedescriptor leak
58 * -- Mike Franusich <mike@franusich.com>
59 *
60 * Revision 1.1.1.1 2001/02/25 22:25:05 oetiker
61 * checkin
62 *
63 *****************************************************************************/
65 #include "rrd_tool.h"
66 #include "unused.h"
67 #define MEMBLK 8192
69 /* DEBUG 2 prints information obtained via mincore(2) */
70 // #define DEBUG 2
71 /* do not calculate exact madvise hints but assume 2 pages for headers and
72 * set DONTNEED for the rest, which is assumed to be data */
73 //#define TWO_PAGES 1
74 /* Avoid calling madvise on areas that were already hinted. May be beneficial if
75 * your syscalls are very slow */
76 //#define CHECK_MADVISE_OVERLAPS 1
/*
 * __rrd_read(dst, dst_t, cnt): fetch `cnt` consecutive objects of type
 * `dst_t` from the current header position and make `dst` refer to them.
 *
 * The macro relies on names that must be in scope at the point of use:
 *   - `offset` is advanced past the data that was consumed;
 *   - with HAVE_MMAP, `data` is the start of the mapping and `dst` is simply
 *     pointed into it (nothing is copied or allocated);
 *   - without HAVE_MMAP, `dst` receives a malloc'ed buffer filled via read()
 *     from `rrd_file->fd`; on allocation failure an error is recorded and
 *     control jumps to the label `out_nullify_head`.
 *
 * Both variants are wrapped in do { } while (0) so that an invocation is a
 * single statement, even when used as the body of an unbraced if/else.
 */
#ifdef HAVE_MMAP
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        (dst) = (dst_t *) (data + offset); \
        offset += sizeof(dst_t) * (cnt); \
    } while (0)
#else
#define __rrd_read(dst, dst_t, cnt) \
    do { \
        ssize_t rrd_read_got_; \
        if (((dst) = malloc(sizeof(dst_t) * (cnt))) == NULL) { \
            rrd_set_error(#dst " malloc"); \
            goto out_nullify_head; \
        } \
        rrd_read_got_ = read(rrd_file->fd, (dst), sizeof(dst_t) * (cnt)); \
        /* a failed read() returns -1; never move the offset backwards */ \
        if (rrd_read_got_ > 0) \
            offset += rrd_read_got_; \
    } while (0)
#endif
/*
 * Page alignment helpers.  Both macros expect a variable named `_page_size`
 * to be in scope where they are expanded; the bit masking only yields page
 * multiples when that value is a power of two.
 */
/* next page-aligned (i.e. page-align up) */
#ifndef PAGE_ALIGN
#define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1)))
#endif
/* previous page-aligned (i.e. page-align down): just clear the low bits,
 * without first adding _page_size-1 (that would round up instead) */
#ifndef PAGE_ALIGN_DOWN
#define PAGE_ALIGN_DOWN(addr) ((addr)&(~(_page_size-1)))
#endif
#ifdef HAVE_MMAP
/* start address and length of the most recent madvise hint; only consulted
 * and updated by _madvise() when CHECK_MADVISE_OVERLAPS is defined */
typedef struct _madvise_vec_t {
    void     *start;
    ssize_t   length;
} _madvise_vec_t;
_madvise_vec_t _madv_vec = { NULL, 0 };
#endif

/*
 * _madvise(start, len, hint): thin wrapper around madvise().
 *
 * With CHECK_MADVISE_OVERLAPS the call is skipped when the region is exactly
 * the one hinted last time (same start AND same length).  A region that
 * differs in either start or length is hinted and remembered.  The hint
 * value itself is not tracked.  Note that the arguments are evaluated more
 * than once in this variant, and that it is a statement, not an expression.
 */
#if defined CHECK_MADVISE_OVERLAPS
#define _madvise(_start, _off, _hint) \
    do { \
        if ((_start) != _madv_vec.start || \
            (ssize_t) (_off) != _madv_vec.length) { \
            _madv_vec.start = (_start); \
            _madv_vec.length = (_off); \
            madvise((_start), (_off), (_hint)); \
        } \
    } while (0)
#else
#define _madvise(_start, _off, _hint) \
    madvise((_start), (_off), (_hint))
#endif
120 /* open a database file, return its header and an open filehandle */
121 /* positioned to the first cdp in the first rra */
123 rrd_file_t *rrd_open(
124 const char *const file_name,
125 rrd_t *rrd,
126 unsigned rdwr)
127 {
128 int flags = 0;
129 mode_t mode = S_IRUSR;
130 int version;
132 #ifdef HAVE_MMAP
133 ssize_t _page_size = sysconf(_SC_PAGESIZE);
134 int mm_prot = PROT_READ, mm_flags = 0;
135 char *data;
136 #endif
137 off_t offset = 0;
138 struct stat statb;
139 rrd_file_t *rrd_file = malloc(sizeof(rrd_file_t));
141 if (rrd_file == NULL) {
142 rrd_set_error("allocating rrd_file descriptor for '%s'", file_name);
143 return NULL;
144 }
145 memset(rrd_file, 0, sizeof(rrd_file_t));
146 rrd_init(rrd);
147 #ifdef DEBUG
148 if ((rdwr & (RRD_READONLY | RRD_READWRITE)) ==
149 (RRD_READONLY | RRD_READWRITE)) {
150 /* Both READONLY and READWRITE were given, which is invalid. */
151 rrd_set_error("in read/write request mask");
152 exit(-1);
153 }
154 #endif
155 if (rdwr & RRD_READONLY) {
156 flags |= O_RDONLY;
157 #ifdef HAVE_MMAP
158 mm_flags = MAP_PRIVATE;
159 # ifdef MAP_NORESERVE
160 mm_flags |= MAP_NORESERVE; /* readonly, so no swap backing needed */
161 # endif
162 #endif
163 } else {
164 if (rdwr & RRD_READWRITE) {
165 mode |= S_IWUSR;
166 flags |= O_RDWR;
167 #ifdef HAVE_MMAP
168 mm_flags = MAP_SHARED;
169 mm_prot |= PROT_WRITE;
170 #endif
171 }
172 if (rdwr & RRD_CREAT) {
173 flags |= (O_CREAT | O_TRUNC);
174 }
175 }
176 if (rdwr & RRD_READAHEAD) {
177 #ifdef MAP_POPULATE
178 mm_flags |= MAP_POPULATE; /* populate ptes and data */
179 #endif
180 #if defined MAP_NONBLOCK
181 mm_flags |= MAP_NONBLOCK; /* just populate ptes */
182 #endif
183 #ifdef USE_DIRECT_IO
184 } else {
185 flags |= O_DIRECT;
186 #endif
187 }
188 #ifdef O_NONBLOCK
189 flags |= O_NONBLOCK;
190 #endif
192 if ((rrd_file->fd = open(file_name, flags, mode)) < 0) {
193 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
194 return NULL;
195 }
197 /* Better try to avoid seeks as much as possible. stat may be heavy but
198 * many concurrent seeks are even worse. */
199 if ((fstat(rrd_file->fd, &statb)) < 0) {
200 rrd_set_error("fstat '%s': %s", file_name, rrd_strerror(errno));
201 goto out_close;
202 }
203 rrd_file->file_len = statb.st_size;
205 #ifdef HAVE_POSIX_FADVISE
206 /* In general we need no read-ahead when dealing with rrd_files.
207 When we stop reading, it is highly unlikely that we start up again.
208 In this manner we actually save time and diskaccess (and buffer cache).
209 Thanks to Dave Plonka for the Idea of using POSIX_FADV_RANDOM here. */
210 if (0 != posix_fadvise(rrd_file->fd, 0, 0, POSIX_FADV_RANDOM)) {
211 rrd_set_error("setting POSIX_FADV_RANDOM on '%s': %s", file_name,
212 rrd_strerror(errno));
213 goto out_close;
214 }
215 #endif
217 /*
218 if (rdwr & RRD_READWRITE)
219 {
220 if (setvbuf((rrd_file->fd),NULL,_IONBF,2)) {
221 rrd_set_error("failed to disable the stream buffer\n");
222 return (-1);
223 }
224 }
225 */
226 #ifdef HAVE_MMAP
227 data = mmap(0, rrd_file->file_len, mm_prot, mm_flags,
228 rrd_file->fd, offset);
230 /* lets see if the first read worked */
231 if (data == MAP_FAILED) {
232 rrd_set_error("error mmaping file '%s': %s", file_name,
233 rrd_strerror(errno));
234 goto out_close;
235 }
236 rrd_file->file_start = data;
237 #endif
238 #ifdef USE_MADVISE
239 if (rdwr & RRD_COPY) {
240 /* We will read everything in a moment (copying) */
241 _madvise(data, rrd_file->file_len, MADV_WILLNEED | MADV_SEQUENTIAL);
242 goto out_done;
243 }
244 /* We do not need to read anything in for the moment */
245 #ifndef TWO_PAGES
246 _madvise(data, rrd_file->file_len, MADV_DONTNEED);
247 // _madvise(data, rrd_file->file_len, MADV_RANDOM);
248 #else
249 /* alternatively: keep 2 pages worth of data, likely headers,
250 * don't need the rest. */
251 _madvise(data, _page_size * 2, MADV_WILLNEED | MADV_SEQUENTIAL);
252 _madvise(data + _page_size * 2, (rrd_file->file_len >= _page_size * 2)
253 ? rrd_file->file_len - _page_size * 2 : 0, MADV_DONTNEED);
254 #endif
255 #endif
257 #if defined USE_MADVISE && !defined TWO_PAGES
258 /* the stat_head will be needed soonish, so hint accordingly */
259 // too finegrained to calc the individual sizes, just keep 2 pages worth of hdr
260 _madvise(data + PAGE_ALIGN_DOWN(offset), PAGE_ALIGN(sizeof(stat_head_t)),
261 MADV_WILLNEED);
263 #endif
265 __rrd_read(rrd->stat_head, stat_head_t,
266 1);
268 /* lets do some test if we are on track ... */
269 if (memcmp(rrd->stat_head->cookie, RRD_COOKIE, sizeof(RRD_COOKIE)) != 0) {
270 rrd_set_error("'%s' is not an RRD file", file_name);
271 goto out_nullify_head;
272 }
274 if (rrd->stat_head->float_cookie != FLOAT_COOKIE) {
275 rrd_set_error("This RRD was created on other architecture");
276 goto out_nullify_head;
277 }
279 version = atoi(rrd->stat_head->version);
281 if (version > atoi(RRD_VERSION)) {
282 rrd_set_error("can't handle RRD file version %s",
283 rrd->stat_head->version);
284 goto out_nullify_head;
285 }
286 #if defined USE_MADVISE && !defined TWO_PAGES
287 /* the ds_def will be needed soonish, so hint accordingly */
288 _madvise(data + PAGE_ALIGN_DOWN(offset),
289 PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt),
290 MADV_WILLNEED);
291 #endif
292 __rrd_read(rrd->ds_def, ds_def_t,
293 rrd->stat_head->ds_cnt);
295 #if defined USE_MADVISE && !defined TWO_PAGES
296 /* the rra_def will be needed soonish, so hint accordingly */
297 _madvise(data + PAGE_ALIGN_DOWN(offset),
298 PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt),
299 MADV_WILLNEED);
300 #endif
301 __rrd_read(rrd->rra_def, rra_def_t,
302 rrd->stat_head->rra_cnt);
304 /* handle different format for the live_head */
305 if (version < 3) {
306 rrd->live_head = (live_head_t *) malloc(sizeof(live_head_t));
307 if (rrd->live_head == NULL) {
308 rrd_set_error("live_head_t malloc");
309 goto out_close;
310 }
311 #ifdef HAVE_MMAP
312 memmove(&rrd->live_head->last_up, data + offset, sizeof(long));
313 offset += sizeof(long);
314 #else
315 offset += read(rrd_file->fd, &rrd->live_head->last_up, sizeof(long));
316 #endif
317 rrd->live_head->last_up_usec = 0;
318 } else {
319 #if defined USE_MADVISE && !defined TWO_PAGES
320 /* the live_head will be needed soonish, so hint accordingly */
321 _madvise(data + PAGE_ALIGN_DOWN(offset),
322 PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED);
323 #endif
324 __rrd_read(rrd->live_head, live_head_t,
325 1);
326 }
327 //XXX: This doesn't look like it needs madvise
328 __rrd_read(rrd->pdp_prep, pdp_prep_t,
329 rrd->stat_head->ds_cnt);
331 //XXX: This could benefit from madvise()ing
332 __rrd_read(rrd->cdp_prep, cdp_prep_t,
333 rrd->stat_head->rra_cnt * rrd->stat_head->ds_cnt);
335 //XXX: This could benefit from madvise()ing
336 __rrd_read(rrd->rra_ptr, rra_ptr_t,
337 rrd->stat_head->rra_cnt);
339 #ifdef USE_MADVISE
340 out_done:
341 #endif
342 rrd_file->header_len = offset;
343 rrd_file->pos = offset;
345 return (rrd_file);
346 out_nullify_head:
347 rrd->stat_head = NULL;
348 out_close:
349 close(rrd_file->fd);
350 return NULL;
351 }
354 /* Close a reference to an rrd_file. */
356 int rrd_close(
357 rrd_file_t *rrd_file)
358 {
359 int ret;
361 #if defined HAVE_MMAP
362 ssize_t _page_size = sysconf(_SC_PAGESIZE);
363 #endif
364 #if defined DEBUG && DEBUG > 1
365 /* pretty print blocks in core */
366 off_t off;
367 unsigned char *vec;
369 off =
370 rrd_file->file_len +
371 ((rrd_file->file_len + sysconf(_SC_PAGESIZE) -
372 1) / sysconf(_SC_PAGESIZE));
373 vec = malloc(off);
374 if (vec != NULL) {
375 memset(vec, 0, off);
376 if (mincore(rrd_file->file_start, rrd_file->file_len, vec) == 0) {
377 int prev;
378 unsigned is_in = 0, was_in = 0;
380 for (off = 0, prev = 0; off < rrd_file->file_len; ++off) {
381 is_in = vec[off] & 1; /* if lsb set then is core resident */
382 if (off == 0)
383 was_in = is_in;
384 if (was_in != is_in) {
385 fprintf(stderr, "%sin core: %p len %ld\n",
386 was_in ? "" : "not ", vec + prev, off - prev);
387 was_in = is_in;
388 prev = off;
389 }
390 }
391 fprintf(stderr,
392 "%sin core: %p len %ld\n",
393 was_in ? "" : "not ", vec + prev, off - prev);
394 } else
395 fprintf(stderr, "mincore: %s", rrd_strerror(errno));
396 }
397 #endif /* DEBUG */
399 #ifdef USE_MADVISE
400 #ifdef TWO_PAGES
401 //XXX: ?
402 /* Keep 2 pages worth of headers around, round up to next page boundary. */
403 ret =
404 PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len);
405 if (rrd_file->file_len > ret)
406 _madvise(rrd_file->file_start + ret,
407 rrd_file->file_len - ret, MADV_DONTNEED);
408 #else
409 /* ignoring errors from RRDs that are smaller then the file_len+rounding */
410 _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len),
411 rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len),
412 MADV_DONTNEED);
413 #endif
414 #endif
415 #ifdef HAVE_MMAP
416 ret = munmap(rrd_file->file_start, rrd_file->file_len);
417 if (ret != 0)
418 rrd_set_error("munmap rrd_file: %s", rrd_strerror(errno));
419 #else
420 ret = 0;
421 #endif
422 // ret = close(rrd_file->fd);
423 // if (ret != 0)
424 // rrd_set_error("closing file: %s", rrd_strerror(errno));
425 free(rrd_file);
426 rrd_file = NULL;
427 return ret;
428 }
431 /* Set position of rrd_file. */
433 off_t rrd_seek(
434 rrd_file_t *rrd_file,
435 off_t off,
436 int whence)
437 {
438 off_t ret = 0;
440 #ifdef HAVE_MMAP
441 if (whence == SEEK_SET)
442 rrd_file->pos = off;
443 else if (whence == SEEK_CUR)
444 rrd_file->pos += off;
445 else if (whence == SEEK_END)
446 rrd_file->pos = rrd_file->file_len + off;
447 #else
448 ret = lseek(rrd_file->fd, off, whence);
449 if (ret < 0)
450 rrd_set_error("lseek: %s", rrd_strerror(errno));
451 rrd_file->pos = ret;
452 #endif
453 //XXX: mimic fseek, which returns 0 upon success
454 return ret == -1; //XXX: or just ret to mimic lseek
455 }
458 /* Get current position in rrd_file. */
460 inline off_t rrd_tell(rrd_file_t *rrd_file)
461 {
462 return rrd_file->pos;
463 }
466 /* read count bytes into buffer buf, starting at rrd_file->pos.
467 * Returns the number of bytes read. */
469 ssize_t rrd_read(
470 rrd_file_t *rrd_file,
471 void *buf,
472 size_t count)
473 {
474 #ifdef HAVE_MMAP
475 buf = memmove(buf, rrd_file->file_start + rrd_file->pos, count);
476 rrd_file->pos += count; /* mimmic read() semantics */
477 return count;
478 #else
479 ssize_t ret;
481 ret = read(rrd_file->fd, buf, count);
482 //XXX: eventually add generic rrd_set_error(""); here
483 rrd_file->pos += count; /* mimmic read() semantics */
484 return ret;
485 #endif
486 }
489 /* write count bytes from buffer buf to the current position
490 * rrd_file->pos of rrd_file->fd.
491 * Returns the number of bytes written. */
493 ssize_t rrd_write(
494 rrd_file_t *rrd_file,
495 const void *buf,
496 size_t count)
497 {
498 #ifdef HAVE_MMAP
499 memmove(rrd_file->file_start + rrd_file->pos, buf, count);
500 return count; /* mimmic write() semantics */
501 #else
502 return write(rrd_file->fd, buf, count);
503 #endif
504 }
507 /* flush all data pending to be written to FD. */
509 inline void rrd_flush(
510 rrd_file_t *rrd_file)
511 {
512 if (fdatasync(rrd_file->fd) != 0) {
513 rrd_set_error("flushing fd %d: %s", rrd_file->fd,
514 rrd_strerror(errno));
515 }
516 }
519 /* Initialize RRD header. */
521 void rrd_init(
522 rrd_t *rrd)
523 {
524 rrd->stat_head = NULL;
525 rrd->ds_def = NULL;
526 rrd->rra_def = NULL;
527 rrd->live_head = NULL;
528 rrd->rra_ptr = NULL;
529 rrd->pdp_prep = NULL;
530 rrd->cdp_prep = NULL;
531 rrd->rrd_value = NULL;
532 }
535 /* free RRD header data. */
537 void rrd_free(
538 rrd_t UNUSED(*rrd))
539 {
540 #ifndef HAVE_MMAP
541 if (atoi(rrd->stat_head->version) < 3)
542 free(rrd->live_head);
543 free(rrd->stat_head);
544 free(rrd->ds_def);
545 free(rrd->rra_def);
546 free(rrd->rra_ptr);
547 free(rrd->pdp_prep);
548 free(rrd->cdp_prep);
549 free(rrd->rrd_value);
550 #endif
551 }
/* Release a buffer with free().  Offered so that code outside the library
 * can hand back memory that the rrd library allocated. */
void rrd_freemem(
    void *mem)
{
    void     *const block = mem;

    free(block);
}
564 /* XXX: FIXME: missing documentation. */
565 /*XXX: FIXME should be renamed to rrd_readfile or _rrd_readfile */
567 int /*_rrd_*/ readfile(
568 const char *file_name,
569 char **buffer,
570 int skipfirst)
571 {
572 long writecnt = 0, totalcnt = MEMBLK;
573 long offset = 0;
574 FILE *input = NULL;
575 char c;
577 if ((strcmp("-", file_name) == 0)) {
578 input = stdin;
579 } else {
580 if ((input = fopen(file_name, "rb")) == NULL) {
581 rrd_set_error("opening '%s': %s", file_name, rrd_strerror(errno));
582 return (-1);
583 }
584 }
585 if (skipfirst) {
586 do {
587 c = getc(input);
588 offset++;
589 } while (c != '\n' && !feof(input));
590 }
591 if (strcmp("-", file_name)) {
592 fseek(input, 0, SEEK_END);
593 /* have extra space for detecting EOF without realloc */
594 totalcnt = (ftell(input) + 1) / sizeof(char) - offset;
595 if (totalcnt < MEMBLK)
596 totalcnt = MEMBLK; /* sanitize */
597 fseek(input, offset * sizeof(char), SEEK_SET);
598 }
599 if (((*buffer) = (char *) malloc((totalcnt + 4) * sizeof(char))) == NULL) {
600 perror("Allocate Buffer:");
601 exit(1);
602 };
603 do {
604 writecnt +=
605 fread((*buffer) + writecnt, 1,
606 (totalcnt - writecnt) * sizeof(char), input);
607 if (writecnt >= totalcnt) {
608 totalcnt += MEMBLK;
609 if (((*buffer) =
610 rrd_realloc((*buffer),
611 (totalcnt + 4) * sizeof(char))) == NULL) {
612 perror("Realloc Buffer:");
613 exit(1);
614 };
615 }
616 } while (!feof(input));
617 (*buffer)[writecnt] = '\0';
618 if (strcmp("-", file_name) != 0) {
619 fclose(input);
620 };
621 return writecnt;
622 }