From 1e491ed6d54facbe59e9a6f370af8aec200098b7 Mon Sep 17 00:00:00 2001 From: oetiker Date: Wed, 18 Jul 2007 00:30:49 +0000 Subject: [PATCH] * rrd_open: rrd_close does not purge file from cache * rrd_open: new function rrd_dontneed for purging un-needed pages from core * rrd_open: in linux at least only fadvise DONTNEED has the power to purge pages from cache, so let's call madvise as well as fadvise * rrd_create: uses open/write/close now, flushes file to disk and keeps only hot pages in core * rrd_update: keeps only hot pages in core * configure enables FADVISE even when mmap is in use git-svn-id: svn://svn.oetiker.ch/rrdtool/trunk/program@1156 a5681a0c-68f1-0310-ab6d-d61299d08faa --- src/rrd_create.c | 75 +++++++++++------------------- src/rrd_open.c | 118 ++++++++++++++++++++++++++++++----------------- src/rrd_tool.h | 3 ++ src/rrd_update.c | 22 +-------- 4 files changed, 106 insertions(+), 112 deletions(-) diff --git a/src/rrd_create.c b/src/rrd_create.c index a5c304f..a3b73ca 100644 --- a/src/rrd_create.c +++ b/src/rrd_create.c @@ -624,31 +624,31 @@ int rrd_create_fn( rrd_t *rrd) { unsigned long i, ii; - FILE *rrd_file; + int rrd_file; rrd_value_t *unknown; int unkn_cnt; + rrd_file_t *rrd_file_dn; + rrd_t rrd_dn; - long rrd_head_size; - - if ((rrd_file = fopen(file_name, "wb")) == NULL) { + if ((rrd_file = open(file_name, O_WRONLY|O_CREAT|O_TRUNC,0666)) == NULL) { rrd_set_error("creating '%s': %s", file_name, rrd_strerror(errno)); rrd_free(rrd); return (-1); } - fwrite(rrd->stat_head, sizeof(stat_head_t), 1, rrd_file); + write(rrd_file,rrd->stat_head, sizeof(stat_head_t)); - fwrite(rrd->ds_def, sizeof(ds_def_t), rrd->stat_head->ds_cnt, rrd_file); + write(rrd_file,rrd->ds_def, sizeof(ds_def_t)*rrd->stat_head->ds_cnt); - fwrite(rrd->rra_def, - sizeof(rra_def_t), rrd->stat_head->rra_cnt, rrd_file); + write(rrd_file,rrd->rra_def, + sizeof(rra_def_t)* rrd->stat_head->rra_cnt); - fwrite(rrd->live_head, sizeof(live_head_t), 1, rrd_file); + 
write(rrd_file,rrd->live_head, sizeof(live_head_t)); if ((rrd->pdp_prep = calloc(1, sizeof(pdp_prep_t))) == NULL) { rrd_set_error("allocating pdp_prep"); rrd_free(rrd); - fclose(rrd_file); + close(rrd_file); return (-1); } @@ -659,12 +659,12 @@ int rrd_create_fn( rrd->live_head->last_up % rrd->stat_head->pdp_step; for (i = 0; i < rrd->stat_head->ds_cnt; i++) - fwrite(rrd->pdp_prep, sizeof(pdp_prep_t), 1, rrd_file); + write(rrd_file,rrd->pdp_prep, sizeof(pdp_prep_t)); if ((rrd->cdp_prep = calloc(1, sizeof(cdp_prep_t))) == NULL) { rrd_set_error("allocating cdp_prep"); rrd_free(rrd); - fclose(rrd_file); + close(rrd_file); return (-1); } @@ -701,7 +701,7 @@ int rrd_create_fn( } for (ii = 0; ii < rrd->stat_head->ds_cnt; ii++) { - fwrite(rrd->cdp_prep, sizeof(cdp_prep_t), 1, rrd_file); + write(rrd_file,rrd->cdp_prep, sizeof(cdp_prep_t)); } } @@ -711,7 +711,7 @@ int rrd_create_fn( if ((rrd->rra_ptr = calloc(1, sizeof(rra_ptr_t))) == NULL) { rrd_set_error("allocating rra_ptr"); rrd_free(rrd); - fclose(rrd_file); + close(rrd_file); return (-1); } @@ -721,15 +721,14 @@ int rrd_create_fn( * the pointer a priori. 
*/ for (i = 0; i < rrd->stat_head->rra_cnt; i++) { rrd->rra_ptr->cur_row = rrd->rra_def[i].row_cnt - 1; - fwrite(rrd->rra_ptr, sizeof(rra_ptr_t), 1, rrd_file); + write(rrd_file,rrd->rra_ptr, sizeof(rra_ptr_t)); } - rrd_head_size = ftell(rrd_file); /* write the empty data area */ if ((unknown = (rrd_value_t *) malloc(512 * sizeof(rrd_value_t))) == NULL) { rrd_set_error("allocating unknown"); rrd_free(rrd); - fclose(rrd_file); + close(rrd_file); return (-1); } for (i = 0; i < 512; ++i) @@ -740,40 +739,20 @@ int rrd_create_fn( unkn_cnt += rrd->stat_head->ds_cnt * rrd->rra_def[i].row_cnt; while (unkn_cnt > 0) { - fwrite(unknown, sizeof(rrd_value_t), min(unkn_cnt, 512), rrd_file); + write(rrd_file,unknown, sizeof(rrd_value_t) * min(unkn_cnt, 512)); + unkn_cnt -= 512; } free(unknown); - - /* lets see if we had an error */ - if (ferror(rrd_file)) { - rrd_set_error("a file error occurred while creating '%s'", file_name); - fclose(rrd_file); - rrd_free(rrd); - return (-1); - } -#ifdef HAVE_POSIX_FADVISE - /* this file is not going to be read again any time - soon, so we drop everything except the header portion from - the buffer cache. for this to work, we have to fdsync the file - first though. This will not be all that fast, but 'good' data - like other rrdfiles headers will stay in cache. Now this only works if creating - a single rrd file is not too large, but I assume this should not be the case - in general. Otherwhise we would have to sync and release while writing all - the unknown data. 
*/ - fflush(rrd_file); - fdatasync(fileno(rrd_file)); - if (0 != - posix_fadvise(fileno(rrd_file), rrd_head_size, 0, - POSIX_FADV_DONTNEED)) { - rrd_set_error("setting POSIX_FADV_DONTNEED on '%s': %s", file_name, - rrd_strerror(errno)); - fclose(rrd_file); - return (-1); - } -#endif - - fclose(rrd_file); + fdatasync(rrd_file); rrd_free(rrd); + if ( close(rrd_file) == -1 ) { + rrd_set_error("creating rrd: %s", rrd_strerror(errno)); + return -1; + } + /* flush all we don't need out of the cache */ + rrd_file_dn = rrd_open(file_name, &rrd_dn, RRD_READONLY); + rrd_dontneed(rrd_file_dn,&rrd_dn); + rrd_close(rrd_file_dn); return (0); } diff --git a/src/rrd_open.c b/src/rrd_open.c index a01b075..d701204 100644 --- a/src/rrd_open.c +++ b/src/rrd_open.c @@ -11,7 +11,7 @@ #define MEMBLK 8192 /* DEBUG 2 prints information obtained via mincore(2) */ -// #define DEBUG 2 +//#define DEBUG 2 /* do not calculate exact madvise hints but assume 1 page for headers and * set DONTNEED for the rest, which is assumed to be data */ //#define ONE_PAGE 1 @@ -35,13 +35,9 @@ offset += read (rrd_file->fd, dst, sizeof(dst_t)*(cnt)) #endif -/* next page-aligned (i.e. page-align up) */ -#ifndef PAGE_ALIGN -#define PAGE_ALIGN(addr) (((addr)+_page_size-1)&(~(_page_size-1))) -#endif -/* previous page-aligned (i.e. 
page-align down) */ -#ifndef PAGE_ALIGN_DOWN -#define PAGE_ALIGN_DOWN(addr) (((addr)+_page_size-1)&(~(_page_size-1))) +/* get the address of the start of this page */ +#ifndef PAGE_START +#define PAGE_START(addr) ((addr)&(~(_page_size-1))) #endif #ifdef HAVE_MMAP @@ -213,11 +209,9 @@ rrd_file_t *rrd_open( } else { # ifndef ONE_PAGE /* We do not need to read anything in for the moment */ - _madvise(data, rrd_file->file_len, MADV_DONTNEED); + _madvise(data, rrd_file->file_len, MADV_RANDOM); /* the stat_head will be needed soonish, so hint accordingly */ - _madvise(data + PAGE_ALIGN_DOWN(offset), - PAGE_ALIGN(sizeof(stat_head_t)), - MADV_WILLNEED | MADV_RANDOM); + _madvise(data, sizeof(stat_head_t), MADV_WILLNEED | MADV_RANDOM); # else /* alternatively: keep 1 page worth of data, likely headers, @@ -252,8 +246,8 @@ rrd_file_t *rrd_open( } #if defined USE_MADVISE && !defined ONE_PAGE /* the ds_def will be needed soonish, so hint accordingly */ - _madvise(data + PAGE_ALIGN_DOWN(offset), - PAGE_ALIGN(sizeof(ds_def_t) * rrd->stat_head->ds_cnt), + _madvise(data + PAGE_START(offset), + sizeof(ds_def_t) * rrd->stat_head->ds_cnt, MADV_WILLNEED); #endif __rrd_read(rrd->ds_def, ds_def_t, @@ -261,8 +255,8 @@ rrd_file_t *rrd_open( #if defined USE_MADVISE && !defined ONE_PAGE /* the rra_def will be needed soonish, so hint accordingly */ - _madvise(data + PAGE_ALIGN_DOWN(offset), - PAGE_ALIGN(sizeof(rra_def_t) * rrd->stat_head->rra_cnt), + _madvise(data + PAGE_START(offset), + sizeof(rra_def_t) * rrd->stat_head->rra_cnt, MADV_WILLNEED); #endif __rrd_read(rrd->rra_def, rra_def_t, @@ -285,8 +279,8 @@ rrd_file_t *rrd_open( } else { #if defined USE_MADVISE && !defined ONE_PAGE /* the live_head will be needed soonish, so hint accordingly */ - _madvise(data + PAGE_ALIGN_DOWN(offset), - PAGE_ALIGN(sizeof(live_head_t)), MADV_WILLNEED); + _madvise(data + PAGE_START(offset), + sizeof(live_head_t), MADV_WILLNEED); #endif __rrd_read(rrd->live_head, live_head_t, 1); @@ -318,19 +312,13 @@ 
rrd_file_t *rrd_open( /* Close a reference to an rrd_file. */ - -int rrd_close( - rrd_file_t *rrd_file) -{ - int ret; - -#if defined HAVE_MMAP || defined DEBUG - ssize_t _page_size = sysconf(_SC_PAGESIZE); -#endif -#if defined DEBUG && DEBUG > 1 +static +void mincore_print(rrd_file_t *rrd_file,char * mark){ +#ifdef HAVE_MMAP /* pretty print blocks in core */ off_t off; unsigned char *vec; + ssize_t _page_size = sysconf(_SC_PAGESIZE); off = rrd_file->file_len + ((rrd_file->file_len + _page_size - 1) / _page_size); @@ -346,35 +334,79 @@ int rrd_close( if (off == 0) was_in = is_in; if (was_in != is_in) { - fprintf(stderr, "%sin core: %p len %ld\n", + fprintf(stderr, "%s: %sin core: %p len %ld\n",mark, was_in ? "" : "not ", vec + prev, off - prev); was_in = is_in; prev = off; } } fprintf(stderr, - "%sin core: %p len %ld\n", + "%s: %sin core: %p len %ld\n", mark, was_in ? "" : "not ", vec + prev, off - prev); } else fprintf(stderr, "mincore: %s", rrd_strerror(errno)); } -#endif /* DEBUG */ +#else + fprintf(stderr, "sorry mincore only works with mmap"); +#endif +} + + +/* drop cache except for the header and the active pages */ +void +rrd_dontneed ( + rrd_file_t *rrd_file, + rrd_t *rrd){ + unsigned long dontneed_start; + unsigned long rra_start; + unsigned long active_block; + unsigned long i; + ssize_t _page_size = sysconf(_SC_PAGESIZE); + +#if defined DEBUG && DEBUG > 1 + mincore_print(rrd_file,"before"); +#endif -#ifdef USE_MADVISE -# ifdef ONE_PAGE - /* Keep headers around, round up to next page boundary. 
*/ - ret = - PAGE_ALIGN(rrd_file->header_len % _page_size + rrd_file->header_len); - if (rrd_file->file_len > ret) - _madvise(rrd_file->file_start + ret, - rrd_file->file_len - ret, MADV_DONTNEED); -# else /* ignoring errors from RRDs that are smaller then the file_len+rounding */ - _madvise(rrd_file->file_start + PAGE_ALIGN_DOWN(rrd_file->header_len), - rrd_file->file_len - PAGE_ALIGN(rrd_file->header_len), + rra_start = rrd_file->header_len; + dontneed_start = PAGE_START(rra_start)+_page_size; + for (i = 0; i < rrd->stat_head->rra_cnt; ++i) { + active_block = + PAGE_START(rra_start + + rrd->rra_ptr[i].cur_row + * rrd->stat_head->ds_cnt + * sizeof(rrd_value_t)); + if (active_block > dontneed_start){ +#ifdef USE_MADVISE + _madvise(rrd_file->file_start + dontneed_start, + active_block-dontneed_start-1, + MADV_DONTNEED); +#endif +/* in linux at least only fadvise DONTNEED seems to purge pages from cache */ +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(rrd_file->fd, dontneed_start, active_block-dontneed_start-1, POSIX_FADV_DONTNEED); +#endif + } + dontneed_start = active_block + _page_size; + rra_start += rrd->rra_def[i].row_cnt * rrd->stat_head->ds_cnt * sizeof(rrd_value_t); + } +#ifdef USE_MADVISE + _madvise(rrd_file->file_start + dontneed_start, + rrd_file->file_len - dontneed_start, MADV_DONTNEED); -# endif #endif +#ifdef HAVE_POSIX_FADVISE + posix_fadvise(rrd_file->fd, dontneed_start, rrd_file->file_len-dontneed_start, POSIX_FADV_DONTNEED); +#endif +#if defined DEBUG && DEBUG > 1 + mincore_print(rrd_file,"after"); +#endif +} + +int rrd_close( + rrd_file_t *rrd_file) +{ + int ret; #ifdef HAVE_MMAP ret = munmap(rrd_file->file_start, rrd_file->file_len); if (ret != 0) diff --git a/src/rrd_tool.h b/src/rrd_tool.h index 4f43dbf..06f4de1 100644 --- a/src/rrd_tool.h +++ b/src/rrd_tool.h @@ -137,6 +137,9 @@ extern "C" { const char *const file_name, rrd_t *rrd, unsigned rdwr); + void rrd_dontneed( + rrd_file_t *rrd_file, + rrd_t *rrd); int rrd_close( rrd_file_t *rrd_file); 
ssize_t rrd_read( diff --git a/src/rrd_update.c b/src/rrd_update.c index fa60c0b..9a1f32d 100644 --- a/src/rrd_update.c +++ b/src/rrd_update.c @@ -1443,19 +1443,7 @@ int _rrd_update( goto err_free_pdp_new; } #endif -#ifdef HAVE_POSIX_FADVISExxx - /* with update we have write ops, so they will probably not be done by now, this means - the buffers will not get freed. But calling this for the whole file - header - will let the data off the hook as soon as it is written when if it is from a previous - update cycle. Calling fdsync to force things is much too hard here. */ - - if (0 != posix_fadvise(rrd_file->fd, rra_begin, 0, POSIX_FADV_DONTNEED)) { - rrd_set_error("setting POSIX_FADV_DONTNEED on '%s': %s", filename, - rrd_strerror(errno)); - goto err_free_pdp_new; - } -#endif /* rrd_flush(rrd_file); */ /* calling the smoothing code here guarantees at most @@ -1479,17 +1467,9 @@ int _rrd_update( rra_start += rrd.rra_def[i].row_cnt * rrd.stat_head->ds_cnt * sizeof(rrd_value_t); } -#ifdef HAVE_POSIX_FADVISExxx - /* same procedure as above ... */ - if (0 != - posix_fadvise(rrd_file->fd, rra_begin, 0, POSIX_FADV_DONTNEED)) { - rrd_set_error("setting POSIX_FADV_DONTNEED on '%s': %s", filename, - rrd_strerror(errno)); - goto err_free_pdp_new; - } -#endif } + rrd_dontneed(rrd_file,&rrd); rrd_free(&rrd); rrd_close(rrd_file); -- 2.30.2