X-Git-Url: https://git.tokkee.org/?a=blobdiff_plain;f=src%2Fdisplay%2Fnr-filter-gaussian.cpp;h=76b541ace61029b0ac3d855ccb1e99015b049dbc;hb=77eda576f455eeb23c7b92510f38bc60738473ab;hp=be0c9c5be34c7d5a99e1708c33aba02500930a26;hpb=24c6ddb693cb37e6e68798b6fff862d5968c5891;p=inkscape.git diff --git a/src/display/nr-filter-gaussian.cpp b/src/display/nr-filter-gaussian.cpp index be0c9c5be..76b541ace 100644 --- a/src/display/nr-filter-gaussian.cpp +++ b/src/display/nr-filter-gaussian.cpp @@ -17,15 +17,18 @@ #include #include #include +#include #include -using std::isnormal; +#include "2geom/isnan.h" #include "display/nr-filter-primitive.h" #include "display/nr-filter-gaussian.h" #include "display/nr-filter-types.h" +#include "display/nr-filter-units.h" #include "libnr/nr-pixblock.h" #include "libnr/nr-matrix.h" +#include "libnr/nr-matrix-fns.h" #include "util/fixed_point.h" #include "prefs-utils.h" @@ -38,13 +41,13 @@ using std::isnormal; // Using the backwards-pass initialization procedure from: // Boundary Conditions for Young - van Vliet Recursive Filtering // Bill Triggs, Michael Sdika -// IEEE Transactions on Signal Processing, Volume 54, Number 5 - may 2006 +// IEEE Transactions on Signal Processing, Volume 54, Number 5 - may 2006 // Number of IIR filter coefficients used. Currently only 3 is supported. // "Recursive Gaussian Derivative Filters" says this is enough though (and // some testing indeed shows that the quality doesn't improve much if larger // filters are used). -const size_t N = 3; +static size_t const N = 3; template void copy_n(InIt beg_in, Size N, OutIt beg_out) { @@ -52,7 +55,7 @@ void copy_n(InIt beg_in, Size N, OutIt beg_out) { } // Type used for IIR filter coefficients (can be 10.21 signed fixed point, see Anisotropic Gaussian Filtering Using Fixed Point Arithmetic, Christoph H. Lampert & Oliver Wirjadi, 2006) -typedef double IIRValue; +typedef double IIRValue; // Type used for FIR filter coefficients (can be 16.16 unsigned fixed point, should have 8 or more bits in the fractional part, the integer part should be capable of storing approximately 20*255) typedef Inkscape::Util::FixedPoint FIRValue; @@ -67,16 +70,15 @@ template static inline T clip(T const& v, T const& a, T const& b) { template static inline Tt round_cast(Ts const& v) { - static Ts const rndoffset(.5); - return static_cast(v+rndoffset); + static Ts const rndoffset(.5); + return static_cast(v+rndoffset); } template -static inline Tt clip_round_cast(Ts const& v) { - static Ts const rndoffset(.5); - if ( v < std::numeric_limits::min() ) return std::numeric_limits::min(); - if ( v > std::numeric_limits::max() ) return std::numeric_limits::max(); - return static_cast(v+rndoffset); +static inline Tt clip_round_cast(Ts const& v, Tt const minval=std::numeric_limits::min(), Tt const maxval=std::numeric_limits::max()) { + if ( v < minval ) return minval; + if ( v > maxval ) return maxval; + return round_cast(v); } namespace NR { @@ -96,12 +98,14 @@ FilterGaussian::~FilterGaussian() // Nothing to do here } -int _effect_area_scr(double deviation) +static int +_effect_area_scr(double const deviation) { return (int)std::ceil(deviation * 3.0); } -void _make_kernel(FIRValue *kernel, double deviation) +static void +_make_kernel(FIRValue *const kernel, double const deviation) { int const scr_len = _effect_area_scr(deviation); double const d_sq = sqr(deviation) * 2; @@ -109,18 +113,23 @@ void _make_kernel(FIRValue *kernel, double deviation) // Compute kernel and sum of coefficients // Note that actually only half the kernel is computed, as it is symmetric - FIRValue sum = 0; - for ( int i = 0; i <= scr_len ; i++ ) { + double sum = 0; + for ( int i = scr_len; i >= 0 ; i-- ) { k[i] = std::exp(-sqr(i) / d_sq); - sum += static_cast(k[i]); + if ( i > 0 ) sum += k[i]; } - // the sum of the complete kernel is twice as large (minus the center element to avoid counting it twice) - sum = 2*sum - static_cast(k[0]); - - // Normalize kernel - for ( int i = 0; i <= scr_len ; i++ ) { - kernel[i] = k[i]/static_cast(sum); + // the sum of the complete kernel is twice as large (plus the center element which we skipped above to prevent counting it twice) + sum = 2*sum + k[0]; + + // Normalize kernel (making sure the sum is exactly 1) + double ksum = 0; + FIRValue kernelsum = 0; + for ( int i = scr_len; i >= 1 ; i-- ) { + ksum += k[i]/sum; + kernel[i] = ksum-static_cast(kernelsum); + kernelsum += kernel[i]; } + kernel[0] = FIRValue(1)-2*kernelsum; } // Return value (v) should satisfy: @@ -130,7 +139,8 @@ void _make_kernel(FIRValue *kernel, double deviation) // 8<=32-2*v // 2*v<=24 // v<=12 -int _effect_subsample_step_log2(double deviation, int quality) +static int +_effect_subsample_step_log2(double const deviation, int const quality) { // To make sure FIR will always be used (unless the kernel is VERY big): // deviation/step <= 3 @@ -177,7 +187,8 @@ int _effect_subsample_step_log2(double deviation, int quality) * Catches reading and writing outside the pixblock area. * When enabled, decreases filter rendering speed massively. */ -inline void _check_index(NRPixBlock const * const pb, int const location, int const line) +static inline void +_check_index(NRPixBlock const * const pb, int const location, int const line) { if (false) { int max_loc = pb->rs * (pb->area.y1 - pb->area.y0); @@ -252,8 +263,13 @@ static void calcTriggsSdikaInitialization(double const M[N*N], IIRValue const uo } // Filters over 1st dimension -template -void filter2D_IIR(PT *dest, int dstr1, int dstr2, PT const *src, int sstr1, int sstr2, int n1, int n2, IIRValue const b[N+1], double const M[N*N], IIRValue *const tmpdata) { +template +static void +filter2D_IIR(PT *const dest, int const dstr1, int const dstr2, + PT const *const src, int const sstr1, int const sstr2, + int const n1, int const n2, IIRValue const b[N+1], double const M[N*N], + IIRValue *const tmpdata) +{ for ( int c2 = 0 ; c2 < n2 ; c2++ ) { // corresponding line in the source and output buffer PT const * srcimg = src + c2*sstr2; @@ -276,9 +292,14 @@ void filter2D_IIR(PT *dest, int dstr1, int dstr2, PT const *src, int sstr1, int } // Backward pass IIRValue v[N+1][PC]; - calcTriggsSdikaInitialization(M, u, iplus, iplus, b[0], v); + calcTriggsSdikaInitialization(M, u, iplus, iplus, b[0], v); dstimg -= dstr1; - for(unsigned int c=0; c(v[0][c]); + if ( PREMULTIPLIED_ALPHA ) { + dstimg[PC-1] = clip_round_cast(v[0][PC-1]); + for(unsigned int c=0; c(v[0][c], std::numeric_limits::min(), dstimg[PC-1]); + } else { + for(unsigned int c=0; c(v[0][c]); + } int c1=n1-1; while(c1-->0) { for(unsigned int i=N; i>0; i--) copy_n(v[i-1], PC, v[i]); @@ -288,7 +309,12 @@ void filter2D_IIR(PT *dest, int dstr1, int dstr2, PT const *src, int sstr1, int for(unsigned int c=0; c(v[0][c]); + if ( PREMULTIPLIED_ALPHA ) { + dstimg[PC-1] = clip_round_cast(v[0][PC-1]); + for(unsigned int c=0; c(v[0][c], std::numeric_limits::min(), dstimg[PC-1]); + } else { + for(unsigned int c=0; c(v[0][c]); + } } } } @@ -297,7 +323,11 @@ void filter2D_IIR(PT *dest, int dstr1, int dstr2, PT const *src, int sstr1, int // Assumes kernel is symmetric // scr_len should be size of kernel - 1 template -void filter2D_FIR(PT *dst, int dstr1, int dstr2, PT const *src, int sstr1, int sstr2, int n1, int n2, FIRValue const *const kernel, int scr_len) { +static void +filter2D_FIR(PT *const dst, int const dstr1, int const dstr2, + PT const *const src, int const sstr1, int const sstr2, + int const n1, int const n2, FIRValue const *const kernel, int const scr_len) +{ // Past pixels seen (to enable in-place operation) PT history[scr_len+1][PC]; @@ -371,8 +401,8 @@ void filter2D_FIR(PT *dst, int dstr1, int dstr2, PT const *src, int sstr1, int s // store the result in bufx dst[dst_disp + byte] = round_cast(sum); - // optimization: if there was no variation within this point's neighborhood, - // skip ahead while we keep seeing the same last_in byte: + // optimization: if there was no variation within this point's neighborhood, + // skip ahead while we keep seeing the same last_in byte: // blurring flat color would not change it anyway if (different_count <= 1) { int pos = c1 + 1; @@ -392,7 +422,11 @@ void filter2D_FIR(PT *dst, int dstr1, int dstr2, PT const *src, int sstr1, int s } template -void downsample(PT *dst, int dstr1, int dstr2, int dn1, int dn2, PT const *src, int sstr1, int sstr2, int sn1, int sn2, int step1_l2, int step2_l2) { +static void +downsample(PT *const dst, int const dstr1, int const dstr2, int const dn1, int const dn2, + PT const *const src, int const sstr1, int const sstr2, int const sn1, int const sn2, + int const step1_l2, int const step2_l2) +{ unsigned int const divisor_l2 = step1_l2+step2_l2; // step1*step2=2^(step1_l2+step2_l2) unsigned int const round_offset = (1< -void upsample(PT *dst, int dstr1, int dstr2, unsigned int dn1, unsigned int dn2, PT const *src, int sstr1, int sstr2, unsigned int sn1, unsigned int sn2, unsigned int step1_l2, unsigned int step2_l2) { +static void +upsample(PT *const dst, int const dstr1, int const dstr2, unsigned int const dn1, unsigned int const dn2, + PT const *const src, int const sstr1, int const sstr2, unsigned int const sn1, unsigned int const sn2, + unsigned int const step1_l2, unsigned int const step2_l2) +{ assert(((sn1-1)<=dn1 && ((sn2-1)<=dn2); // The last pixel of the source image should fall outside the destination image unsigned int const divisor_l2 = step1_l2+step2_l2; // step1*step2=2^(step1_l2+step2_l2) unsigned int const round_offset = (1<area.x1-in->area.x0, height_org = in->area.y1-in->area.y0; - double const deviation_x_org = _deviation_x * trans.expansionX(); - double const deviation_y_org = _deviation_y * trans.expansionY(); + double const deviation_x_org = _deviation_x * NR::expansionX(trans); + double const deviation_y_org = _deviation_y * NR::expansionY(trans); int const PC = NR_PIXBLOCK_BPP(in); // Subsampling constants - int const quality = prefs_get_int_attribute("options.blurquality", "value", 0); + int const quality = prefs_get_int_attribute("options.blurquality", "value", 0); int const x_step_l2 = _effect_subsample_step_log2(deviation_x_org, quality); int const y_step_l2 = _effect_subsample_step_log2(deviation_y_org, quality); int const x_step = 1<mode) { case NR_PIXBLOCK_MODE_A8: ///< Grayscale - filter2D_IIR(NR_PIXBLOCK_PX(out), 1, out->rs, NR_PIXBLOCK_PX(ssin), 1, ssin->rs, width, height, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), 1, out->rs, NR_PIXBLOCK_PX(ssin), 1, ssin->rs, width, height, b, M, tmpdata); break; case NR_PIXBLOCK_MODE_R8G8B8: ///< 8 bit RGB - filter2D_IIR(NR_PIXBLOCK_PX(out), 3, out->rs, NR_PIXBLOCK_PX(ssin), 3, ssin->rs, width, height, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), 3, out->rs, NR_PIXBLOCK_PX(ssin), 3, ssin->rs, width, height, b, M, tmpdata); break; case NR_PIXBLOCK_MODE_R8G8B8A8N: ///< Normal 8 bit RGBA - filter2D_IIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: ///< Premultiplied 8 bit RGBA - filter2D_IIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata); break; default: assert(false); @@ -643,16 +687,16 @@ int FilterGaussian::render(FilterSlot &slot, Matrix const &trans) // Filter (y) switch(in->mode) { case NR_PIXBLOCK_MODE_A8: ///< Grayscale - filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 1, NR_PIXBLOCK_PX(out), out->rs, 1, height, width, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 1, NR_PIXBLOCK_PX(out), out->rs, 1, height, width, b, M, tmpdata); break; case NR_PIXBLOCK_MODE_R8G8B8: ///< 8 bit RGB - filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 3, NR_PIXBLOCK_PX(out), out->rs, 3, height, width, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 3, NR_PIXBLOCK_PX(out), out->rs, 3, height, width, b, M, tmpdata); break; case NR_PIXBLOCK_MODE_R8G8B8A8N: ///< Normal 8 bit RGBA - filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: ///< Premultiplied 8 bit RGBA - filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata); break; default: assert(false); @@ -729,23 +773,33 @@ int FilterGaussian::render(FilterSlot &slot, Matrix const &trans) return 0; } -int FilterGaussian::get_enlarge(Matrix const &trans) +void FilterGaussian::area_enlarge(NRRectL &area, Matrix const &trans) { - int area_x = _effect_area_scr(_deviation_x * trans.expansionX()); - int area_y = _effect_area_scr(_deviation_y * trans.expansionY()); - return std::max(area_x, area_y); + int area_x = _effect_area_scr(_deviation_x * NR::expansionX(trans)); + int area_y = _effect_area_scr(_deviation_y * NR::expansionY(trans)); + // maximum is used because rotations can mix up these directions + // TODO: calculate a more tight-fitting rendering area + int area_max = std::max(area_x, area_y); + area.x0 -= area_max; + area.x1 += area_max; + area.y0 -= area_max; + area.y1 += area_max; +} + +FilterTraits FilterGaussian::get_input_traits() { + return TRAIT_PARALLER; } void FilterGaussian::set_deviation(double deviation) { - if(isnormal(deviation) && deviation >= 0) { + if(IS_FINITE(deviation) && deviation >= 0) { _deviation_x = _deviation_y = deviation; } } void FilterGaussian::set_deviation(double x, double y) { - if(isnormal(x) && x >= 0 && isnormal(y) && y >= 0) { + if(IS_FINITE(x) && x >= 0 && IS_FINITE(y) && y >= 0) { _deviation_x = x; _deviation_y = y; }