From: Ted Gould Date: Sun, 23 Nov 2008 04:44:08 +0000 (-0600) Subject: Merging in the OpenMP work from the mailing list with some improved build files. X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=6280cd88bbde7d6e2b5b9809e0bfb3af055a9cc3;p=inkscape.git Merging in the OpenMP work from the mailing list with some improved build files. --- diff --git a/acinclude.m4 b/acinclude.m4 index 5cd65aea0..b730d0d60 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -28,3 +28,104 @@ AC_DEFUN([RELAYTOOL], [ fi ]) +# =========================================================================== +# http://autoconf-archive.cryp.to/ax_openmp.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro tries to find out how to compile programs that use OpenMP a +# standard API and set of compiler directives for parallel programming +# (see http://www-unix.mcs/) +# +# On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_F77FLAGS +# output variable to the flag (e.g. -omp) used both to compile *and* link +# OpenMP programs in the current language. +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also link it with them as well. +# +# If you want to compile everything with OpenMP, you should set: +# +# CFLAGS="$CFLAGS $OPENMP_CFLAGS" +# #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" +# #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" +# +# (depending on the selected language). +# +# The user can override the default choice by setting the corresponding +# environment variable (e.g. OPENMP_CFLAGS). +# +# ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is +# found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is +# not found. If ACTION-IF-FOUND is not specified, the default action will +# define HAVE_OPENMP. +# +# LAST MODIFICATION +# +# 2008-04-12 +# +# COPYLEFT +# +# Copyright (c) 2008 Steven G. Johnson +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Macro Archive. When you make and +# distribute a modified version of the Autoconf Macro, you may extend this +# special exception to the GPL to apply to your modified version as well. + +AC_DEFUN([AX_OPENMP], [ +AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX + +AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS +ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown +# Flags to try: -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI), +# -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none +ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none" +if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then + ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags" +fi +for ax_openmp_flag in $ax_openmp_flags; do + case $ax_openmp_flag in + none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[] ;; + *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;; + esac + AC_TRY_LINK_FUNC(omp_set_num_threads, + [ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break]) +done +[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS +]) +if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then + m4_default([$2],:) +else + if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then + OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp + fi + m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])]) +fi +])dnl AX_OPENMP diff --git a/build.xml b/build.xml index 8b8965680..f65b6e20f 100644 --- a/build.xml +++ b/build.xml @@ -375,6 +375,7 @@ -Wall -Wformat -Werror=format-security -W -Wpointer-arith -Wcast-align -Wsign-compare -Woverloaded-virtual -Wswitch -O2 -mms-bitfields + -fopenmp -DVERSION=\"${version}\" @@ -476,6 +477,7 @@ objcopycommand="${archutil}objcopy"> -mwindows + -mthreads @@ -501,6 +503,7 @@ -lpng -ljpeg.dll -ltiff.dll -lpopt ${devlibs}/lib/zdll.lib -lgc -lws2_32 -lintl -lgdi32 -lcomdlg32 -lm + -lgomp -lpthreadGC2 @@ -530,6 +533,7 @@ objcopycommand="${archutil}objcopy"> -mwindows + -mthreads @@ -553,6 +557,7 @@ -lpng -ljpeg.dll -ltiff.dll -lpopt ${devlibs}/lib/zdll.lib -lgc -lws2_32 -lintl -lgdi32 -lcomdlg32 -lm + -lgomp -lpthreadGC2 @@ -572,6 +577,7 @@ stripcommand="${archutil}strip" objcopycommand="${archutil}objcopy"> + -mthreads @@ -602,6 +608,7 @@ -lpng -ljpeg.dll -ltiff.dll -lpopt ${devlibs}/lib/zdll.lib -lgc -lws2_32 -lintl -lgdi32 -lcomdlg32 -lm + -lgomp -lpthreadGC2 @@ -667,6 +674,7 @@ + diff --git a/configure.ac b/configure.ac index dfcd34686..37c4ddb04 100644 --- a/configure.ac +++ b/configure.ac @@ -140,6 +140,15 @@ dnl Find msgfmt. Without this, po/Makefile fails to set MSGFMT on some platform AC_PATH_PROG(MSGFMT, msgfmt, msgfmt) AC_PATH_PROG(GMSGFMT, gmsgfmt, $MSGFMT) +dnl ****************************** +dnl Check for OpenMP +dnl ****************************** + +AX_OPENMP(,AC_MSG_ERROR([Inkscape requires OpenMP support to build])) +dnl We have it, now set up the flags +CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" +AC_CHECK_HEADER(omp.h) + dnl ****************************** dnl Check for libpng dnl ****************************** @@ -663,7 +672,7 @@ if test "x$cairo_pdf" = "xyes"; then fi dnl Shouldn't we test for libpng and libz? -INKSCAPE_LIBS="$INKSCAPE_LIBS -lpng -lz" +INKSCAPE_LIBS="$INKSCAPE_LIBS -lpng -lz -lgomp" AC_CHECK_HEADER(popt.h, [INKSCAPE_LIBS="$INKSCAPE_LIBS -lpopt"], diff --git a/m4/ax_openmp.m4 b/m4/ax_openmp.m4 new file mode 100644 index 000000000..b50cb1643 --- /dev/null +++ b/m4/ax_openmp.m4 @@ -0,0 +1,101 @@ +# =========================================================================== +# http://autoconf-archive.cryp.to/ax_openmp.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_OPENMP([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]]) +# +# DESCRIPTION +# +# This macro tries to find out how to compile programs that use OpenMP a +# standard API and set of compiler directives for parallel programming +# (see http://www-unix.mcs/) +# +# On success, it sets the OPENMP_CFLAGS/OPENMP_CXXFLAGS/OPENMP_F77FLAGS +# output variable to the flag (e.g. -omp) used both to compile *and* link +# OpenMP programs in the current language. +# +# NOTE: You are assumed to not only compile your program with these flags, +# but also link it with them as well. +# +# If you want to compile everything with OpenMP, you should set: +# +# CFLAGS="$CFLAGS $OPENMP_CFLAGS" +# #OR# CXXFLAGS="$CXXFLAGS $OPENMP_CXXFLAGS" +# #OR# FFLAGS="$FFLAGS $OPENMP_FFLAGS" +# +# (depending on the selected language). +# +# The user can override the default choice by setting the corresponding +# environment variable (e.g. OPENMP_CFLAGS). +# +# ACTION-IF-FOUND is a list of shell commands to run if an OpenMP flag is +# found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it is +# not found. If ACTION-IF-FOUND is not specified, the default action will +# define HAVE_OPENMP. +# +# LAST MODIFICATION +# +# 2008-04-12 +# +# COPYLEFT +# +# Copyright (c) 2008 Steven G. Johnson +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program. If not, see . +# +# As a special exception, the respective Autoconf Macro's copyright owner +# gives unlimited permission to copy, distribute and modify the configure +# scripts that are the output of Autoconf when processing the Macro. You +# need not follow the terms of the GNU General Public License when using +# or distributing such scripts, even though portions of the text of the +# Macro appear in them. The GNU General Public License (GPL) does govern +# all other use of the material that constitutes the Autoconf Macro. +# +# This special exception to the GPL applies to versions of the Autoconf +# Macro released by the Autoconf Macro Archive. When you make and +# distribute a modified version of the Autoconf Macro, you may extend this +# special exception to the GPL to apply to your modified version as well. + +AC_DEFUN([AX_OPENMP], [ +AC_PREREQ(2.59) dnl for _AC_LANG_PREFIX + +AC_CACHE_CHECK([for OpenMP flag of _AC_LANG compiler], ax_cv_[]_AC_LANG_ABBREV[]_openmp, [save[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS +ax_cv_[]_AC_LANG_ABBREV[]_openmp=unknown +# Flags to try: -fopenmp (gcc), -openmp (icc), -mp (SGI & PGI), +# -xopenmp (Sun), -omp (Tru64), -qsmp=omp (AIX), none +ax_openmp_flags="-fopenmp -openmp -mp -xopenmp -omp -qsmp=omp none" +if test "x$OPENMP_[]_AC_LANG_PREFIX[]FLAGS" != x; then + ax_openmp_flags="$OPENMP_[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flags" +fi +for ax_openmp_flag in $ax_openmp_flags; do + case $ax_openmp_flag in + none) []_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[] ;; + *) []_AC_LANG_PREFIX[]FLAGS="$save[]_AC_LANG_PREFIX[]FLAGS $ax_openmp_flag" ;; + esac + AC_TRY_LINK_FUNC(omp_set_num_threads, + [ax_cv_[]_AC_LANG_ABBREV[]_openmp=$ax_openmp_flag; break]) +done +[]_AC_LANG_PREFIX[]FLAGS=$save[]_AC_LANG_PREFIX[]FLAGS +]) +if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" = "xunknown"; then + m4_default([$2],:) +else + if test "x$ax_cv_[]_AC_LANG_ABBREV[]_openmp" != "xnone"; then + OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ax_cv_[]_AC_LANG_ABBREV[]_openmp + fi + m4_default([$1], [AC_DEFINE(HAVE_OPENMP,1,[Define if OpenMP is enabled])]) +fi +])dnl AX_OPENMP diff --git a/src/display/nr-filter-gaussian.cpp b/src/display/nr-filter-gaussian.cpp index 14a039630..4cbd7957d 100644 --- a/src/display/nr-filter-gaussian.cpp +++ b/src/display/nr-filter-gaussian.cpp @@ -16,9 +16,10 @@ #include #include #include -#include #include +#include #include +#include #include "2geom/isnan.h" @@ -268,9 +269,11 @@ static void filter2D_IIR(PT *const dest, int const dstr1, int const dstr2, PT const *const src, int const sstr1, int const sstr2, int const n1, int const n2, IIRValue const b[N+1], double const M[N*N], - IIRValue *const tmpdata) + IIRValue *const tmpdata[], int const num_threads) { +#pragma omp parallel for num_threads(num_threads) for ( int c2 = 0 ; c2 < n2 ; c2++ ) { + unsigned int tid = omp_get_thread_num(); // corresponding line in the source and output buffer PT const * srcimg = src + c2*sstr2; PT * dstimg = dest + c2*dstr2 + n1*dstr1; @@ -288,7 +291,7 @@ filter2D_IIR(PT *const dest, int const dstr1, int const dstr2, for(unsigned int i=1; i0) { for(unsigned int i=N; i>0; i--) copy_n(v[i-1], PC, v[i]); - copy_n(tmpdata+c1*PC, PC, v[0]); + copy_n(tmpdata[tid]+c1*PC, PC, v[0]); for(unsigned int c=0; c static void filter2D_FIR(PT *const dst, int const dstr1, int const dstr2, PT const *const src, int const sstr1, int const sstr2, - int const n1, int const n2, FIRValue const *const kernel, int const scr_len) + int const n1, int const n2, FIRValue const *const kernel, int const scr_len, int const num_threads) { // Past pixels seen (to enable in-place operation) PT history[scr_len+1][PC]; +#pragma omp parallel for num_threads(num_threads) private(history) for ( int c2 = 0 ; c2 < n2 ; c2++ ) { // corresponding line in the source buffer @@ -539,13 +543,14 @@ int FilterGaussian::render(FilterSlot &slot, FilterUnits const &units) } // Some common constants + Inkscape::Preferences *prefs = Inkscape::Preferences::get(); int const width_org = in->area.x1-in->area.x0, height_org = in->area.y1-in->area.y0; double const deviation_x_org = _deviation_x * NR::expansionX(trans); double const deviation_y_org = _deviation_y * NR::expansionY(trans); int const PC = NR_PIXBLOCK_BPP(in); + int const NTHREADS = std::max(1,std::min(8,prefs->getInt("/options/threading/numthreads",omp_get_num_procs()))); // Subsampling constants - Inkscape::Preferences *prefs = Inkscape::Preferences::get(); int const quality = prefs->getInt("/options/blurquality/value"); int const x_step_l2 = _effect_subsample_step_log2(deviation_x_org, quality); int const y_step_l2 = _effect_subsample_step_log2(deviation_y_org, quality); @@ -577,13 +582,19 @@ int FilterGaussian::render(FilterSlot &slot, FilterUnits const &units) } // Temporary storage for IIR filter // NOTE: This can be eliminated, but it reduces the precision a bit - IIRValue * tmpdata = 0; + IIRValue * tmpdata[NTHREADS]; + std::fill_n(tmpdata, NTHREADS, (IIRValue*)0); if ( use_IIR_x || use_IIR_y ) { - tmpdata = new IIRValue[std::max(width,height)*PC]; - if (tmpdata == NULL) { - nr_pixblock_release(out); - delete out; - return 0; + for(int i=0; i0) { + delete[] tmpdata[i]; + } + delete out; + return 0; + } } } NRPixBlock *ssin = in; @@ -629,16 +640,16 @@ int FilterGaussian::render(FilterSlot &slot, FilterUnits const &units) // Filter (x) switch(in->mode) { case NR_PIXBLOCK_MODE_A8: ///< Grayscale - filter2D_IIR(NR_PIXBLOCK_PX(out), 1, out->rs, NR_PIXBLOCK_PX(ssin), 1, ssin->rs, width, height, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), 1, out->rs, NR_PIXBLOCK_PX(ssin), 1, ssin->rs, width, height, b, M, tmpdata, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8: ///< 8 bit RGB - filter2D_IIR(NR_PIXBLOCK_PX(out), 3, out->rs, NR_PIXBLOCK_PX(ssin), 3, ssin->rs, width, height, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), 3, out->rs, NR_PIXBLOCK_PX(ssin), 3, ssin->rs, width, height, b, M, tmpdata, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8A8N: ///< Normal 8 bit RGBA - filter2D_IIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: ///< Premultiplied 8 bit RGBA - filter2D_IIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, b, M, tmpdata, NTHREADS); break; default: assert(false); @@ -651,16 +662,16 @@ int FilterGaussian::render(FilterSlot &slot, FilterUnits const &units) // Filter (x) switch(in->mode) { case NR_PIXBLOCK_MODE_A8: ///< Grayscale - filter2D_FIR(NR_PIXBLOCK_PX(out), 1, out->rs, NR_PIXBLOCK_PX(ssin), 1, ssin->rs, width, height, kernel, scr_len_x); + filter2D_FIR(NR_PIXBLOCK_PX(out), 1, out->rs, NR_PIXBLOCK_PX(ssin), 1, ssin->rs, width, height, kernel, scr_len_x, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8: ///< 8 bit RGB - filter2D_FIR(NR_PIXBLOCK_PX(out), 3, out->rs, NR_PIXBLOCK_PX(ssin), 3, ssin->rs, width, height, kernel, scr_len_x); + filter2D_FIR(NR_PIXBLOCK_PX(out), 3, out->rs, NR_PIXBLOCK_PX(ssin), 3, ssin->rs, width, height, kernel, scr_len_x, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8A8N: ///< Normal 8 bit RGBA - filter2D_FIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, kernel, scr_len_x); + filter2D_FIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, kernel, scr_len_x, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: ///< Premultiplied 8 bit RGBA - filter2D_FIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, kernel, scr_len_x); + filter2D_FIR(NR_PIXBLOCK_PX(out), 4, out->rs, NR_PIXBLOCK_PX(ssin), 4, ssin->rs, width, height, kernel, scr_len_x, NTHREADS); break; default: assert(false); @@ -688,16 +699,16 @@ int FilterGaussian::render(FilterSlot &slot, FilterUnits const &units) // Filter (y) switch(in->mode) { case NR_PIXBLOCK_MODE_A8: ///< Grayscale - filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 1, NR_PIXBLOCK_PX(out), out->rs, 1, height, width, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 1, NR_PIXBLOCK_PX(out), out->rs, 1, height, width, b, M, tmpdata, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8: ///< 8 bit RGB - filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 3, NR_PIXBLOCK_PX(out), out->rs, 3, height, width, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 3, NR_PIXBLOCK_PX(out), out->rs, 3, height, width, b, M, tmpdata, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8A8N: ///< Normal 8 bit RGBA - filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: ///< Premultiplied 8 bit RGBA - filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata); + filter2D_IIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, b, M, tmpdata, NTHREADS); break; default: assert(false); @@ -710,23 +721,25 @@ int FilterGaussian::render(FilterSlot &slot, FilterUnits const &units) // Filter (y) switch(in->mode) { case NR_PIXBLOCK_MODE_A8: ///< Grayscale - filter2D_FIR(NR_PIXBLOCK_PX(out), out->rs, 1, NR_PIXBLOCK_PX(out), out->rs, 1, height, width, kernel, scr_len_y); + filter2D_FIR(NR_PIXBLOCK_PX(out), out->rs, 1, NR_PIXBLOCK_PX(out), out->rs, 1, height, width, kernel, scr_len_y, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8: ///< 8 bit RGB - filter2D_FIR(NR_PIXBLOCK_PX(out), out->rs, 3, NR_PIXBLOCK_PX(out), out->rs, 3, height, width, kernel, scr_len_y); + filter2D_FIR(NR_PIXBLOCK_PX(out), out->rs, 3, NR_PIXBLOCK_PX(out), out->rs, 3, height, width, kernel, scr_len_y, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8A8N: ///< Normal 8 bit RGBA - filter2D_FIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, kernel, scr_len_y); + filter2D_FIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, kernel, scr_len_y, NTHREADS); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: ///< Premultiplied 8 bit RGBA - filter2D_FIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, kernel, scr_len_y); + filter2D_FIR(NR_PIXBLOCK_PX(out), out->rs, 4, NR_PIXBLOCK_PX(out), out->rs, 4, height, width, kernel, scr_len_y, NTHREADS); break; default: assert(false); }; } - delete[] tmpdata; // deleting a nullptr has no effect, so this is save + for(int i=0; i