From: buliabyak Date: Sat, 29 Jul 2006 17:07:59 +0000 (+0000) Subject: patch for rendering quality and speed from Jasper van de Gronde X-Git-Url: https://git.tokkee.org/?a=commitdiff_plain;h=9e87caa574d67b9b1aa1015d4faae055336c9d30;p=inkscape.git patch for rendering quality and speed from Jasper van de Gronde --- diff --git a/src/display/canvas-bpath.cpp b/src/display/canvas-bpath.cpp index 17c880017..909ea84ec 100644 --- a/src/display/canvas-bpath.cpp +++ b/src/display/canvas-bpath.cpp @@ -344,9 +344,9 @@ bpath_run_A8_OR (raster_info &dest,void *data,int st,float vst,int en,float ven) if ( sv > 249.999 ) { /* Simple copy */ while (len > 0) { - d[0] = NR_COMPOSEN11 (r, 255, d[0]); - d[1] = NR_COMPOSEN11 (g, 255, d[1]); - d[2] = NR_COMPOSEN11 (b, 255, d[2]); + d[0] = NR_COMPOSEN11_1111 (r, 255, d[0]); + d[1] = NR_COMPOSEN11_1111 (g, 255, d[1]); + d[2] = NR_COMPOSEN11_1111 (b, 255, d[2]); d += 3; len -= 1; } @@ -354,9 +354,9 @@ bpath_run_A8_OR (raster_info &dest,void *data,int st,float vst,int en,float ven) unsigned int c0_24=(int)sv; c0_24&=0xFF; while (len > 0) { - d[0] = NR_COMPOSEN11 (r, c0_24, d[0]); - d[1] = NR_COMPOSEN11 (g, c0_24, d[1]); - d[2] = NR_COMPOSEN11 (b, c0_24, d[2]); + d[0] = NR_COMPOSEN11_1111 (r, c0_24, d[0]); + d[1] = NR_COMPOSEN11_1111 (g, c0_24, d[1]); + d[2] = NR_COMPOSEN11_1111 (b, c0_24, d[2]); d += 3; len -= 1; } @@ -366,9 +366,9 @@ bpath_run_A8_OR (raster_info &dest,void *data,int st,float vst,int en,float ven) sv=0.5*(vst+ven); unsigned int c0_24=(int)sv; c0_24&=0xFF; - d[0] = NR_COMPOSEN11 (r, c0_24, d[0]); - d[1] = NR_COMPOSEN11 (g, c0_24, d[1]); - d[2] = NR_COMPOSEN11 (b, c0_24, d[2]); + d[0] = NR_COMPOSEN11_1111 (r, c0_24, d[0]); + d[1] = NR_COMPOSEN11_1111 (g, c0_24, d[1]); + d[2] = NR_COMPOSEN11_1111 (b, c0_24, d[2]); } else { dv/=len; sv+=0.5*dv; // correction trapezoidale @@ -381,9 +381,9 @@ bpath_run_A8_OR (raster_info &dest,void *data,int st,float vst,int en,float ven) /* Draw */ ca = c0_24 >> 16; if ( ca > 255 ) ca=255; - d[0] 
= NR_COMPOSEN11 (r, ca, d[0]); - d[1] = NR_COMPOSEN11 (g, ca, d[1]); - d[2] = NR_COMPOSEN11 (b, ca, d[2]); + d[0] = NR_COMPOSEN11_1111 (r, ca, d[0]); + d[1] = NR_COMPOSEN11_1111 (g, ca, d[1]); + d[2] = NR_COMPOSEN11_1111 (b, ca, d[2]); d += 3; c0_24 += s0_24; c0_24 = CLAMP (c0_24, 0, 16777216); diff --git a/src/display/canvas-grid.cpp b/src/display/canvas-grid.cpp index 6618c2358..9fb6974f8 100644 --- a/src/display/canvas-grid.cpp +++ b/src/display/canvas-grid.cpp @@ -159,9 +159,9 @@ sp_grid_hline (SPCanvasBuf *buf, gint y, gint xs, gint xe, guint32 rgba) x1 = MIN (buf->rect.x1, xe + 1); p = buf->buf + (y - buf->rect.y0) * buf->buf_rowstride + (x0 - buf->rect.x0) * 3; for (x = x0; x < x1; x++) { - p[0] = NR_COMPOSEN11 (r, a, p[0]); - p[1] = NR_COMPOSEN11 (g, a, p[1]); - p[2] = NR_COMPOSEN11 (b, a, p[2]); + p[0] = NR_COMPOSEN11_1111 (r, a, p[0]); + p[1] = NR_COMPOSEN11_1111 (g, a, p[1]); + p[2] = NR_COMPOSEN11_1111 (b, a, p[2]); p += 3; } } @@ -182,9 +182,9 @@ sp_grid_vline (SPCanvasBuf *buf, gint x, gint ys, gint ye, guint32 rgba) y1 = MIN (buf->rect.y1, ye + 1); p = buf->buf + (y0 - buf->rect.y0) * buf->buf_rowstride + (x - buf->rect.x0) * 3; for (y = y0; y < y1; y++) { - p[0] = NR_COMPOSEN11 (r, a, p[0]); - p[1] = NR_COMPOSEN11 (g, a, p[1]); - p[2] = NR_COMPOSEN11 (b, a, p[2]); + p[0] = NR_COMPOSEN11_1111 (r, a, p[0]); + p[1] = NR_COMPOSEN11_1111 (g, a, p[1]); + p[2] = NR_COMPOSEN11_1111 (b, a, p[2]); p += buf->buf_rowstride; } } diff --git a/src/display/guideline.cpp b/src/display/guideline.cpp index d44ac8ab8..41429b408 100644 --- a/src/display/guideline.cpp +++ b/src/display/guideline.cpp @@ -115,9 +115,9 @@ static void sp_guideline_render(SPCanvasItem *item, SPCanvasBuf *buf) } for (int p = p0; p < p1; p++) { - d[0] = NR_COMPOSEN11(r, a, d[0]); - d[1] = NR_COMPOSEN11(g, a, d[1]); - d[2] = NR_COMPOSEN11(b, a, d[2]); + d[0] = NR_COMPOSEN11_1111(r, a, d[0]); + d[1] = NR_COMPOSEN11_1111(g, a, d[1]); + d[2] = NR_COMPOSEN11_1111(b, a, d[2]); d += step; } } diff 
--git a/src/display/nr-arena-item.cpp b/src/display/nr-arena-item.cpp index 7e03c51dd..581bee231 100644 --- a/src/display/nr-arena-item.cpp +++ b/src/display/nr-arena-item.cpp @@ -627,8 +627,8 @@ unsigned int nr_arena_item_invoke_render(NRArenaItem *item, NRRectL const *area, d = NR_PIXBLOCK_PX (&mpb) + (y - carea.y0) * mpb.rs; for (x = carea.x0; x < carea.x1; x++) { unsigned int m; - m = ((s[0] + s[1] + s[2]) * s[3] + 127) / (3 * 255); - d[0] = NR_PREMUL (d[0], m); + m = NR_PREMUL_112(s[0]+s[1]+s[2], s[3]); + d[0] = FAST_DIV_ROUND<3*255*255>(NR_PREMUL_123(d[0], m)); s += 4; d += 1; } @@ -641,8 +641,8 @@ unsigned int nr_arena_item_invoke_render(NRArenaItem *item, NRRectL const *area, d = NR_PIXBLOCK_PX (&mpb) + (y - carea.y0) * mpb.rs; for (x = carea.x0; x < carea.x1; x++) { unsigned int m; - m = ((s[0] + s[1] + s[2]) * s[3] + 127) / (3 * 255); - d[0] = m; + m = NR_PREMUL_112(s[0]+s[1]+s[2], s[3]); + d[0] = FAST_DIV_ROUND<3*255>(m); s += 4; d += 1; } @@ -660,7 +660,7 @@ unsigned int nr_arena_item_invoke_render(NRArenaItem *item, NRRectL const *area, unsigned char *d; d = NR_PIXBLOCK_PX (&mpb) + (y - carea.y0) * mpb.rs; for (x = carea.x0; x < carea.x1; x++) { - d[0] = NR_PREMUL (d[0], a); + d[0] = NR_PREMUL_111 (d[0], a); d += 1; } } diff --git a/src/display/nr-arena-shape.cpp b/src/display/nr-arena-shape.cpp index abe343a1d..4653b5ce6 100644 --- a/src/display/nr-arena-shape.cpp +++ b/src/display/nr-arena-shape.cpp @@ -801,7 +801,7 @@ nr_arena_shape_clip(NRArenaItem *item, NRRectL *area, NRPixBlock *pb) s = NR_PIXBLOCK_PX(&m) + (y - area->y0) * m.rs; d = NR_PIXBLOCK_PX(pb) + (y - area->y0) * pb->rs; for (int x = area->x0; x < area->x1; x++) { - *d = NR_A7_NORMALIZED(*s,*d); + *d = NR_COMPOSEA_111(*s, *d); d ++; s ++; } @@ -1148,10 +1148,8 @@ shape_run_A8_OR(raster_info &dest,void */*data*/,int st,float vst,int en,float v unsigned int c0_24=(int)sv; c0_24&=0xFF; while (len > 0) { - unsigned int da; /* Draw */ - da = NR_A7(c0_24,d[0]); - d[0] = NR_PREMUL_SINGLE(da); + 
d[0] = NR_COMPOSEA_111(c0_24,d[0]); d += 1; len -= 1; } @@ -1162,10 +1160,8 @@ shape_run_A8_OR(raster_info &dest,void */*data*/,int st,float vst,int en,float v sv*=256; unsigned int c0_24=(int)sv; c0_24&=0xFF; - unsigned int da; /* Draw */ - da = NR_A7(c0_24,d[0]); - d[0] = NR_PREMUL_SINGLE(da); + d[0] = NR_COMPOSEA_111(c0_24,d[0]); } else { dv/=len; sv+=0.5*dv; // correction trapezoidale @@ -1174,12 +1170,11 @@ shape_run_A8_OR(raster_info &dest,void */*data*/,int st,float vst,int en,float v int c0_24 = static_cast(CLAMP(sv, 0, 16777216)); int s0_24 = static_cast(dv); while (len > 0) { - unsigned int ca, da; + unsigned int ca; /* Draw */ ca = c0_24 >> 16; if ( ca > 255 ) ca=255; - da = NR_A7(ca,d[0]); - d[0] = NR_PREMUL_SINGLE(da); + d[0] = NR_COMPOSEA_111(ca,d[0]); d += 1; c0_24 += s0_24; c0_24 = CLAMP(c0_24, 0, 16777216); diff --git a/src/display/nr-gradient-gpl.cpp b/src/display/nr-gradient-gpl.cpp index 536217649..e3ee03325 100644 --- a/src/display/nr-gradient-gpl.cpp +++ b/src/display/nr-gradient-gpl.cpp @@ -200,11 +200,11 @@ nr_lgradient_render_R8G8B8A8N (NRLGradientRenderer *lgr, unsigned char *px, int d[2] = s[2]; d[3] = 255; } else if (s[3] != 0) { - ca = NR_A7(s[3],d[3]); - d[0] = NR_COMPOSENNN_A7 (s[0], s[3], d[0], d[3], ca); - d[1] = NR_COMPOSENNN_A7 (s[1], s[3], d[1], d[3], ca); - d[2] = NR_COMPOSENNN_A7 (s[2], s[3], d[2], d[3], ca); - d[3] = NR_PREMUL_SINGLE(ca); + ca = NR_COMPOSEA_112(s[3],d[3]); + d[0] = NR_COMPOSENNN_111121 (s[0], s[3], d[0], d[3], ca); + d[1] = NR_COMPOSENNN_111121 (s[1], s[3], d[1], d[3], ca); + d[2] = NR_COMPOSENNN_111121 (s[2], s[3], d[2], d[3], ca); + d[3] = NR_NORMALIZE_21(ca); } d += 4; pos += lgr->dx; @@ -242,9 +242,9 @@ nr_lgradient_render_R8G8B8 (NRLGradientRenderer *lgr, unsigned char *px, int x0, } /* Full composition */ s = lgr->vector + 4 * idx; - d[0] = NR_COMPOSEN11 (s[0], s[3], d[0]); - d[1] = NR_COMPOSEN11 (s[1], s[3], d[1]); - d[2] = NR_COMPOSEN11 (s[2], s[3], d[2]); + d[0] = NR_COMPOSEN11_1111 (s[0], s[3], 
d[0]); + d[1] = NR_COMPOSEN11_1111 (s[1], s[3], d[1]); + d[2] = NR_COMPOSEN11_1111 (s[2], s[3], d[2]); d += 3; pos += lgr->dx; } diff --git a/src/display/nr-plain-stuff.cpp b/src/display/nr-plain-stuff.cpp index af6e002ec..62a61102e 100644 --- a/src/display/nr-plain-stuff.cpp +++ b/src/display/nr-plain-stuff.cpp @@ -79,14 +79,14 @@ nr_render_rgba32_rgb (guchar *px, gint w, gint h, gint rs, gint xoff, gint yoff, b = NR_RGBA32_B (c); a = NR_RGBA32_A (c); - cr = NR_COMPOSEN11 (r, a, NR_RGBA32_R (NR_DEFAULT_CHECKERCOLOR0)); - cg = NR_COMPOSEN11 (g, a, NR_RGBA32_G (NR_DEFAULT_CHECKERCOLOR0)); - cb = NR_COMPOSEN11 (b, a, NR_RGBA32_B (NR_DEFAULT_CHECKERCOLOR0)); + cr = NR_COMPOSEN11_1111 (r, a, NR_RGBA32_R (NR_DEFAULT_CHECKERCOLOR0)); + cg = NR_COMPOSEN11_1111 (g, a, NR_RGBA32_G (NR_DEFAULT_CHECKERCOLOR0)); + cb = NR_COMPOSEN11_1111 (b, a, NR_RGBA32_B (NR_DEFAULT_CHECKERCOLOR0)); c0 = (cr << 24) | (cg << 16) | (cb << 8) | 0xff; - cr = NR_COMPOSEN11 (r, a, NR_RGBA32_R (NR_DEFAULT_CHECKERCOLOR1)); - cg = NR_COMPOSEN11 (g, a, NR_RGBA32_G (NR_DEFAULT_CHECKERCOLOR1)); - cb = NR_COMPOSEN11 (b, a, NR_RGBA32_B (NR_DEFAULT_CHECKERCOLOR1)); + cr = NR_COMPOSEN11_1111 (r, a, NR_RGBA32_R (NR_DEFAULT_CHECKERCOLOR1)); + cg = NR_COMPOSEN11_1111 (g, a, NR_RGBA32_G (NR_DEFAULT_CHECKERCOLOR1)); + cb = NR_COMPOSEN11_1111 (b, a, NR_RGBA32_B (NR_DEFAULT_CHECKERCOLOR1)); c1 = (cr << 24) | (cg << 16) | (cb << 8) | 0xff; nr_render_checkerboard_rgb_custom (px, w, h, rs, xoff, yoff, c0, c1, NR_DEFAULT_CHECKERSIZEP2); diff --git a/src/libnr/nr-blit.cpp b/src/libnr/nr-blit.cpp index 2a93bc9bd..b25f0e2a7 100644 --- a/src/libnr/nr-blit.cpp +++ b/src/libnr/nr-blit.cpp @@ -270,24 +270,24 @@ nr_blit_pixblock_mask_rgba32 (NRPixBlock *d, NRPixBlock *m, unsigned long rgba) unsigned int da; switch (d->mode) { case NR_PIXBLOCK_MODE_R8G8B8: - p[0] = NR_COMPOSEN11 (r, a, p[0]); - p[1] = NR_COMPOSEN11 (g, a, p[1]); - p[2] = NR_COMPOSEN11 (b, a, p[2]); + p[0] = NR_COMPOSEN11_1111 (r, a, p[0]); + p[1] = 
NR_COMPOSEN11_1111 (g, a, p[1]); + p[2] = NR_COMPOSEN11_1111 (b, a, p[2]); p += 3; break; case NR_PIXBLOCK_MODE_R8G8B8A8P: - p[0] = NR_COMPOSENPP (r, a, p[0], p[3]); - p[1] = NR_COMPOSENPP (g, a, p[1], p[3]); - p[2] = NR_COMPOSENPP (b, a, p[2], p[3]); - p[3] = (65025 - (255 - a) * (255 - p[3]) + 127) / 255; + p[0] = NR_COMPOSENPP_1111 (r, a, p[0]); + p[1] = NR_COMPOSENPP_1111 (g, a, p[1]); + p[2] = NR_COMPOSENPP_1111 (b, a, p[2]); + p[3] = NR_COMPOSEA_111(a, p[3]); p += 4; break; case NR_PIXBLOCK_MODE_R8G8B8A8N: - da = 65025 - (255 - a) * (255 - p[3]); - p[0] = NR_COMPOSENNN_A7 (r, a, p[0], p[3], da); - p[1] = NR_COMPOSENNN_A7 (g, a, p[1], p[3], da); - p[2] = NR_COMPOSENNN_A7 (b, a, p[2], p[3], da); - p[3] = (da + 127) / 255; + da = NR_COMPOSEA_112(a, p[3]); + p[0] = NR_COMPOSENNN_111121 (r, a, p[0], p[3], da); + p[1] = NR_COMPOSENNN_111121 (g, a, p[1], p[3], da); + p[2] = NR_COMPOSENNN_111121 (b, a, p[2], p[3], da); + p[3] = NR_NORMALIZE_21(da); p += 4; break; default: diff --git a/src/libnr/nr-compose-transform.cpp b/src/libnr/nr-compose-transform.cpp index bb5022a74..afc8fd987 100644 --- a/src/libnr/nr-compose-transform.cpp +++ b/src/libnr/nr-compose-transform.cpp @@ -121,11 +121,11 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, const unsigned char *s; unsigned int ca; s = spx + sy * srs + sx * 4; - ca = NR_PREMUL (s[3], alpha); - r += NR_PREMUL (s[0], ca); - g += NR_PREMUL (s[1], ca); - b += NR_PREMUL (s[2], ca); - a += ca; + ca = NR_PREMUL_112 (s[3], alpha); + r += NR_PREMUL_121 (s[0], ca); + g += NR_PREMUL_121 (s[1], ca); + b += NR_PREMUL_121 (s[2], ca); + a += NR_NORMALIZE_21(ca); } } } @@ -143,11 +143,11 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, } else { unsigned int ca; /* Full composition */ - ca = 65025 - (255 - a) * (255 - d[3]); - d[0] = NR_COMPOSENNN_A7 (r, a, d[0], d[3], ca); - d[1] = NR_COMPOSENNN_A7 (g, a, d[1], d[3], ca); - d[2] = NR_COMPOSENNN_A7 (b, a, d[2], d[3], ca); 
- d[3] = (ca + 127) / 255; + ca = NR_COMPOSEA_112(a, d[3]); + d[0] = NR_COMPOSENNN_111121 (r, a, d[0], d[3], ca); + d[1] = NR_COMPOSENNN_111121 (g, a, d[1], d[3], ca); + d[2] = NR_COMPOSENNN_111121 (b, a, d[2], d[3], ca); + d[3] = NR_NORMALIZE_21(ca); } } /* Advance pointers */ @@ -193,19 +193,19 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h const unsigned char *s; unsigned int a; s = spx + sy * srs + sx * 4; - a = NR_PREMUL (s[3], alpha); + a = NR_PREMUL_112 (s[3], alpha); if (a != 0) { - if ((a == 255) || (d[3] == 0)) { + if ((a == 255*255) || (d[3] == 0)) { /* Transparent BG, premul src */ - d[0] = NR_PREMUL (s[0], a); - d[1] = NR_PREMUL (s[1], a); - d[2] = NR_PREMUL (s[2], a); - d[3] = a; + d[0] = NR_PREMUL_121 (s[0], a); + d[1] = NR_PREMUL_121 (s[1], a); + d[2] = NR_PREMUL_121 (s[2], a); + d[3] = NR_NORMALIZE_21(a); } else { - d[0] = NR_COMPOSENPP (s[0], a, d[0], d[3]); - d[1] = NR_COMPOSENPP (s[1], a, d[1], d[3]); - d[2] = NR_COMPOSENPP (s[2], a, d[2], d[3]); - d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255; + d[0] = NR_COMPOSENPP_1211 (s[0], a, d[0]); + d[1] = NR_COMPOSENPP_1211 (s[1], a, d[1]); + d[2] = NR_COMPOSENPP_1211 (s[2], a, d[2]); + d[3] = NR_COMPOSEA_211(a, d[3]); } } } @@ -259,10 +259,10 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h const unsigned char *s; unsigned int ca; s = spx + sy * srs + sx * 4; - ca = s[3] * alpha; - r += s[0] * ca; - g += s[1] * ca; - b += s[2] * ca; + ca = NR_PREMUL_112(s[3], alpha); + r += NR_PREMUL_123(s[0], ca); + g += NR_PREMUL_123(s[1], ca); + b += NR_PREMUL_123(s[2], ca); a += ca; } } @@ -279,10 +279,10 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h d[2] = b; d[3] = a; } else { - d[0] = NR_COMPOSEPPP (r, a, d[0], d[3]); - d[1] = NR_COMPOSEPPP (g, a, d[1], d[3]); - d[2] = NR_COMPOSEPPP (b, a, d[2], d[3]); - d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255; + d[0] = NR_COMPOSEPPP_1111 (r, a, 
d[0]); + d[1] = NR_COMPOSEPPP_1111 (g, a, d[1]); + d[2] = NR_COMPOSEPPP_1111 (b, a, d[2]); + d[3] = NR_COMPOSEA_111(a, d[3]); } } /* Advance pointers */ diff --git a/src/libnr/nr-compose.cpp b/src/libnr/nr-compose.cpp index f0e9c5e4e..3b99678e2 100644 --- a/src/libnr/nr-compose.cpp +++ b/src/libnr/nr-compose.cpp @@ -31,26 +31,26 @@ void nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, c #endif /* __cplusplus */ #endif +// Naming: nr_RESULT_BACKGROUND_FOREGROUND_extra + void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; + unsigned int r, c; - for (r = 0; r < h; r++) { + for (r = h; r > 0; r--) { if (alpha == 0) { - memset (px, 0x0, 4 * w); + memset(px, 0x0, 4 * w); } else if (alpha == 255) { - memcpy (px, spx, 4 * w); + memcpy(px, spx, 4 * w); } else { - const unsigned char *s; - unsigned char *d; - d = px; - s = spx; - for (c = 0; c < w; c++) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { *d++ = *s++; *d++ = *s++; *d++ = *s++; - *d++ = NR_PREMUL (*s, alpha); + *d++ = NR_PREMUL_111(*s, alpha); s++; } } @@ -62,50 +62,68 @@ nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const u void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; + unsigned int r, c; - for (r = 0; r < h; r++) { + for (r = h; r > 0; r--) { if (alpha == 0) { - memset (px, 0x0, 4 * w); + memset(px, 0x0, 4 * w); } else { - const unsigned char *s; - unsigned char *d; - s = spx; - d = px; - for (c = 0; c < w; c++) { - unsigned int a; - a = NR_PREMUL (s[3], alpha); - d[0] = s[0]; - d[1] = s[1]; - d[2] = s[2]; - d[3] = a; + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + if (s[3] == 0) { + d[3] = 0; + } else if (s[3] == 255) { + memcpy(d, s, 4); + } else { + d[0] = NR_DEMUL_111(s[0], s[3]); + d[1] 
= NR_DEMUL_111(s[1], s[3]); + d[2] = NR_DEMUL_111(s[2], s[3]); + d[3] = NR_PREMUL_111(s[3], alpha); + } d += 4; s += 4; } - px += rs; - spx += srs; } + px += rs; + spx += srs; } } void nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; + unsigned int r, c; - for (r = 0; r < h; r++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (c = 0; c < w; c++) { - unsigned int a; - a = (s[3] * alpha + 127) / 255; - d[0] = (s[0] * a + 127) / 255; - d[1] = (s[1] * a + 127) / 255; - d[2] = (s[2] * a + 127) / 255; - d[3] = a; - d += 4; - s += 4; + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + if (alpha == 0) { + memset(px, 0x0, 4 * w); + } else if (alpha == 255) { + for (c = w; c > 0; c--) { + d[0] = NR_PREMUL_111(s[0], s[3]); + d[1] = NR_PREMUL_111(s[1], s[3]); + d[2] = NR_PREMUL_111(s[2], s[3]); + d[3] = s[3]; + d += 4; + s += 4; + } + } else { + for (c = w; c > 0; c--) { + if (s[3] == 0) { + memset(d, 0, 4); + } else { + unsigned int a; + a = NR_PREMUL_112(s[3], alpha); + d[0] = NR_PREMUL_121(s[0], a); + d[1] = NR_PREMUL_121(s[1], a); + d[2] = NR_PREMUL_121(s[2], a); + d[3] = NR_NORMALIZE_21(a); + } + d += 4; + s += 4; + } } px += rs; spx += srs; @@ -115,26 +133,24 @@ nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const u void nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (c = 0; c < w; c++) { - if (alpha == 255) { - d[0] = s[0]; - d[1] = s[1]; - d[2] = s[2]; - d[3] = s[3]; - } else { - d[0] = NR_PREMUL (s[0], alpha); - d[1] = NR_PREMUL (s[1], alpha); - d[2] = NR_PREMUL (s[2], alpha); - d[3] = NR_PREMUL (s[3], alpha); + unsigned int r, c; + + for (r = h; r > 0; r--) { + if 
(alpha == 0) { + memset(px, 0x0, 4 * w); + } else if (alpha == 255) { + memcpy(px, spx, 4 * w); + } else { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + d[0] = NR_PREMUL_111(s[0], alpha); + d[1] = NR_PREMUL_111(s[1], alpha); + d[2] = NR_PREMUL_111(s[2], alpha); + d[3] = NR_PREMUL_111(s[3], alpha); + d += 4; + s += 4; } - d += 4; - s += 4; } px += rs; spx += srs; @@ -144,167 +160,280 @@ nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const u void nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (c = 0; c < w; c++) { - unsigned int a; - a = NR_PREMUL (s[3], alpha); - if (a == 0) { - /* Transparent FG, NOP */ - } else if ((a == 255) || (d[3] == 0)) { - /* Full coverage, COPY */ - d[0] = s[0]; - d[1] = s[1]; - d[2] = s[2]; - d[3] = a; - } else { - unsigned int ca; - /* Full composition */ - ca = 65025 - (255 - a) * (255 - d[3]); - d[0] = NR_COMPOSENNN_A7 (s[0], a, d[0], d[3], ca); - d[1] = NR_COMPOSENNN_A7 (s[1], a, d[1], d[3], ca); - d[2] = NR_COMPOSENNN_A7 (s[2], a, d[2], d[3], ca); - d[3] = (ca + 127) / 255; + unsigned int r, c; + + if (alpha == 0) { + /* NOP */ + } else if (alpha == 255) { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + if (s[3] == 0) { + /* Transparent FG, NOP */ + } else if ((s[3] == 255) || (d[3] == 0)) { + /* Full coverage, COPY */ + memcpy(d, s, 4); + } else { + /* Full composition */ + unsigned int ca; + ca = NR_COMPOSEA_112(s[3], d[3]); + d[0] = NR_COMPOSENNN_111121(s[0], s[3], d[0], d[3], ca); + d[1] = NR_COMPOSENNN_111121(s[1], s[3], d[1], d[3], ca); + d[2] = NR_COMPOSENNN_111121(s[2], s[3], d[2], d[3], ca); + d[3] = NR_NORMALIZE_21(ca); + } + d += 4; + s += 4; } - d += 4; - s += 4; + px += rs; + 
spx += srs; + } + } else { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + unsigned int a; + a = NR_PREMUL_112(s[3], alpha); + if (a == 0) { + /* Transparent FG, NOP */ + } else if ((a == 255*255) || (d[3] == 0)) { + /* Full coverage, COPY */ + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = NR_NORMALIZE_21(a); + } else { + /* Full composition */ + unsigned int ca; + ca = NR_COMPOSEA_213(a, d[3]); + d[0] = NR_COMPOSENNN_121131(s[0], a, d[0], d[3], ca); + d[1] = NR_COMPOSENNN_121131(s[1], a, d[1], d[3], ca); + d[2] = NR_COMPOSENNN_121131(s[2], a, d[2], d[3], ca); + d[3] = NR_NORMALIZE_31(ca); + } + d += 4; + s += 4; + } + px += rs; + spx += srs; } - px += rs; - spx += srs; } } void nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (c = 0; c < w; c++) { - unsigned int a; - a = NR_PREMUL (s[3], alpha); - if (a == 0) { - /* Transparent FG, NOP */ - } else if ((a == 255) || (d[3] == 0)) { - /* Full coverage, demul src */ - d[0] = (s[0] * 255 + (s[3] >> 1)) / s[3]; - d[1] = (s[1] * 255 + (s[3] >> 1)) / s[3]; - d[2] = (s[2] * 255 + (s[3] >> 1)) / s[3]; - d[3] = a; - } else { - if (alpha == 255) { - unsigned int ca; + unsigned int r, c; + + if (alpha == 0) { + /* NOP */ + } else if (alpha == 255) { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + if (s[3] == 0) { + /* Transparent FG, NOP */ + } else if (s[3] == 255) { + /* Full coverage, demul src */ + // dc' = ((1 - sa) * da*dc + sc)/da' = sc/da' = sc + // da' = 1 - (1 - sa) * (1 - da) = 1 - 0 * (1 - da) = 1 + memcpy(d, s, 4); + } else if (d[3] == 0) { + /* Full coverage, demul src */ + // dc' = ((1 - sa) * da*dc + sc)/da' = sc/da' = sc/sa = sc/sa + // da' = 1 - (1 - sa) * (1 - 
da) = 1 - (1 - sa) = sa + d[0] = NR_DEMUL_111(s[0], s[3]); + d[1] = NR_DEMUL_111(s[1], s[3]); + d[2] = NR_DEMUL_111(s[2], s[3]); + d[3] = s[3]; + } else { /* Full composition */ - ca = 65025 - (255 - s[3]) * (255 - d[3]); - d[0] = NR_COMPOSEPNN_A7 (s[0], s[3], d[0], d[3], ca); - d[1] = NR_COMPOSEPNN_A7 (s[1], s[3], d[1], d[3], ca); - d[2] = NR_COMPOSEPNN_A7 (s[2], s[3], d[2], d[3], ca); - d[3] = (65025 - (255 - s[3]) * (255 - d[3]) + 127) / 255; + // dc' = ((1 - sa) * da*dc + sc)/da' = ((1 - sa) * da*dc + sc)/da' + // da' = 1 - (1 - sa) * (1 - da) = 1 - (1 - sa) * (1 - da) + unsigned int da = NR_COMPOSEA_112(s[3], d[3]); + d[0] = NR_COMPOSEPNN_111121(s[0], s[3], d[0], d[3], da); + d[1] = NR_COMPOSEPNN_111121(s[1], s[3], d[1], d[3], da); + d[2] = NR_COMPOSEPNN_111121(s[2], s[3], d[2], d[3], da); + d[3] = NR_NORMALIZE_21(da); + } + d += 4; + s += 4; + } + px += rs; + spx += srs; + } + } else { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + unsigned int a; + a = NR_PREMUL_112(s[3], alpha); + if (a == 0) { + /* Transparent FG, NOP */ + } else if (d[3] == 0) { + /* Full coverage, demul src */ + // dc' = ((1 - alpha*sa) * da*dc + alpha*sc)/da' = alpha*sc/da' = alpha*sc/(alpha*sa) = sc/sa + // da' = 1 - (1 - alpha*sa) * (1 - da) = 1 - (1 - alpha*sa) = alpha*sa + d[0] = NR_DEMUL_111(s[0], s[3]); + d[1] = NR_DEMUL_111(s[1], s[3]); + d[2] = NR_DEMUL_111(s[2], s[3]); + d[3] = NR_NORMALIZE_21(a); } else { - // calculate premultiplied from two premultiplieds: - d[0] = NR_COMPOSEPPP(NR_PREMUL (s[0], alpha), a, NR_PREMUL (d[0], d[3]), 0); // last parameter not used - d[1] = NR_COMPOSEPPP(NR_PREMUL (s[1], alpha), a, NR_PREMUL (d[1], d[3]), 0); - d[2] = NR_COMPOSEPPP(NR_PREMUL (s[2], alpha), a, NR_PREMUL (d[2], d[3]), 0); - // total opacity: - d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255; - // un-premultiply channels: - d[0] = d[0]*255/d[3]; - d[1] = d[1]*255/d[3]; - d[2] = d[2]*255/d[3]; + // dc' = ((1 - 
alpha*sa) * da*dc + alpha*sc)/da' + // da' = 1 - (1 - alpha*sa) * (1 - da) + unsigned int da = NR_COMPOSEA_213(a, d[3]); + d[0] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[0], alpha), a, d[0], d[3], da); + d[1] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[1], alpha), a, d[1], d[3], da); + d[2] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[2], alpha), a, d[2], d[3], da); + d[3] = NR_NORMALIZE_31(da); } + d += 4; + s += 4; } - d += 4; - s += 4; + px += rs; + spx += srs; } - px += rs; - spx += srs; } } void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (c = 0; c < w; c++) { - unsigned int a; - a = NR_PREMUL (s[3], alpha); - if (a == 0) { - /* Transparent FG, NOP */ - } else if ((a == 255) || (d[3] == 0)) { - /* Transparent BG, premul src */ - d[0] = NR_PREMUL (s[0], a); - d[1] = NR_PREMUL (s[1], a); - d[2] = NR_PREMUL (s[2], a); - d[3] = a; - } else { - d[0] = NR_COMPOSENPP (s[0], a, d[0], d[3]); - d[1] = NR_COMPOSENPP (s[1], a, d[1], d[3]); - d[2] = NR_COMPOSENPP (s[2], a, d[2], d[3]); - d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255; + unsigned int r, c; + + if (alpha == 0) { + /* NOP */ + } else if (alpha == 255) { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + if (s[3] == 0) { + /* Transparent FG, NOP */ + } else if (s[3] == 255) { + /* Opaque FG, COPY */ + // dc' = (1 - sa) * dc + sa*sc = sa*sc = sc + // da' = 1 - (1 - sa) * (1 - da) = 1 - 0 * (1 - da) = 1 (= sa) + memcpy(d, s, 4); + } else if (d[3] == 0) { + /* Transparent BG, premul src */ + // dc' = (1 - sa) * dc + sa*sc = sa*sc + // da' = 1 - (1 - sa) * (1 - da) = 1 - (1 - sa) = sa + d[0] = NR_PREMUL_111(s[0], s[3]); + d[1] = NR_PREMUL_111(s[1], s[3]); + d[2] = NR_PREMUL_111(s[2], s[3]); + d[3] = s[3]; + } else { + // dc' = (1 - sa) * dc + 
sa*sc + // da' = 1 - (1 - sa) * (1 - da) + d[0] = NR_COMPOSENPP_1111(s[0], s[3], d[0]); + d[1] = NR_COMPOSENPP_1111(s[1], s[3], d[1]); + d[2] = NR_COMPOSENPP_1111(s[2], s[3], d[2]); + d[3] = NR_COMPOSEA_111(s[3], d[3]); + } + d += 4; + s += 4; } - d += 4; - s += 4; + px += rs; + spx += srs; + } + } else { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + unsigned int a; + a = NR_PREMUL_112 (s[3], alpha); + if (a == 0) { + /* Transparent FG, NOP */ + } else if (d[3] == 0) { + /* Transparent BG, premul src */ + // dc' = (1 - alpha*sa) * dc + alpha*sa*sc = alpha*sa*sc + // da' = 1 - (1 - alpha*sa) * (1 - da) = 1 - (1 - alpha*sa) = alpha*sa + d[0] = NR_PREMUL_121(s[0], a); + d[1] = NR_PREMUL_121(s[1], a); + d[2] = NR_PREMUL_121(s[2], a); + d[3] = NR_NORMALIZE_21(a); + } else { + // dc' = (1 - alpha*sa) * dc + alpha*sa*sc + // da' = 1 - (1 - alpha*sa) * (1 - da) + d[0] = NR_COMPOSENPP_1211(s[0], a, d[0]); + d[1] = NR_COMPOSENPP_1211(s[1], a, d[1]); + d[2] = NR_COMPOSENPP_1211(s[2], a, d[2]); + d[3] = NR_COMPOSEA_211(a, d[3]); + } + d += 4; + s += 4; + } + px += rs; + spx += srs; } - px += rs; - spx += srs; } } void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (c = 0; c < w; c++) { - unsigned int a; - a = NR_PREMUL (s[3], alpha); - if (a == 0) { - /* Transparent FG, NOP */ - } else if ((a == 255) || (d[3] == 0)) { - /* Transparent BG, COPY */ - d[0] = NR_PREMUL (s[0], alpha); - d[1] = NR_PREMUL (s[1], alpha); - d[2] = NR_PREMUL (s[2], alpha); - d[3] = NR_PREMUL (s[3], alpha); - } else { - if (alpha == 255) { - /* Simple */ - d[0] = NR_COMPOSEPPP (s[0], s[3], d[0], d[3]); - d[1] = NR_COMPOSEPPP (s[1], s[3], d[1], d[3]); - d[2] = NR_COMPOSEPPP (s[2], s[3], d[2], d[3]); - d[3] = (65025 - (255 - 
s[3]) * (255 - d[3]) + 127) / 255; + unsigned int r, c; + + if (alpha == 0) { + /* Transparent FG, NOP */ + } else if (alpha == 255) { + /* Simple */ + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + if (s[3] == 0) { + /* Transparent FG, NOP */ + } else if ((s[3] == 255) || (d[3] == 0)) { + /* Transparent BG, COPY */ + memcpy(d, s, 4); } else { - unsigned int c; - c = NR_PREMUL (s[0], alpha); - d[0] = NR_COMPOSEPPP (c, a, d[0], d[3]); - c = NR_PREMUL (s[1], alpha); - d[1] = NR_COMPOSEPPP (c, a, d[1], d[3]); - c = NR_PREMUL (s[2], alpha); - d[2] = NR_COMPOSEPPP (c, a, d[2], d[3]); - d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255; + d[0] = NR_COMPOSEPPP_1111(s[0], s[3], d[0]); + d[1] = NR_COMPOSEPPP_1111(s[1], s[3], d[1]); + d[2] = NR_COMPOSEPPP_1111(s[2], s[3], d[2]); + d[3] = NR_COMPOSEA_111(s[3], d[3]); } + d += 4; + s += 4; } - d += 4; - s += 4; + px += rs; + spx += srs; + } + } else { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + if (s[3] == 0) { + /* Transparent FG, NOP */ + } else if (d[3] == 0) { + /* Transparent BG, COPY */ + d[0] = NR_PREMUL_111(s[0], alpha); + d[1] = NR_PREMUL_111(s[1], alpha); + d[2] = NR_PREMUL_111(s[2], alpha); + d[3] = NR_PREMUL_111(s[3], alpha); + } else { + // dc' = (1 - alpha*sa) * dc + alpha*sc + // da' = 1 - (1 - alpha*sa) * (1 - da) + unsigned int a; + a = NR_PREMUL_112(s[3], alpha); + d[0] = NR_COMPOSEPPP_2211(NR_PREMUL_112(alpha, s[0]), a, d[0]); + d[1] = NR_COMPOSEPPP_2211(NR_PREMUL_112(alpha, s[1]), a, d[1]); + d[2] = NR_COMPOSEPPP_2211(NR_PREMUL_112(alpha, s[2]), a, d[2]); + d[3] = NR_COMPOSEA_211(a, d[3]); + } + d += 4; + s += 4; + } + px += rs; + spx += srs; } - px += rs; - spx += srs; } } @@ -313,18 +442,17 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, co void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned 
char *spx, int srs, const unsigned char *mpx, int mrs) { - int x, y; - - for (y = 0; y < h; y++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (x = 0; x < w; x++) { + unsigned int r, c; + + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + const unsigned char *m = mpx; + for (c = w; c > 0; c--) { d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; - d[3] = (s[3] * m[0] + 127) / 255; + d[3] = NR_PREMUL_111(s[3], m[0]); d += 4; s += 4; m += 1; @@ -338,23 +466,26 @@ nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, cons void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs) { - int x, y; - - for (y = 0; y < h; y++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (x = 0; x < w; x++) { + unsigned int r, c; + + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + const unsigned char *m = mpx; + for (c = w; c > 0; c--) { unsigned int a; - a = NR_PREMUL (s[3], m[0]); + a = NR_PREMUL_112 (s[3], m[0]); if (a == 0) { d[3] = 0; + } else if (a == 255*255) { + memcpy(d, s, 4); } else { - d[0] = (s[0] * 255 + (a >> 1)) / a; - d[1] = (s[1] * 255 + (a >> 1)) / a; - d[2] = (s[2] * 255 + (a >> 1)) / a; - d[3] = a; + // dc' = ((1 - m*sa) * da*dc + m*sc)/da' = m*sc/da' = m*sc/(m*sa) = sc/sa + // da' = 1 - (1 - m*sa) * (1 - da) = 1 - (1 - m*sa) = m*sa + d[0] = NR_DEMUL_111(s[0], s[3]); + d[1] = NR_DEMUL_111(s[1], s[3]); + d[2] = NR_DEMUL_111(s[2], s[3]); + d[3] = NR_NORMALIZE_21(a); } d += 4; s += 4; @@ -369,20 +500,25 @@ nr_R8G8B8A8_N_EMPTY_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, cons void nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs) { - int r, c; - - 
for (r = 0; r < h; r++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (c = 0; c < w; c++) { + unsigned int r, c; + + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + const unsigned char *m = mpx; + for (c = w; c > 0; c--) { unsigned int a; - a = NR_PREMUL (s[3], m[0]); - d[0] = NR_PREMUL (s[0], a); - d[1] = NR_PREMUL (s[1], a); - d[2] = NR_PREMUL (s[2], a); - d[3] = a; + a = NR_PREMUL_112(s[3], m[0]); + if (a == 0) { + memset(d, 0, 4); + } else if (a == 255*255) { + memcpy(d, s, 4); + } else { + d[0] = NR_PREMUL_121(s[0], a); + d[1] = NR_PREMUL_121(s[1], a); + d[2] = NR_PREMUL_121(s[2], a); + d[3] = NR_NORMALIZE_21(a); + } d += 4; s += 4; m += 1; @@ -396,25 +532,17 @@ nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, cons void nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (c = 0; c < w; c++) { - if (m[0] == 255) { - d[0] = s[0]; - d[1] = s[1]; - d[2] = s[2]; - d[3] = s[3]; - } else { - d[0] = NR_PREMUL (s[0], m[0]); - d[1] = NR_PREMUL (s[1], m[0]); - d[2] = NR_PREMUL (s[2], m[0]); - d[3] = NR_PREMUL (s[3], m[0]); - } + unsigned int r, c; + + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + const unsigned char *m = mpx; + for (c = w; c > 0; c--) { + d[0] = NR_PREMUL_111(s[0], m[0]); + d[1] = NR_PREMUL_111(s[1], m[0]); + d[2] = NR_PREMUL_111(s[2], m[0]); + d[3] = NR_PREMUL_111(s[3], m[0]); d += 4; s += 4; m += 1; @@ -428,32 +556,31 @@ nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, cons void nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, 
const unsigned char *mpx, int mrs) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (c = 0; c < w; c++) { + unsigned int r, c; + + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + const unsigned char *m = mpx; + for (c = w; c > 0; c--) { unsigned int a; - a = NR_PREMUL (s[3], m[0]); + a = NR_PREMUL_112(s[3], m[0]); if (a == 0) { /* Transparent FG, NOP */ - } else if ((a == 255) || (d[3] == 0)) { + } else if ((a == 255*255) || (d[3] == 0)) { /* Full coverage, COPY */ d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; - d[3] = a; + d[3] = NR_NORMALIZE_21(a); } else { - unsigned int ca; /* Full composition */ - ca = 65025 - (255 - a) * (255 - d[3]); - d[0] = NR_COMPOSENNN_A7 (s[0], a, d[0], d[3], ca); - d[1] = NR_COMPOSENNN_A7 (s[1], a, d[1], d[3], ca); - d[2] = NR_COMPOSENNN_A7 (s[2], a, d[2], d[3], ca); - d[3] = (ca + 127) / 255; + unsigned int ca; + ca = NR_COMPOSEA_213(a, d[3]); + d[0] = NR_COMPOSENNN_121131(s[0], a, d[0], d[3], ca); + d[1] = NR_COMPOSENNN_121131(s[1], a, d[1], d[3], ca); + d[2] = NR_COMPOSENNN_121131(s[2], a, d[2], d[3], ca); + d[3] = NR_NORMALIZE_31(ca); } d += 4; s += 4; @@ -468,45 +595,45 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, void nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (c = 0; c < w; c++) { + unsigned int r, c; + + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + const unsigned char *m = mpx; + for (c = w; c > 0; c--) { unsigned int a; - a = NR_PREMUL (s[3], m[0]); + a = NR_PREMUL_112(s[3], m[0]); if (a == 0) { /* Transparent FG, NOP */ - } else if ((a == 
255) || (d[3] == 0)) { + } else if (a == 255*255) { + /* Opaque FG, COPY */ + memcpy(d, s, 4); + } else if (d[3] == 0) { /* Full coverage, demul src */ - d[0] = (s[0] * 255 + (s[3] >> 1)) / s[3]; - d[1] = (s[1] * 255 + (s[3] >> 1)) / s[3]; - d[2] = (s[2] * 255 + (s[3] >> 1)) / s[3]; - d[3] = a; + // dc' = ((1 - m*sa) * da*dc + m*sc)/da' = m*sc/da' = m*sc/(m*sa) = sc/sa + // da' = 1 - (1 - m*sa) * (1 - da) = 1 - (1 - m*sa) = m*sa + d[0] = NR_DEMUL_111(s[0], s[3]); + d[1] = NR_DEMUL_111(s[1], s[3]); + d[2] = NR_DEMUL_111(s[2], s[3]); + d[3] = NR_NORMALIZE_21(a); + } else if (m[0] == 255) { + /* Full composition */ + // dc' = ((1 - m*sa) * da*dc + m*sc)/da' = ((1 - sa) * da*dc + sc)/da' + // da' = 1 - (1 - m*sa) * (1 - da) = 1 - (1 - sa) * (1 - da) + unsigned int da = NR_COMPOSEA_112(s[3], d[3]); + d[0] = NR_COMPOSEPNN_111121(s[0], s[3], d[0], d[3], da); + d[1] = NR_COMPOSEPNN_111121(s[1], s[3], d[1], d[3], da); + d[2] = NR_COMPOSEPNN_111121(s[2], s[3], d[2], d[3], da); + d[3] = NR_NORMALIZE_21(da); } else { - if (m[0] == 255) { - unsigned int ca; - /* Full composition */ - ca = 65025 - (255 - s[3]) * (255 - d[3]); - d[0] = NR_COMPOSEPNN_A7 (s[0], s[3], d[0], d[3], ca); - d[1] = NR_COMPOSEPNN_A7 (s[1], s[3], d[1], d[3], ca); - d[2] = NR_COMPOSEPNN_A7 (s[2], s[3], d[2], d[3], ca); - d[3] = (65025 - (255 - s[3]) * (255 - d[3]) + 127) / 255; - } else { - // calculate premultiplied from two premultiplieds: - d[0] = NR_COMPOSEPPP(NR_PREMUL (s[0], m[0]), a, NR_PREMUL (d[0], d[3]), 0); // last parameter not used - d[1] = NR_COMPOSEPPP(NR_PREMUL (s[1], m[0]), a, NR_PREMUL (d[1], d[3]), 0); - d[2] = NR_COMPOSEPPP(NR_PREMUL (s[2], m[0]), a, NR_PREMUL (d[2], d[3]), 0); - // total opacity: - d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255; - // un-premultiply channels: - d[0] = d[0]*255/d[3]; - d[1] = d[1]*255/d[3]; - d[2] = d[2]*255/d[3]; - } + // dc' = ((1 - m*sa) * da*dc + m*sc)/da' + // da' = 1 - (1 - m*sa) * (1 - da) + unsigned int da = NR_COMPOSEA_213(a, d[3]); + d[0] 
= NR_COMPOSEPNN_221131(NR_PREMUL_112(s[0], m[0]), a, d[0], d[3], da); + d[1] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[1], m[0]), a, d[1], d[3], da); + d[2] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[2], m[0]), a, d[2], d[3], da); + d[3] = NR_NORMALIZE_31(da); } d += 4; s += 4; @@ -521,29 +648,24 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (c = 0; c < w; c++) { + unsigned int r, c; + + for (r = h; r>0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + const unsigned char *m = mpx; + for (c = w; c>0; c--) { unsigned int a; - a = NR_PREMUL (s[3], m[0]); + a = NR_PREMUL_112(s[3], m[0]); if (a == 0) { /* Transparent FG, NOP */ - } else if ((a == 255) || (d[3] == 0)) { - /* Transparent BG, premul src */ - d[0] = NR_PREMUL (s[0], a); - d[1] = NR_PREMUL (s[1], a); - d[2] = NR_PREMUL (s[2], a); - d[3] = a; + } else if (a == 255*255) { + memcpy(d, s, 4); } else { - d[0] = NR_COMPOSENPP (s[0], a, d[0], d[3]); - d[1] = NR_COMPOSENPP (s[1], a, d[1], d[3]); - d[2] = NR_COMPOSENPP (s[2], a, d[2], d[3]); - d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255; + d[0] = NR_COMPOSENPP_1211(s[0], a, d[0]); + d[1] = NR_COMPOSENPP_1211(s[1], a, d[1]); + d[2] = NR_COMPOSENPP_1211(s[2], a, d[2]); + d[3] = NR_COMPOSEA_211(a, d[3]); } d += 4; s += 4; @@ -558,41 +680,35 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs) { - int r, c; - - for (r = 0; r < h; r++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) 
spx; - m = (unsigned char *) mpx; - for (c = 0; c < w; c++) { + unsigned int r, c; + + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + const unsigned char *m = mpx; + for (c = w; c > 0; c--) { unsigned int a; - a = NR_PREMUL (s[3], m[0]); + a = NR_PREMUL_112 (s[3], m[0]); if (a == 0) { /* Transparent FG, NOP */ - } else if ((a == 255) || (d[3] == 0)) { + } else if (a == 255*255) { + /* Opaque FG, COPY */ + memcpy(d, s, 4); + } else if (d[3] == 0) { /* Transparent BG, COPY */ - d[0] = NR_PREMUL (s[0], m[0]); - d[1] = NR_PREMUL (s[1], m[0]); - d[2] = NR_PREMUL (s[2], m[0]); - d[3] = NR_PREMUL (s[3], m[0]); + // dc' = (1 - m*sa) * dc + m*sc = m*sc + // da' = 1 - (1 - m*sa) * (1 - da) = 1 - (1 - m*sa) = m*sa + d[0] = NR_PREMUL_111 (s[0], m[0]); + d[1] = NR_PREMUL_111 (s[1], m[0]); + d[2] = NR_PREMUL_111 (s[2], m[0]); + d[3] = NR_NORMALIZE_21(a); } else { - if (m[0] == 255) { - /* Simple */ - d[0] = NR_COMPOSEPPP (s[0], s[3], d[0], d[3]); - d[1] = NR_COMPOSEPPP (s[1], s[3], d[1], d[3]); - d[2] = NR_COMPOSEPPP (s[2], s[3], d[2], d[3]); - d[3] = NR_A7_NORMALIZED(s[3], d[3]); - } else { - unsigned int c; - c = NR_PREMUL (s[0], m[0]); - d[0] = NR_COMPOSEPPP (c, a, d[0], d[3]); - c = NR_PREMUL (s[1], m[0]); - d[1] = NR_COMPOSEPPP (c, a, d[1], d[3]); - c = NR_PREMUL (s[2], m[0]); - d[2] = NR_COMPOSEPPP (c, a, d[2], d[3]); - d[3] = NR_A7_NORMALIZED(a, d[3]); - } + // dc' = (1 - m*sa) * dc + m*sc + // da' = 1 - (1 - m*sa) * (1 - da) + d[0] = NR_COMPOSEPPP_2211 (NR_PREMUL_112 (s[0], m[0]), a, d[0]); + d[1] = NR_COMPOSEPPP_2211 (NR_PREMUL_112 (s[1], m[0]), a, d[1]); + d[2] = NR_COMPOSEPPP_2211 (NR_PREMUL_112 (s[2], m[0]), a, d[2]); + d[3] = NR_COMPOSEA_211(a, d[3]); } d += 4; s += 4; @@ -604,78 +720,97 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, } } +/* FINAL DST MASK COLOR */ + void -nr_R8G8B8A8_N_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long 
rgba) +nr_R8G8B8A8_N_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba) { unsigned int r, g, b, a; - int x, y; + unsigned int x, y; r = NR_RGBA32_R (rgba); g = NR_RGBA32_G (rgba); b = NR_RGBA32_B (rgba); a = NR_RGBA32_A (rgba); - if (a == 0) return; - - for (y = 0; y < h; y++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (x = 0; x < w; x++) { - d[0] = r; - d[1] = g; - d[2] = b; - d[3] = NR_PREMUL (s[0], a); - d += 4; - s += 1; + for (y = h; y > 0; y--) { + if (a == 0) { + memset(px, 0, w*4); + } else { + unsigned char *d = px; + const unsigned char *m = mpx; + for (x = w; x > 0; x--) { + d[0] = r; + d[1] = g; + d[2] = b; + d[3] = NR_PREMUL_111 (m[0], a); + d += 4; + m += 1; + } } px += rs; - spx += srs; + mpx += mrs; } } void -nr_R8G8B8A8_P_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba) +nr_R8G8B8A8_P_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba) { unsigned int r, g, b, a; - int x, y; + unsigned int x, y; r = NR_RGBA32_R (rgba); g = NR_RGBA32_G (rgba); b = NR_RGBA32_B (rgba); a = NR_RGBA32_A (rgba); - if (a == 0) return; - #ifdef WITH_MMX if (NR_PIXOPS_MMX) { unsigned char c[4]; - c[0] = NR_PREMUL (r, a); - c[1] = NR_PREMUL (g, a); - c[2] = NR_PREMUL (b, a); + c[0] = NR_PREMUL_111 (r, a); + c[1] = NR_PREMUL_111 (g, a); + c[2] = NR_PREMUL_111 (b, a); c[3] = a; /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */ - nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP (px, w, h, rs, spx, srs, c); + nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP (px, w, h, rs, mpx, mrs, c); return; } #endif - for (y = 0; y < h; y++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (x = 0; x < w; x++) { - unsigned int ca; - ca = s[0] * a; - d[0] = (r * ca + 32512) / 65025; - d[1] = (g * ca + 32512) / 65025; - d[2] = (b * ca + 32512) / 65025; - 
d[3] = (ca + 127) / 255; - d += 4; - s += 1; + if ( a != 255 ){ + // Pre-premultiply color values + r *= a; + g *= a; + b *= a; + } + + for (y = h; y > 0; y--) { + unsigned char *d = px; + const unsigned char *m = mpx; + if (a == 0) { + memset(px, 0, w*4); + } else if (a == 255) { + for (x = w; x > 0; x--) { + d[0] = NR_PREMUL_111(m[0], r); + d[1] = NR_PREMUL_111(m[0], g); + d[2] = NR_PREMUL_111(m[0], b); + d[3] = m[0]; + d += 4; + m += 1; + } + } else { + for (x = w; x > 0; x--) { + // Color values are already premultiplied with a + d[0] = NR_PREMUL_121(m[0], r); + d[1] = NR_PREMUL_121(m[0], g); + d[2] = NR_PREMUL_121(m[0], b); + d[3] = NR_PREMUL_111(m[0], a); + d += 4; + m += 1; + } } px += rs; - spx += srs; + mpx += mrs; } } @@ -683,75 +818,118 @@ void nr_R8G8B8_R8G8B8_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba) { unsigned int r, g, b, a; - int x, y; + unsigned int x, y; r = NR_RGBA32_R (rgba); g = NR_RGBA32_G (rgba); b = NR_RGBA32_B (rgba); a = NR_RGBA32_A (rgba); - if (a == 0) return; - - for (y = 0; y < h; y++) { - unsigned char *d, *m; - d = (unsigned char *) px; - m = (unsigned char *) mpx; - for (x = 0; x < w; x++) { - unsigned int alpha; - alpha = NR_PREMUL (a, m[0]); - d[0] = NR_COMPOSEN11 (r, alpha, d[0]); - d[1] = NR_COMPOSEN11 (g, alpha, d[1]); - d[2] = NR_COMPOSEN11 (b, alpha, d[2]); - d += 3; - m += 1; + if (a == 0) { + /* NOP */ + } else if (a == 255) { + for (y = h; y > 0; y--) { + unsigned char *d = px; + const unsigned char *m = mpx; + for (x = w; x > 0; x--) { + d[0] = NR_COMPOSEN11_1111 (r, m[0], d[0]); + d[1] = NR_COMPOSEN11_1111 (g, m[0], d[1]); + d[2] = NR_COMPOSEN11_1111 (b, m[0], d[2]); + d += 3; + m += 1; + } + px += rs; + mpx += mrs; + } + } else { + for (y = h; y > 0; y--) { + unsigned char *d = px; + const unsigned char *m = mpx; + for (x = w; x > 0; x--) { + // dc' = (1 - m*sa) * dc + m*sa*sc + unsigned int alpha; + alpha = NR_PREMUL_112 (a, m[0]); + d[0] = 
NR_COMPOSEN11_1211 (r, alpha, d[0]); + d[1] = NR_COMPOSEN11_1211 (g, alpha, d[1]); + d[2] = NR_COMPOSEN11_1211 (b, alpha, d[2]); + d += 3; + m += 1; + } + px += rs; + mpx += mrs; } - px += rs; - mpx += mrs; } } void -nr_R8G8B8A8_N_R8G8B8A8_N_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba) +nr_R8G8B8A8_N_R8G8B8A8_N_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba) { unsigned int r, g, b, a; - int x, y; + unsigned int x, y; r = NR_RGBA32_R (rgba); g = NR_RGBA32_G (rgba); b = NR_RGBA32_B (rgba); a = NR_RGBA32_A (rgba); - if (a == 0) return; - - for (y = 0; y < h; y++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (x = 0; x < w; x++) { - unsigned int ca; - ca = NR_PREMUL (s[0], a); - if (ca == 0) { - /* Transparent FG, NOP */ - } else if ((ca == 255) || (d[3] == 0)) { - /* Full coverage, COPY */ - d[0] = r; - d[1] = g; - d[2] = b; - d[3] = ca; - } else { - unsigned int da; - /* Full composition */ - da = 65025 - (255 - ca) * (255 - d[3]); - d[0] = NR_COMPOSENNN_A7 (r, ca, d[0], d[3], da); - d[1] = NR_COMPOSENNN_A7 (g, ca, d[1], d[3], da); - d[2] = NR_COMPOSENNN_A7 (b, ca, d[2], d[3], da); - d[3] = (da + 127) / 255; + if (a == 0) { + /* NOP */ + } else if (a == 255) { + for (y = h; y > 0; y--) { + unsigned char *d = px; + const unsigned char *m = mpx; + for (x = w; x > 0; x--) { + if (m[0] == 0) { + /* Transparent FG, NOP */ + } else if (m[0] == 255 || d[3] == 0) { + /* Full coverage, COPY */ + d[0] = r; + d[1] = g; + d[2] = b; + d[3] = m[0]; + } else { + /* Full composition */ + unsigned int da = NR_COMPOSEA_112(m[0], d[3]); + d[0] = NR_COMPOSENNN_111121(r, m[0], d[0], d[3], da); + d[1] = NR_COMPOSENNN_111121(g, m[0], d[1], d[3], da); + d[2] = NR_COMPOSENNN_111121(b, m[0], d[2], d[3], da); + d[3] = NR_NORMALIZE_21(da); + } + d += 4; + m += 1; } - d += 4; - s += 1; + px += rs; + mpx += mrs; + } + } else { + for (y 
= h; y > 0; y--) { + unsigned char *d = px; + const unsigned char *m = mpx; + for (x = w; x > 0; x--) { + unsigned int ca; + ca = NR_PREMUL_112 (m[0], a); + if (ca == 0) { + /* Transparent FG, NOP */ + } else if (d[3] == 0) { + /* Full coverage, COPY */ + d[0] = r; + d[1] = g; + d[2] = b; + d[3] = NR_NORMALIZE_21(ca); + } else { + /* Full composition */ + unsigned int da = NR_COMPOSEA_213(ca, d[3]); + d[0] = NR_COMPOSENNN_121131(r, ca, d[0], d[3], da); + d[1] = NR_COMPOSENNN_121131(g, ca, d[1], d[3], da); + d[2] = NR_COMPOSENNN_121131(b, ca, d[2], d[3], da); + d[3] = NR_NORMALIZE_31(da); + } + d += 4; + m += 1; + } + px += rs; + mpx += mrs; } - px += rs; - spx += srs; } } @@ -759,9 +937,7 @@ void nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba) { unsigned int r, g, b, a; - int x, y; - - if (!(rgba & 0xff)) return; + unsigned int x, y; r = NR_RGBA32_R (rgba); g = NR_RGBA32_G (rgba); @@ -769,11 +945,11 @@ nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, con a = NR_RGBA32_A (rgba); #ifdef WITH_MMX - if (NR_PIXOPS_MMX) { + if (NR_PIXOPS_MMX && a != 0) { unsigned char c[4]; - c[0] = NR_PREMUL (r, a); - c[1] = NR_PREMUL (g, a); - c[2] = NR_PREMUL (b, a); + c[0] = NR_PREMUL_111 (r, a); + c[1] = NR_PREMUL_111 (g, a); + c[2] = NR_PREMUL_111 (b, a); c[3] = a; /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */ nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP (px, w, h, rs, spx, srs, c); @@ -781,33 +957,55 @@ nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, con } #endif - for (y = 0; y < h; y++) { - unsigned char *d, *s; - d = (unsigned char *) px; - s = (unsigned char *) spx; - for (x = 0; x < w; x++) { - unsigned int ca; - ca = NR_PREMUL (s[0], a); - if (ca == 0) { - /* Transparent FG, NOP */ - } else if ((ca == 255) || (d[3] == 0)) { - /* Full coverage, COPY */ - d[0] = NR_PREMUL (r, ca); - d[1] = NR_PREMUL (g, ca); - d[2] = NR_PREMUL (b, ca); - 
d[3] = ca; - } else { - /* Full composition */ - d[0] = NR_COMPOSENPP (r, ca, d[0], d[3]); - d[1] = NR_COMPOSENPP (g, ca, d[1], d[3]); - d[2] = NR_COMPOSENPP (b, ca, d[2], d[3]); - d[3] = (65025 - (255 - ca) * (255 - d[3]) + 127) / 255; + if (a == 0) { + /* Transparent FG, NOP */ + } else if (a == 255) { + /* Simple */ + for (y = h; y > 0; y--) { + unsigned char *d, *s; + d = (unsigned char *) px; + s = (unsigned char *) spx; + for (x = w; x > 0; x--) { + if (s[0] == 0) { + /* Transparent FG, NOP */ + } else { + /* Full composition */ + unsigned int invca = 255-s[0]; // By swapping the arguments GCC can better optimize these calls + d[0] = NR_COMPOSENPP_1111(d[0], invca, r); + d[1] = NR_COMPOSENPP_1111(d[1], invca, g); + d[2] = NR_COMPOSENPP_1111(d[2], invca, b); + d[3] = NR_COMPOSEA_111(s[0], d[3]); + } + d += 4; + s += 1; } - d += 4; - s += 1; + px += rs; + spx += srs; + } + } else { + for (y = h; y > 0; y--) { + unsigned char *d, *s; + d = (unsigned char *) px; + s = (unsigned char *) spx; + for (x = w; x > 0; x--) { + unsigned int ca; + ca = NR_PREMUL_112 (s[0], a); + if (ca == 0) { + /* Transparent FG, NOP */ + } else { + /* Full composition */ + unsigned int invca = 255*255-ca; // By swapping the arguments GCC can better optimize these calls + d[0] = NR_COMPOSENPP_1211(d[0], invca, r); + d[1] = NR_COMPOSENPP_1211(d[1], invca, g); + d[2] = NR_COMPOSENPP_1211(d[2], invca, b); + d[3] = NR_COMPOSEA_211(ca, d[3]); + } + d += 4; + s += 1; + } + px += rs; + spx += srs; } - px += rs; - spx += srs; } } @@ -816,25 +1014,24 @@ nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, con void nr_R8G8B8_R8G8B8_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; - - if (alpha == 0) return; + unsigned int r, c; #ifdef WITH_MMX - if (NR_PIXOPS_MMX) { + if (NR_PIXOPS_MMX && alpha != 0) { /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */ nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P (px, w, h, rs, 
spx, srs, alpha); return; } #endif - for (r = 0; r < h; r++) { - const unsigned char *s; - unsigned char *d; - if (alpha == 255) { - d = px; - s = spx; - for (c = 0; c < w; c++) { + if (alpha == 0) { + /* NOP */ + } else if (alpha == 255) { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + // dc' = (1 - alpha*sa) * dc + alpha*sc = (1 - sa) * dc + sc if (s[3] == 0) { /* NOP */ } else if (s[3] == 255) { @@ -842,50 +1039,54 @@ nr_R8G8B8_R8G8B8_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsi d[1] = s[1]; d[2] = s[2]; } else { - d[0] = NR_COMPOSEP11 (s[0], s[3], d[0]); - d[1] = NR_COMPOSEP11 (s[1], s[3], d[1]); - d[2] = NR_COMPOSEP11 (s[2], s[3], d[2]); + d[0] = NR_COMPOSEP11_1111(s[0], s[3], d[0]); + d[1] = NR_COMPOSEP11_1111(s[1], s[3], d[1]); + d[2] = NR_COMPOSEP11_1111(s[2], s[3], d[2]); } d += 3; s += 4; } - } else { - d = px; - s = spx; - for (c = 0; c < w; c++) { + px += rs; + spx += srs; + } + } else { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { unsigned int a; - a = NR_PREMUL (s[3], alpha); + a = NR_PREMUL_112(s[3], alpha); + // dc' = (1 - alpha*sa) * dc + alpha*sc if (a == 0) { /* NOP */ } else { - d[0] = NR_COMPOSEP11 (s[0], a, d[0]); - d[1] = NR_COMPOSEP11 (s[1], a, d[1]); - d[2] = NR_COMPOSEP11 (s[2], a, d[2]); + d[0] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[0], alpha), a, d[0]); + d[1] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[1], alpha), a, d[1]); + d[2] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[2], alpha), a, d[2]); } /* a == 255 is impossible, because alpha < 255 */ d += 3; s += 4; } + px += rs; + spx += srs; } - px += rs; - spx += srs; } } void nr_R8G8B8_R8G8B8_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha) { - int r, c; - - for (r = 0; r < h; r++) { - const unsigned char *s; - unsigned char *d; - if (alpha == 0) { - /* NOP */ - } else if (alpha == 255) { - d 
= px; - s = spx; - for (c = 0; c < w; c++) { + unsigned int r, c; + + if (alpha == 0) { + /* NOP */ + } else if (alpha == 255) { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { + // dc' = (1 - alpha*sa) * dc + alpha*sa*sc = (1 - sa) * dc + sa*sc if (s[3] == 0) { /* NOP */ } else if (s[3] == 255) { @@ -893,57 +1094,62 @@ nr_R8G8B8_R8G8B8_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsi d[1] = s[1]; d[2] = s[2]; } else { - d[0] = NR_COMPOSEN11 (s[0], s[3], d[0]); - d[1] = NR_COMPOSEN11 (s[1], s[3], d[1]); - d[2] = NR_COMPOSEN11 (s[2], s[3], d[2]); + d[0] = NR_COMPOSEN11_1111(s[0], s[3], d[0]); + d[1] = NR_COMPOSEN11_1111(s[1], s[3], d[1]); + d[2] = NR_COMPOSEN11_1111(s[2], s[3], d[2]); } d += 3; s += 4; } - } else { - d = px; - s = spx; - for (c = 0; c < w; c++) { + px += rs; + spx += srs; + } + } else { + for (r = h; r > 0; r--) { + unsigned char *d = px; + const unsigned char *s = spx; + for (c = w; c > 0; c--) { unsigned int a; - a = NR_PREMUL (s[3], alpha); + a = NR_PREMUL_112(s[3], alpha); + // dc' = (1 - alpha*sa) * dc + alpha*sa*sc if (a == 0) { /* NOP */ } else { - d[0] = NR_COMPOSEN11 (s[0], a, d[0]); - d[1] = NR_COMPOSEN11 (s[1], a, d[1]); - d[2] = NR_COMPOSEN11 (s[2], a, d[2]); + d[0] = NR_COMPOSEN11_1211(s[0], a, d[0]); + d[1] = NR_COMPOSEN11_1211(s[1], a, d[1]); + d[2] = NR_COMPOSEN11_1211(s[2], a, d[2]); } /* a == 255 is impossible, because alpha < 255 */ d += 3; s += 4; } + px += rs; + spx += srs; } - px += rs; - spx += srs; } } void nr_R8G8B8_R8G8B8_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs) { - int x, y; - - for (y = 0; y < h; y++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (x = 0; x < w; x++) { + unsigned int x, y; + + for (y = h; y > 0; y--) { + unsigned char* d = px; + const unsigned char* s = spx; + const 
unsigned char* m = mpx; + for (x = w; x > 0; x--) { unsigned int a; - a = NR_PREMUL (s[3], m[0]); - if (a != 0) { - unsigned int r, g, b; - r = NR_PREMUL (s[0], m[0]); - d[0] = NR_COMPOSEP11 (r, a, d[0]); - g = NR_PREMUL (s[1], m[0]); - d[1] = NR_COMPOSEP11 (g, a, d[1]); - b = NR_PREMUL (s[2], m[0]); - d[2] = NR_COMPOSEP11 (b, a, d[2]); + a = NR_PREMUL_112(s[3], m[0]); + if (a == 0) { + /* NOP */ + } else if (a == 255*255) { + memcpy(d, s, 3); + } else { + // dc' = (1 - m*sa) * dc + m*sc + d[0] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[0], m[0]), a, d[0]); + d[1] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[1], m[0]), a, d[1]); + d[2] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[2], m[0]), a, d[2]); } d += 3; s += 4; @@ -958,20 +1164,24 @@ nr_R8G8B8_R8G8B8_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const u void nr_R8G8B8_R8G8B8_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs) { - int x, y; - - for (y = 0; y < h; y++) { - unsigned char *d, *s, *m; - d = (unsigned char *) px; - s = (unsigned char *) spx; - m = (unsigned char *) mpx; - for (x = 0; x < w; x++) { + unsigned int x, y; + + for (y = h; y > 0; y--) { + unsigned char* d = px; + const unsigned char* s = spx; + const unsigned char* m = mpx; + for (x = w; x > 0; x--) { unsigned int a; - a = NR_PREMUL (s[3], m[0]); - if (a != 0) { - d[0] = NR_COMPOSEP11 (s[0], a, d[0]); - d[1] = NR_COMPOSEP11 (s[1], a, d[1]); - d[2] = NR_COMPOSEP11 (s[2], a, d[2]); + a = NR_PREMUL_112(s[3], m[0]); + if (a == 0) { + /* NOP */ + } else if (a == 255*255) { + memcpy(d, s, 3); + } else { + // dc' = (1 - m*sa) * dc + m*sa*sc + d[0] = NR_COMPOSEN11_1211(s[0], a, d[0]); + d[1] = NR_COMPOSEN11_1211(s[1], a, d[1]); + d[2] = NR_COMPOSEN11_1211(s[2], a, d[2]); } d += 3; s += 4; diff --git a/src/libnr/nr-compose.h b/src/libnr/nr-compose.h index ccdb52cb0..4cecfac60 100644 --- a/src/libnr/nr-compose.h +++ b/src/libnr/nr-compose.h @@ -55,9 +55,9 @@ void 
nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P_A8 (unsigned char *p, int w, int h, int void nr_R8G8B8A8_N_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba); void nr_R8G8B8A8_P_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba); -void nr_R8G8B8_R8G8B8_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba); -void nr_R8G8B8A8_N_R8G8B8A8_N_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba); -void nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba); +void nr_R8G8B8_R8G8B8_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba); +void nr_R8G8B8A8_N_R8G8B8A8_N_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba); +void nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba); /* RGB */ diff --git a/src/libnr/nr-gradient.cpp b/src/libnr/nr-gradient.cpp index 4553eb598..7383d6c76 100644 --- a/src/libnr/nr-gradient.cpp +++ b/src/libnr/nr-gradient.cpp @@ -161,10 +161,10 @@ nr_rgradient_render_generic_symmetric(NRRGradientRenderer *rgr, NRPixBlock *pb) idx = (int) CLAMP(pos, 0, (double) NRG_MASK); } unsigned char const *s = rgr->vector + 4 * idx; - d[0] = NR_COMPOSENPP(s[0], s[3], d[0], d[3]); - d[1] = NR_COMPOSENPP(s[1], s[3], d[1], d[3]); - d[2] = NR_COMPOSENPP(s[2], s[3], d[2], d[3]); - d[3] = (255*255 - (255 - s[3]) * (255 - d[3]) + 127) / 255; + d[0] = NR_COMPOSENPP_1111(s[0], s[3], d[0]); + d[1] = NR_COMPOSENPP_1111(s[1], s[3], d[1]); + d[2] = NR_COMPOSENPP_1111(s[2], s[3], d[2]); + d[3] = NR_COMPOSEA_111(s[3], d[3]); d += 4; gx += dx; gy += dy; @@ -193,11 +193,11 @@ 
nr_rgradient_render_generic_symmetric(NRRGradientRenderer *rgr, NRPixBlock *pb) d[2] = s[2]; d[3] = 255; } else if (s[3] != 0) { - unsigned ca = 255*255 - (255 - s[3]) * (255 - d[3]); - d[0] = NR_COMPOSENNN_A7(s[0], s[3], d[0], d[3], ca); - d[1] = NR_COMPOSENNN_A7(s[1], s[3], d[1], d[3], ca); - d[2] = NR_COMPOSENNN_A7(s[2], s[3], d[2], d[3], ca); - d[3] = (ca + 127) / 255; + unsigned ca = NR_COMPOSEA_112(s[3], d[3]); + d[0] = NR_COMPOSENNN_111121(s[0], s[3], d[0], d[3], ca); + d[1] = NR_COMPOSENNN_111121(s[1], s[3], d[1], d[3], ca); + d[2] = NR_COMPOSENNN_111121(s[2], s[3], d[2], d[3], ca); + d[3] = NR_NORMALIZE_21(ca); } d += 4; gx += dx; diff --git a/src/libnr/nr-pixblock-pattern.cpp b/src/libnr/nr-pixblock-pattern.cpp index b4e25638f..771786584 100644 --- a/src/libnr/nr-pixblock-pattern.cpp +++ b/src/libnr/nr-pixblock-pattern.cpp @@ -52,28 +52,28 @@ nr_pixblock_render_gray_noise (NRPixBlock *pb, NRPixBlock *mask) v = v ^ noise[seed]; switch (pb->mode) { case NR_PIXBLOCK_MODE_A8: - d[0] = (65025 - (255 - m[0]) * (255 - d[0]) + 127) / 255; + d[0] = NR_COMPOSEA_111(m[0], d[0]); break; case NR_PIXBLOCK_MODE_R8G8B8: - d[0] = NR_COMPOSEN11 (v, m[0], d[0]); - d[1] = NR_COMPOSEN11 (v, m[0], d[1]); - d[2] = NR_COMPOSEN11 (v, m[0], d[2]); + d[0] = NR_COMPOSEN11_1111 (v, m[0], d[0]); + d[1] = NR_COMPOSEN11_1111 (v, m[0], d[1]); + d[2] = NR_COMPOSEN11_1111 (v, m[0], d[2]); break; case NR_PIXBLOCK_MODE_R8G8B8A8N: if (m[0] != 0) { unsigned int ca; - ca = NR_A7 (m[0], d[3]); - d[0] = NR_COMPOSENNN_A7 (v, m[0], d[0], d[3], ca); - d[1] = NR_COMPOSENNN_A7 (v, m[0], d[1], d[3], ca); - d[2] = NR_COMPOSENNN_A7 (v, m[0], d[2], d[3], ca); - d[3] = (ca + 127) / 255; + ca = NR_COMPOSEA_112(m[0], d[3]); + d[0] = NR_COMPOSENNN_111121 (v, m[0], d[0], d[3], ca); + d[1] = NR_COMPOSENNN_111121 (v, m[0], d[1], d[3], ca); + d[2] = NR_COMPOSENNN_111121 (v, m[0], d[2], d[3], ca); + d[3] = NR_NORMALIZE_21(ca); } break; case NR_PIXBLOCK_MODE_R8G8B8A8P: - d[0] = NR_COMPOSENPP (v, m[0], d[0], d[3]); 
- d[1] = NR_COMPOSENPP (v, m[0], d[1], d[3]); - d[2] = NR_COMPOSENPP (v, m[0], d[2], d[3]); - d[3] = (NR_A7 (d[3], m[0]) + 127) / 255; + d[0] = NR_COMPOSENPP_1111 (v, m[0], d[0]); + d[1] = NR_COMPOSENPP_1111 (v, m[0], d[1]); + d[2] = NR_COMPOSENPP_1111 (v, m[0], d[2]); + d[3] = NR_COMPOSEA_111(d[3], m[0]); break; default: break; diff --git a/src/libnr/nr-pixblock-pixel.cpp b/src/libnr/nr-pixblock-pixel.cpp index c778c0c7f..109ed69dc 100644 --- a/src/libnr/nr-pixblock-pixel.cpp +++ b/src/libnr/nr-pixblock-pixel.cpp @@ -47,14 +47,14 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP d[2] = s[2]; break; case NR_PIXBLOCK_MODE_R8G8B8A8N: - d[0] = NR_COMPOSEN11 (s[0], s[3], 255); - d[1] = NR_COMPOSEN11 (s[1], s[3], 255); - d[2] = NR_COMPOSEN11 (s[2], s[3], 255); + d[0] = NR_COMPOSEN11_1111 (s[0], s[3], 255); + d[1] = NR_COMPOSEN11_1111 (s[1], s[3], 255); + d[2] = NR_COMPOSEN11_1111 (s[2], s[3], 255); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: - d[0] = NR_COMPOSEP11 (s[0], s[3], 255); - d[1] = NR_COMPOSEP11 (s[1], s[3], 255); - d[2] = NR_COMPOSEP11 (s[2], s[3], 255); + d[0] = NR_COMPOSEP11_1111 (s[0], s[3], 255); + d[1] = NR_COMPOSEP11_1111 (s[1], s[3], 255); + d[2] = NR_COMPOSEP11_1111 (s[2], s[3], 255); break; default: break; @@ -82,9 +82,9 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP d[1] = 255; d[2] = 255; } else { - d[0] = (s[0] * 255) / s[3]; - d[1] = (s[1] * 255) / s[3]; - d[2] = (s[2] * 255) / s[3]; + d[0] = NR_DEMUL_111(s[0], s[3]); + d[1] = NR_DEMUL_111(s[0], s[3]); + d[2] = NR_DEMUL_111(s[0], s[3]); } d[3] = s[3]; break; @@ -103,9 +103,9 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP d[3] = 255; break; case NR_PIXBLOCK_MODE_R8G8B8A8N: - d[0] = NR_PREMUL (s[0], s[3]); - d[1] = NR_PREMUL (s[1], s[3]); - d[2] = NR_PREMUL (s[2], s[3]); + d[0] = NR_PREMUL_111 (s[0], s[3]); + d[1] = NR_PREMUL_111 (s[1], s[3]); + d[2] = NR_PREMUL_111 (s[2], s[3]); d[3] = s[3]; 
break; case NR_PIXBLOCK_MODE_R8G8B8A8P: @@ -132,10 +132,10 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP d[0] = 255; break; case NR_PIXBLOCK_MODE_R8G8B8A8N: - d[0] = NR_A7_NORMALIZED(s[3],d[0]); + d[0] = NR_COMPOSEA_111(s[3], d[0]); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: - d[0] = NR_A7_NORMALIZED(s[3],d[0]); + d[0] = NR_COMPOSEA_111(s[3], d[0]); break; default: break; @@ -151,14 +151,14 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP d[2] = s[2]; break; case NR_PIXBLOCK_MODE_R8G8B8A8N: - d[0] = NR_COMPOSEN11 (s[0], s[3], d[0]); - d[1] = NR_COMPOSEN11 (s[1], s[3], d[1]); - d[2] = NR_COMPOSEN11 (s[2], s[3], d[2]); + d[0] = NR_COMPOSEN11_1111 (s[0], s[3], d[0]); + d[1] = NR_COMPOSEN11_1111 (s[1], s[3], d[1]); + d[2] = NR_COMPOSEN11_1111 (s[2], s[3], d[2]); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: - d[0] = NR_COMPOSEP11 (s[0], s[3], d[0]); - d[1] = NR_COMPOSEP11 (s[1], s[3], d[1]); - d[2] = NR_COMPOSEP11 (s[2], s[3], d[2]); + d[0] = NR_COMPOSEP11_1111 (s[0], s[3], d[0]); + d[1] = NR_COMPOSEP11_1111 (s[1], s[3], d[1]); + d[2] = NR_COMPOSEP11_1111 (s[2], s[3], d[2]); break; default: break; @@ -176,21 +176,21 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP case NR_PIXBLOCK_MODE_R8G8B8A8N: if (s[3] != 0) { unsigned int ca; - ca = NR_A7 (s[3], d[3]); - d[0] = NR_COMPOSENNN_A7 (s[0], s[3], d[0], d[3], ca); - d[1] = NR_COMPOSENNN_A7 (s[1], s[3], d[1], d[3], ca); - d[2] = NR_COMPOSENNN_A7 (s[2], s[3], d[2], d[3], ca); - d[3] = (ca + 127) / 255; + ca = NR_COMPOSEA_112(s[3], d[3]); + d[0] = NR_COMPOSENNN_111121 (s[0], s[3], d[0], d[3], ca); + d[1] = NR_COMPOSENNN_111121 (s[1], s[3], d[1], d[3], ca); + d[2] = NR_COMPOSENNN_111121 (s[2], s[3], d[2], d[3], ca); + d[3] = NR_NORMALIZE_21(ca); } break; case NR_PIXBLOCK_MODE_R8G8B8A8P: if (s[3] != 0) { unsigned int ca; - ca = NR_A7 (s[3], d[3]); - d[0] = NR_COMPOSEPNN_A7 (s[0], s[3], d[0], d[3], ca); - d[1] = NR_COMPOSEPNN_A7 
(s[1], s[3], d[0], d[3], ca); - d[2] = NR_COMPOSEPNN_A7 (s[2], s[3], d[0], d[3], ca); - d[3] = (ca + 127) / 255; + ca = NR_COMPOSEA_112(s[3], d[3]); + d[0] = NR_COMPOSEPNN_111121 (s[0], s[3], d[0], d[3], ca); + d[1] = NR_COMPOSEPNN_111121 (s[1], s[3], d[0], d[3], ca); + d[2] = NR_COMPOSEPNN_111121 (s[2], s[3], d[0], d[3], ca); + d[3] = NR_NORMALIZE_21(ca); } break; default: @@ -207,16 +207,16 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP d[2] = s[2]; break; case NR_PIXBLOCK_MODE_R8G8B8A8N: - d[0] = NR_COMPOSENPP (s[0], s[3], d[0], d[3]); - d[1] = NR_COMPOSENPP (s[1], s[3], d[1], d[3]); - d[2] = NR_COMPOSENPP (s[2], s[3], d[2], d[3]); - d[3] = NR_A7_NORMALIZED(s[3],d[3]); + d[0] = NR_COMPOSENPP_1111 (s[0], s[3], d[0]); + d[1] = NR_COMPOSENPP_1111 (s[1], s[3], d[1]); + d[2] = NR_COMPOSENPP_1111 (s[2], s[3], d[2]); + d[3] = NR_COMPOSEA_111(s[3], d[3]); break; case NR_PIXBLOCK_MODE_R8G8B8A8P: - d[0] = NR_COMPOSEPPP (s[0], s[3], d[0], d[3]); - d[1] = NR_COMPOSEPPP (s[1], s[3], d[1], d[3]); - d[2] = NR_COMPOSEPPP (s[2], s[3], d[2], d[3]); - d[3] = NR_A7_NORMALIZED(s[3],d[3]); + d[0] = NR_COMPOSEPPP_1111 (s[0], s[3], d[0]); + d[1] = NR_COMPOSEPPP_1111 (s[1], s[3], d[1]); + d[2] = NR_COMPOSEPPP_1111 (s[2], s[3], d[2]); + d[3] = NR_COMPOSEA_111(s[3], d[3]); break; default: break; diff --git a/src/libnr/nr-pixops.h b/src/libnr/nr-pixops.h index ba7fbc41a..2c41f8dbf 100644 --- a/src/libnr/nr-pixops.h +++ b/src/libnr/nr-pixops.h @@ -16,34 +16,119 @@ #define NR_RGBA32_B(v) (unsigned char) (((v) >> 8) & 0xff) #define NR_RGBA32_A(v) (unsigned char) ((v) & 0xff) -#define FAST_DIVIDE_BY_255(v) ((((v) << 8) + (v) + 257) >> 16) +// FAST_DIVIDE assumes that 0<=num<=256*denom +// (this covers the case that num=255*denom+denom/2, which is used by DIV_ROUND) +template<unsigned int divisor> static inline unsigned int FAST_DIVIDE(unsigned int v) { return v/divisor; } +template<> static inline unsigned int FAST_DIVIDE<255>(unsigned int v) { return ((v+1)*0x101) >> 16; }
+template<> inline unsigned int FAST_DIVIDE<255*255>(unsigned int v) { v=(v+1)<<1; v=v+(v>>7)+((v*0x3)>>16)+(v>>22); return (v>>16)>>1; } // no storage-class specifier: not permitted on an explicit specialization
+// FAST_DIV_ROUND assumes that 0<=num<=255*denom (DIV_ROUND should work up to num=2^32-1-(denom/2),
+// but FAST_DIVIDE_BY_255 already fails at num=65790=258*255, which is not too far above 255.5*255)
+template<unsigned int divisor> static inline unsigned int FAST_DIV_ROUND(unsigned int v) { return FAST_DIVIDE<divisor>(v+(divisor)/2); } // adds divisor/2 before dividing, i.e. rounds to nearest
+static inline unsigned int DIV_ROUND(unsigned int v, unsigned int divisor) { return (v+divisor/2)/divisor; } // run-time divisor variant; divisor must be non-zero
-#define NR_A7(fa,ba) (65025 - (255 - fa) * (255 - ba))
-#define NR_COMPOSENNN_A7(fc,fa,bc,ba,a) (((255 - (fa)) * (bc) * (ba) + (fa) * (fc) * 255 + 127) / a)
-#define NR_COMPOSEPNN_A7(fc,fa,bc,ba,a) (((255 - (fa)) * (bc) * (ba) + (fc) * 65025 + 127) / a)
-#define NR_COMPOSENNP(fc,fa,bc,ba) (((255 - (fa)) * (bc) * (ba) + (fa) * (fc) * 255 + 32512) / 65025)
-#define NR_COMPOSEPNP(fc,fa,bc,ba) (((255 - (fa)) * (bc) * (ba) + (fc) * 65025 + 32512) / 65025)
-#define INK_COMPOSE(f,a,b) ( ( ((guchar) b) * ((guchar) (0xff - a)) + ((guchar) ((b ^ ~f) + b/4 - (b>127? 63 : 0))) * ((guchar) a) ) >>8)
-#define NR_PREMUL(c,a) (FAST_DIVIDE_BY_255(((c) * (a) + 127)))
-#define NR_PREMUL_SINGLE(c) (FAST_DIVIDE_BY_255((c) + 127))
+#define INK_COMPOSE(f,a,b) ( ( ((guchar) (b)) * ((guchar) (0xff - (a))) + ((guchar) (((b) ^ ~(f)) + (b)/4 - ((b)>127? 63 : 0))) * ((guchar) (a)) ) >>8)
-#if 0
+// Naming: OPb_i+o
+// OP = operation, for example: NORMALIZE, COMPOSEA, COMPOSENNN, PREMUL, etc.
+// i+o = one digit per input, then one for the output: each digit is the value's range as a power of 2^8-1
+// e.g. 213 reads as 0<=a<=255^2, 0<=b<=255, 0<=output<=255^3
-#define NR_A7_NORMALIZED(fa,ba) (FAST_DIVIDE_BY_255((65025 - (255 - (fa)) * (255 - (ba))) + 127))
-#define NR_COMPOSENPP(fc,fa,bc,ba) (FAST_DIVIDE_BY_255((255 - (fa)) * (bc) + (fa) * (fc) + 127))
-#define NR_COMPOSEPPP(fc,fa,bc,ba) (FAST_DIVIDE_BY_255((255 - (fa)) * (bc) + (fc) * 255 + 127))
-#define NR_COMPOSEP11(fc,fa,bc) (FAST_DIVIDE_BY_255((255 - (fa)) * (bc) + (fc) * 255 + 127))
-#define NR_COMPOSEN11(fc,fa,bc) (FAST_DIVIDE_BY_255((255 - (fa)) * (bc) + (fc) * (fa) + 127))
+// Normalize: divide a range-N value back down to range 1
+static inline unsigned int NR_NORMALIZE_11(unsigned int v) { const unsigned int same = v; return same; }
+static inline unsigned int NR_NORMALIZE_21(unsigned int v) { const unsigned int scaled = FAST_DIV_ROUND<255>(v); return scaled; }
+static inline unsigned int NR_NORMALIZE_31(unsigned int v) { const unsigned int scaled = FAST_DIV_ROUND<65025>(v); return scaled; }
+static inline unsigned int NR_NORMALIZE_41(unsigned int v) { const unsigned int scaled = FAST_DIV_ROUND<16581375>(v); return scaled; }
-#else
+// Alpha channel composition: 1 - (1-a)*(1-b)
+// The same result can be written as NR_COMPOSENPP(255, a, b). That form is slightly slower, but it would let all
+// four components be handled identically, which could help if someone decides to use SSE or something similar.
+static inline unsigned int NR_COMPOSEA_213(unsigned int a, unsigned int b) { const unsigned int inv = (65025u - a) * (255u - b); return 16581375u - inv; }
+static inline unsigned int NR_COMPOSEA_112(unsigned int a, unsigned int b) { const unsigned int inv = (255u - a) * (255u - b); return 65025u - inv; }
+static inline unsigned int NR_COMPOSEA_211(unsigned int a, unsigned int b) { const unsigned int wide = NR_COMPOSEA_213(a, b); return NR_NORMALIZE_31(wide); }
+static inline unsigned int NR_COMPOSEA_111(unsigned int a, unsigned int b) { const unsigned int wide = NR_COMPOSEA_112(a, b); return NR_NORMALIZE_21(wide); }
-inline int NR_A7_NORMALIZED(int fa,int ba) {int temp=(65025 - (255 - (fa)) * (255 - (ba))) + 127; return FAST_DIVIDE_BY_255(temp);}
-inline int NR_COMPOSENPP(int fc,int fa,int bc,int ba) {int temp=(255 - (fa)) * (bc) + (fa) * (fc) + 127; return FAST_DIVIDE_BY_255(temp);}
-inline int NR_COMPOSEPPP(int fc,int fa,int bc,int ba) {int temp=(255 - (fa)) * (bc) + (fc) * 255 + 127; return FAST_DIVIDE_BY_255(temp);}
-inline int NR_COMPOSEP11(int fc,int fa,int bc) {int temp=(255 - (fa)) * (bc) + (fc) * 255 + 127; return FAST_DIVIDE_BY_255(temp);}
-inline int NR_COMPOSEN11(int fc,int fa,int bc) {int temp=(255 - (fa)) * (bc) + (fc) * (fa) + 127; return FAST_DIVIDE_BY_255(temp);}
+// COMPOSENNP computes (1 - fa) * bc * ba + fa * fc
+static inline unsigned int NR_COMPOSENNP_12114(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { const unsigned int back = (65025u - fa) * ba * bc; const unsigned int front = 255u * fa * fc; return back + front; }
+static inline unsigned int NR_COMPOSENNP_11113(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { const unsigned int back = (255u - fa) * ba * bc; const unsigned int front = 255u * fa * fc; return back + front; }
+static inline unsigned int NR_COMPOSENNP_11111(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { const unsigned int wide = NR_COMPOSENNP_11113(fc, fa, bc, ba); return NR_NORMALIZE_31(wide); }
+
+// COMPOSEPNP computes (1 - fa) * bc * ba + fc
+static inline unsigned int NR_COMPOSEPNP_22114(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { const unsigned int back = (65025u - fa) * ba * bc; const unsigned int front = 65025u * fc; return back + front; }
+static inline unsigned int NR_COMPOSEPNP_11113(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { const unsigned int back = (255u - fa) * ba * bc; const unsigned int front = 65025u * fc; return back + front; }
+static inline unsigned int NR_COMPOSEPNP_22111(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { const unsigned int wide = NR_COMPOSEPNP_22114(fc, fa, bc, ba); return NR_NORMALIZE_41(wide); }
+static inline unsigned int NR_COMPOSEPNP_11111(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { const unsigned int wide = NR_COMPOSEPNP_11113(fc, fa, bc, ba); return NR_NORMALIZE_31(wide); }
+
+// COMPOSENNN computes ((1 - fa) * bc * ba + fa * fc)/a
+// The numerator comes from the non-normalized NR_COMPOSENNP variants
+static inline unsigned int NR_COMPOSENNN_121131(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba, unsigned int a) { const unsigned int num = NR_COMPOSENNP_12114(fc, fa, bc, ba); return DIV_ROUND(num, a); }
+static inline unsigned int NR_COMPOSENNN_111121(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba, unsigned int a) { const unsigned int num = NR_COMPOSENNP_11113(fc, fa, bc, ba); return DIV_ROUND(num, a); }
+
+// COMPOSEPNN computes ((1 - fa) * bc * ba + fc)/a
+// The numerator comes from the non-normalized NR_COMPOSEPNP variants
+static inline unsigned int NR_COMPOSEPNN_221131(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba, unsigned int a) { const unsigned int num = NR_COMPOSEPNP_22114(fc, fa, bc, ba); return DIV_ROUND(num, a); }
+static inline unsigned int NR_COMPOSEPNN_111121(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba, unsigned int a) { const unsigned int num = NR_COMPOSEPNP_11113(fc, fa, bc, ba); return DIV_ROUND(num, a); }
+
+// COMPOSENPP computes (1 - fa) * bc + fa * fc
+// Rewritten: (1-fa)*bc+fa*fc = bc-fa*bc+fa*fc = bc+fa*(fc-bc)
+// Keeping the leading 255*bc term in the non-normalized version, rather than factoring it out, is faster for some reason...
+static inline unsigned int NR_COMPOSENPP_1213(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*255*bc + fa*(fc-bc); }
+static inline unsigned int NR_COMPOSENPP_1123(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*bc + fa*(255*fc-bc); }
+static inline unsigned int NR_COMPOSENPP_1112(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*bc + fa*(fc-bc); }
+static inline unsigned int NR_COMPOSENPP_1211(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_31(NR_COMPOSENPP_1213(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSENPP_1121(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_31(NR_COMPOSENPP_1123(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSENPP_1111(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21(NR_COMPOSENPP_1112(fc, fa, bc)); }
+
+// Operation: (1 - fa) * bc + fc
+// (1-fa)*bc+fc = bc-fa*bc+fc = (bc+fc)-fa*bc
+// This rewritten form results in faster code (found out through testing)
+static inline unsigned int NR_COMPOSEPPP_2224(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*255*(bc+fc) - fa*bc; }
+  // NR_COMPOSEPPP_2224 assumes that fa and fc have a common component (fa=a*x and fc=c*x), because then the maximum value is:
+  // (255*255-255*x)*255*255 + 255*x*255*255 = 255*255*( (255*255-255*x) + 255*x ) = 255*255*255*( (255-x)+x ) = 255*255*255*255
+static inline unsigned int NR_COMPOSEPPP_2213(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*(255*bc+fc) - fa*bc; }
+static inline unsigned int NR_COMPOSEPPP_1213(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*255*(bc+fc) - fa*bc; }
+static inline unsigned int NR_COMPOSEPPP_1112(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*(bc+fc) - fa*bc; }
+static inline unsigned int NR_COMPOSEPPP_2221(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_41(NR_COMPOSEPPP_2224(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSEPPP_2211(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_31(NR_COMPOSEPPP_2213(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSEPPP_1211(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_31(NR_COMPOSEPPP_1213(fc, fa, bc)); } // _1213 yields a range-3 value, so it needs the 3->1 normalization (divide by 255^2)
+static inline unsigned int NR_COMPOSEPPP_1111(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21(NR_COMPOSEPPP_1112(fc, fa, bc)); }
+
+#define NR_COMPOSEN11_1211 NR_COMPOSENPP_1211
+#define NR_COMPOSEN11_1111 NR_COMPOSENPP_1111
+//inline unsigned int NR_COMPOSEN11_1111(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21((255 - fa) * bc + fa * fc ); }
+
+#define NR_COMPOSEP11_2211 NR_COMPOSEPPP_2211
+#define NR_COMPOSEP11_1211 NR_COMPOSEPPP_1211
+#define NR_COMPOSEP11_1111 NR_COMPOSEPPP_1111
+//inline unsigned int NR_COMPOSEP11_1111(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21((255 - fa) * bc + fc * 255); }
+
+// Premultiply using c*a
+static inline unsigned int NR_PREMUL_134(unsigned int c, unsigned int a) { return c * a; }
+static inline unsigned int NR_PREMUL_224(unsigned int c, unsigned int a) { return c * a; }
+static inline unsigned int NR_PREMUL_123(unsigned int c, unsigned int a) { return c * a; }
+static inline unsigned int NR_PREMUL_112(unsigned int c, unsigned int a) { return c * a; }
+static inline unsigned int NR_PREMUL_314(unsigned int c, unsigned int a) { return NR_PREMUL_134(c, a); }
+static inline unsigned int NR_PREMUL_213(unsigned int c, unsigned int a) { return NR_PREMUL_123(c, a); }
+static inline unsigned int NR_PREMUL_131(unsigned int c, unsigned int a) { return NR_NORMALIZE_41(NR_PREMUL_134(c, a)); }
+static inline unsigned int NR_PREMUL_221(unsigned int c, unsigned int a) { return NR_NORMALIZE_41(NR_PREMUL_224(c, a)); }
+static inline unsigned int NR_PREMUL_121(unsigned int c, unsigned int a) { return NR_NORMALIZE_31(NR_PREMUL_123(c, a)); }
+static inline unsigned int NR_PREMUL_111(unsigned int c, unsigned int a) { return NR_NORMALIZE_21(NR_PREMUL_112(c, a)); }
+static inline unsigned int NR_PREMUL_311(unsigned int c, unsigned int a) { return NR_NORMALIZE_41(NR_PREMUL_314(c, a)); }
+static inline unsigned int NR_PREMUL_211(unsigned int c, unsigned int a) { return NR_NORMALIZE_31(NR_PREMUL_213(c, a)); }
+
+// Demultiply using c/a
+static inline unsigned int NR_DEMUL_131(unsigned int c, unsigned int a) { return DIV_ROUND(255 * 255 * 255 * c, a); }
+static inline unsigned int NR_DEMUL_231(unsigned int c, unsigned int a) { return DIV_ROUND(255 * 255 * c, a); }
+static inline unsigned int NR_DEMUL_121(unsigned int c, unsigned int a) { return DIV_ROUND(255 * 255 * c, a); }
+static inline unsigned int NR_DEMUL_331(unsigned int c, unsigned int a) { return DIV_ROUND(255 * c, a); }
+static inline unsigned int NR_DEMUL_221(unsigned int c, unsigned int a) { return DIV_ROUND(255 * c, a); }
+static inline unsigned int NR_DEMUL_111(unsigned int c, unsigned int a) { return DIV_ROUND(255 * c, a); }
+static inline unsigned int NR_DEMUL_431(unsigned int c, unsigned int a) { return DIV_ROUND(c, a); }
+static inline unsigned int NR_DEMUL_321(unsigned int c, unsigned int a) { return DIV_ROUND(c, a); }
+static inline unsigned int NR_DEMUL_211(unsigned int c, unsigned int a) { return DIV_ROUND(c, a); }
+static inline unsigned int NR_DEMUL_421(unsigned int c, unsigned int a) { return DIV_ROUND(c, 255 * a); }
+static inline unsigned int NR_DEMUL_311(unsigned int c, unsigned int a) { return DIV_ROUND(c, 255 * a); }
+static inline unsigned int NR_DEMUL_411(unsigned int c, unsigned int a) { return DIV_ROUND(c, 255 * 255 * a); }
-#endif
 #endif
diff --git a/src/make.exclude b/src/make.exclude
index d56a6bdff..edcfe0097 100644
--- a/src/make.exclude
+++ b/src/make.exclude
@@ -26,6 +26,8 @@ extract-uri-test.cpp
 helper/units-test.cpp
 inkview.cpp
 libnr/in-svg-plane-test.cpp
+libnr/nr-compose-reference.cpp
+libnr/nr-compose-test.cpp
 libnr/nr-matrix-test.cpp
 libnr/nr-point-fns-test.cpp
 libnr/nr-rotate-fns-test.cpp