Code

patch for rendering quality and speed from Jasper van de Gronde
author buliabyak <buliabyak@users.sourceforge.net>
Sat, 29 Jul 2006 17:07:59 +0000 (17:07 +0000)
committer buliabyak <buliabyak@users.sourceforge.net>
Sat, 29 Jul 2006 17:07:59 +0000 (17:07 +0000)
16 files changed:
src/display/canvas-bpath.cpp
src/display/canvas-grid.cpp
src/display/guideline.cpp
src/display/nr-arena-item.cpp
src/display/nr-arena-shape.cpp
src/display/nr-gradient-gpl.cpp
src/display/nr-plain-stuff.cpp
src/libnr/nr-blit.cpp
src/libnr/nr-compose-transform.cpp
src/libnr/nr-compose.cpp
src/libnr/nr-compose.h
src/libnr/nr-gradient.cpp
src/libnr/nr-pixblock-pattern.cpp
src/libnr/nr-pixblock-pixel.cpp
src/libnr/nr-pixops.h
src/make.exclude

index 17c88001795514f6a59c83afe495b031b3c44768..909ea84ec1db61f65ce75b05ce7cb12530e4582a 100644 (file)
@@ -344,9 +344,9 @@ bpath_run_A8_OR (raster_info &dest,void *data,int st,float vst,int en,float ven)
        if ( sv > 249.999 ) {
            /* Simple copy */
            while (len > 0) {
-               d[0] = NR_COMPOSEN11 (r, 255, d[0]);
-               d[1] = NR_COMPOSEN11 (g, 255, d[1]);
-               d[2] = NR_COMPOSEN11 (b, 255, d[2]);
+               d[0] = NR_COMPOSEN11_1111 (r, 255, d[0]);
+               d[1] = NR_COMPOSEN11_1111 (g, 255, d[1]);
+               d[2] = NR_COMPOSEN11_1111 (b, 255, d[2]);
                d += 3;
                len -= 1;
            }
@@ -354,9 +354,9 @@ bpath_run_A8_OR (raster_info &dest,void *data,int st,float vst,int en,float ven)
            unsigned int c0_24=(int)sv;
            c0_24&=0xFF;
            while (len > 0) {
-               d[0] = NR_COMPOSEN11 (r, c0_24, d[0]);
-               d[1] = NR_COMPOSEN11 (g, c0_24, d[1]);
-               d[2] = NR_COMPOSEN11 (b, c0_24, d[2]);
+               d[0] = NR_COMPOSEN11_1111 (r, c0_24, d[0]);
+               d[1] = NR_COMPOSEN11_1111 (g, c0_24, d[1]);
+               d[2] = NR_COMPOSEN11_1111 (b, c0_24, d[2]);
                d += 3;
                len -= 1;
            }
@@ -366,9 +366,9 @@ bpath_run_A8_OR (raster_info &dest,void *data,int st,float vst,int en,float ven)
            sv=0.5*(vst+ven);
            unsigned int c0_24=(int)sv;
            c0_24&=0xFF;
-           d[0] = NR_COMPOSEN11 (r, c0_24, d[0]);
-           d[1] = NR_COMPOSEN11 (g, c0_24, d[1]);
-           d[2] = NR_COMPOSEN11 (b, c0_24, d[2]);
+           d[0] = NR_COMPOSEN11_1111 (r, c0_24, d[0]);
+           d[1] = NR_COMPOSEN11_1111 (g, c0_24, d[1]);
+           d[2] = NR_COMPOSEN11_1111 (b, c0_24, d[2]);
        } else {
            dv/=len;
            sv+=0.5*dv; // correction trapezoidale
@@ -381,9 +381,9 @@ bpath_run_A8_OR (raster_info &dest,void *data,int st,float vst,int en,float ven)
                /* Draw */
                ca = c0_24 >> 16;
                if ( ca > 255 ) ca=255;
-               d[0] = NR_COMPOSEN11 (r, ca, d[0]);
-               d[1] = NR_COMPOSEN11 (g, ca, d[1]);
-               d[2] = NR_COMPOSEN11 (b, ca, d[2]);
+               d[0] = NR_COMPOSEN11_1111 (r, ca, d[0]);
+               d[1] = NR_COMPOSEN11_1111 (g, ca, d[1]);
+               d[2] = NR_COMPOSEN11_1111 (b, ca, d[2]);
                d += 3;
                c0_24 += s0_24;
                c0_24 = CLAMP (c0_24, 0, 16777216);
index 6618c2358a34ed0ac04e68c2f8de7224d40f56d7..9fb6974f8e8878ccf4179d2f2eb10cd69abe124d 100644 (file)
@@ -159,9 +159,9 @@ sp_grid_hline (SPCanvasBuf *buf, gint y, gint xs, gint xe, guint32 rgba)
         x1 = MIN (buf->rect.x1, xe + 1);
         p = buf->buf + (y - buf->rect.y0) * buf->buf_rowstride + (x0 - buf->rect.x0) * 3;
         for (x = x0; x < x1; x++) {
-            p[0] = NR_COMPOSEN11 (r, a, p[0]);
-            p[1] = NR_COMPOSEN11 (g, a, p[1]);
-            p[2] = NR_COMPOSEN11 (b, a, p[2]);
+            p[0] = NR_COMPOSEN11_1111 (r, a, p[0]);
+            p[1] = NR_COMPOSEN11_1111 (g, a, p[1]);
+            p[2] = NR_COMPOSEN11_1111 (b, a, p[2]);
             p += 3;
         }
     }
@@ -182,9 +182,9 @@ sp_grid_vline (SPCanvasBuf *buf, gint x, gint ys, gint ye, guint32 rgba)
         y1 = MIN (buf->rect.y1, ye + 1);
         p = buf->buf + (y0 - buf->rect.y0) * buf->buf_rowstride + (x - buf->rect.x0) * 3;
         for (y = y0; y < y1; y++) {
-            p[0] = NR_COMPOSEN11 (r, a, p[0]);
-            p[1] = NR_COMPOSEN11 (g, a, p[1]);
-            p[2] = NR_COMPOSEN11 (b, a, p[2]);
+            p[0] = NR_COMPOSEN11_1111 (r, a, p[0]);
+            p[1] = NR_COMPOSEN11_1111 (g, a, p[1]);
+            p[2] = NR_COMPOSEN11_1111 (b, a, p[2]);
             p += buf->buf_rowstride;
         }
     }
index d44ac8ab80cd2521807ed35e089f2d00cbe17f05..41429b408d27a7b74176250f9d9fc29f50b4a559 100644 (file)
@@ -115,9 +115,9 @@ static void sp_guideline_render(SPCanvasItem *item, SPCanvasBuf *buf)
     }
 
     for (int p = p0; p < p1; p++) {
-        d[0] = NR_COMPOSEN11(r, a, d[0]);
-        d[1] = NR_COMPOSEN11(g, a, d[1]);
-        d[2] = NR_COMPOSEN11(b, a, d[2]);
+        d[0] = NR_COMPOSEN11_1111(r, a, d[0]);
+        d[1] = NR_COMPOSEN11_1111(g, a, d[1]);
+        d[2] = NR_COMPOSEN11_1111(b, a, d[2]);
         d += step;
     }
 }
index 7e03c51dd2cf10506c738fe52d698255485e0163..581bee231054d8c8bf5563c72434c7105a0fb741 100644 (file)
@@ -627,8 +627,8 @@ unsigned int nr_arena_item_invoke_render(NRArenaItem *item, NRRectL const *area,
               d = NR_PIXBLOCK_PX (&mpb) + (y - carea.y0) * mpb.rs;
               for (x = carea.x0; x < carea.x1; x++) {
                 unsigned int m;
-                m = ((s[0] + s[1] + s[2]) * s[3] + 127) / (3 * 255);
-                d[0] = NR_PREMUL (d[0], m);
+                m = NR_PREMUL_112(s[0]+s[1]+s[2], s[3]);
+                d[0] = FAST_DIV_ROUND<3*255*255>(NR_PREMUL_123(d[0], m));
                 s += 4;
                 d += 1;
               }
@@ -641,8 +641,8 @@ unsigned int nr_arena_item_invoke_render(NRArenaItem *item, NRRectL const *area,
               d = NR_PIXBLOCK_PX (&mpb) + (y - carea.y0) * mpb.rs;
               for (x = carea.x0; x < carea.x1; x++) {
                 unsigned int m;
-                m = ((s[0] + s[1] + s[2]) * s[3] + 127) / (3 * 255);
-                d[0] = m;
+                m = NR_PREMUL_112(s[0]+s[1]+s[2], s[3]);
+                d[0] = FAST_DIV_ROUND<3*255>(m);
                 s += 4;
                 d += 1;
               }
@@ -660,7 +660,7 @@ unsigned int nr_arena_item_invoke_render(NRArenaItem *item, NRRectL const *area,
             unsigned char *d;
             d = NR_PIXBLOCK_PX (&mpb) + (y - carea.y0) * mpb.rs;
             for (x = carea.x0; x < carea.x1; x++) {
-              d[0] = NR_PREMUL (d[0], a);
+              d[0] = NR_PREMUL_111 (d[0], a);
               d += 1;
             }
           }
index abe343a1dac834e7107faccc7390b6ba121d3471..4653b5ce68c79c9182d59c5b11857746e6232cba 100644 (file)
@@ -801,7 +801,7 @@ nr_arena_shape_clip(NRArenaItem *item, NRRectL *area, NRPixBlock *pb)
             s = NR_PIXBLOCK_PX(&m) + (y - area->y0) * m.rs;
             d = NR_PIXBLOCK_PX(pb) + (y - area->y0) * pb->rs;
             for (int x = area->x0; x < area->x1; x++) {
-                *d = NR_A7_NORMALIZED(*s,*d);
+                *d = NR_COMPOSEA_111(*s, *d);
                 d ++;
                 s ++;
             }
@@ -1148,10 +1148,8 @@ shape_run_A8_OR(raster_info &dest,void */*data*/,int st,float vst,int en,float v
             unsigned int c0_24=(int)sv;
             c0_24&=0xFF;
             while (len > 0) {
-                unsigned int da;
                 /* Draw */
-                da = NR_A7(c0_24,d[0]);
-                d[0] = NR_PREMUL_SINGLE(da);
+                d[0] = NR_COMPOSEA_111(c0_24,d[0]);
                 d += 1;
                 len -= 1;
             }
@@ -1162,10 +1160,8 @@ shape_run_A8_OR(raster_info &dest,void */*data*/,int st,float vst,int en,float v
             sv*=256;
             unsigned int c0_24=(int)sv;
             c0_24&=0xFF;
-            unsigned int da;
             /* Draw */
-            da = NR_A7(c0_24,d[0]);
-            d[0] = NR_PREMUL_SINGLE(da);
+            d[0] = NR_COMPOSEA_111(c0_24,d[0]);
         } else {
             dv/=len;
             sv+=0.5*dv; // correction trapezoidale
@@ -1174,12 +1170,11 @@ shape_run_A8_OR(raster_info &dest,void */*data*/,int st,float vst,int en,float v
             int c0_24 = static_cast<int>(CLAMP(sv, 0, 16777216));
             int s0_24 = static_cast<int>(dv);
             while (len > 0) {
-                unsigned int ca, da;
+                unsigned int ca;
                 /* Draw */
                 ca = c0_24 >> 16;
                 if ( ca > 255 ) ca=255;
-                da = NR_A7(ca,d[0]);
-                d[0] = NR_PREMUL_SINGLE(da);
+                d[0] = NR_COMPOSEA_111(ca,d[0]);
                 d += 1;
                 c0_24 += s0_24;
                 c0_24 = CLAMP(c0_24, 0, 16777216);
index 536217649ae2ea1b0ba8870bb5740475f1207d85..e3ee033259ae2032c10b3094e05dac85148d3a7f 100644 (file)
@@ -200,11 +200,11 @@ nr_lgradient_render_R8G8B8A8N (NRLGradientRenderer *lgr, unsigned char *px, int
                                d[2] = s[2];
                                d[3] = 255;
                        } else if (s[3] != 0) {
-                               ca = NR_A7(s[3],d[3]);
-                               d[0] = NR_COMPOSENNN_A7 (s[0], s[3], d[0], d[3], ca);
-                               d[1] = NR_COMPOSENNN_A7 (s[1], s[3], d[1], d[3], ca);
-                               d[2] = NR_COMPOSENNN_A7 (s[2], s[3], d[2], d[3], ca);
-                               d[3] = NR_PREMUL_SINGLE(ca);
+                               ca = NR_COMPOSEA_112(s[3],d[3]);
+                               d[0] = NR_COMPOSENNN_111121 (s[0], s[3], d[0], d[3], ca);
+                               d[1] = NR_COMPOSENNN_111121 (s[1], s[3], d[1], d[3], ca);
+                               d[2] = NR_COMPOSENNN_111121 (s[2], s[3], d[2], d[3], ca);
+                               d[3] = NR_NORMALIZE_21(ca);
                        }
                        d += 4;
                        pos += lgr->dx;
@@ -242,9 +242,9 @@ nr_lgradient_render_R8G8B8 (NRLGradientRenderer *lgr, unsigned char *px, int x0,
                        }
                        /* Full composition */
                        s = lgr->vector + 4 * idx;
-                       d[0] = NR_COMPOSEN11 (s[0], s[3], d[0]);
-                       d[1] = NR_COMPOSEN11 (s[1], s[3], d[1]);
-                       d[2] = NR_COMPOSEN11 (s[2], s[3], d[2]);
+                       d[0] = NR_COMPOSEN11_1111 (s[0], s[3], d[0]);
+                       d[1] = NR_COMPOSEN11_1111 (s[1], s[3], d[1]);
+                       d[2] = NR_COMPOSEN11_1111 (s[2], s[3], d[2]);
                        d += 3;
                        pos += lgr->dx;
                }
index af6e002ec987bd6f5e0908f5669299054cf5f420..62a61102ea05a1599d9e0d5f64a5a7f967c7b3a9 100644 (file)
@@ -79,14 +79,14 @@ nr_render_rgba32_rgb (guchar *px, gint w, gint h, gint rs, gint xoff, gint yoff,
        b = NR_RGBA32_B (c);
        a = NR_RGBA32_A (c);
 
-       cr = NR_COMPOSEN11 (r, a, NR_RGBA32_R (NR_DEFAULT_CHECKERCOLOR0));
-       cg = NR_COMPOSEN11 (g, a, NR_RGBA32_G (NR_DEFAULT_CHECKERCOLOR0));
-       cb = NR_COMPOSEN11 (b, a, NR_RGBA32_B (NR_DEFAULT_CHECKERCOLOR0));
+       cr = NR_COMPOSEN11_1111 (r, a, NR_RGBA32_R (NR_DEFAULT_CHECKERCOLOR0));
+       cg = NR_COMPOSEN11_1111 (g, a, NR_RGBA32_G (NR_DEFAULT_CHECKERCOLOR0));
+       cb = NR_COMPOSEN11_1111 (b, a, NR_RGBA32_B (NR_DEFAULT_CHECKERCOLOR0));
        c0 = (cr << 24) | (cg << 16) | (cb << 8) | 0xff;
 
-       cr = NR_COMPOSEN11 (r, a, NR_RGBA32_R (NR_DEFAULT_CHECKERCOLOR1));
-       cg = NR_COMPOSEN11 (g, a, NR_RGBA32_G (NR_DEFAULT_CHECKERCOLOR1));
-       cb = NR_COMPOSEN11 (b, a, NR_RGBA32_B (NR_DEFAULT_CHECKERCOLOR1));
+       cr = NR_COMPOSEN11_1111 (r, a, NR_RGBA32_R (NR_DEFAULT_CHECKERCOLOR1));
+       cg = NR_COMPOSEN11_1111 (g, a, NR_RGBA32_G (NR_DEFAULT_CHECKERCOLOR1));
+       cb = NR_COMPOSEN11_1111 (b, a, NR_RGBA32_B (NR_DEFAULT_CHECKERCOLOR1));
        c1 = (cr << 24) | (cg << 16) | (cb << 8) | 0xff;
 
        nr_render_checkerboard_rgb_custom (px, w, h, rs, xoff, yoff, c0, c1, NR_DEFAULT_CHECKERSIZEP2);
index 2a93bc9bd05d3502fdac2135c0e9576b52fc4b76..b25f0e2a77149a646023297dd3f16e3f5f9b96ce 100644 (file)
@@ -270,24 +270,24 @@ nr_blit_pixblock_mask_rgba32 (NRPixBlock *d, NRPixBlock *m, unsigned long rgba)
                                unsigned int da;
                                switch (d->mode) {
                                case NR_PIXBLOCK_MODE_R8G8B8:
-                                       p[0] = NR_COMPOSEN11 (r, a, p[0]);
-                                       p[1] = NR_COMPOSEN11 (g, a, p[1]);
-                                       p[2] = NR_COMPOSEN11 (b, a, p[2]);
+                                       p[0] = NR_COMPOSEN11_1111 (r, a, p[0]);
+                                       p[1] = NR_COMPOSEN11_1111 (g, a, p[1]);
+                                       p[2] = NR_COMPOSEN11_1111 (b, a, p[2]);
                                        p += 3;
                                        break;
                                case NR_PIXBLOCK_MODE_R8G8B8A8P:
-                                       p[0] = NR_COMPOSENPP (r, a, p[0], p[3]);
-                                       p[1] = NR_COMPOSENPP (g, a, p[1], p[3]);
-                                       p[2] = NR_COMPOSENPP (b, a, p[2], p[3]);
-                                       p[3] = (65025 - (255 - a) * (255 - p[3]) + 127) / 255;
+                                       p[0] = NR_COMPOSENPP_1111 (r, a, p[0]);
+                                       p[1] = NR_COMPOSENPP_1111 (g, a, p[1]);
+                                       p[2] = NR_COMPOSENPP_1111 (b, a, p[2]);
+                                       p[3] = NR_COMPOSEA_111(a, p[3]);
                                        p += 4;
                                        break;
                                case NR_PIXBLOCK_MODE_R8G8B8A8N:
-                                       da = 65025 - (255 - a) * (255 - p[3]);
-                                       p[0] = NR_COMPOSENNN_A7 (r, a, p[0], p[3], da);
-                                       p[1] = NR_COMPOSENNN_A7 (g, a, p[1], p[3], da);
-                                       p[2] = NR_COMPOSENNN_A7 (b, a, p[2], p[3], da);
-                                       p[3] = (da + 127) / 255;
+                                       da = NR_COMPOSEA_112(a, p[3]);
+                                       p[0] = NR_COMPOSENNN_111121 (r, a, p[0], p[3], da);
+                                       p[1] = NR_COMPOSENNN_111121 (g, a, p[1], p[3], da);
+                                       p[2] = NR_COMPOSENNN_111121 (b, a, p[2], p[3], da);
+                                       p[3] = NR_NORMALIZE_21(da);
                                        p += 4;
                                        break;
                                default:
index bb5022a74f078b13af2582c5b7fa87811e73cb4f..afc8fd987b40d0dc9c4bde8cb2c1d98be5892d48 100644 (file)
@@ -121,11 +121,11 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h,
                                                const unsigned char *s;
                                                unsigned int ca;
                                                s = spx + sy * srs + sx * 4;
-                                               ca = NR_PREMUL (s[3], alpha);
-                                               r += NR_PREMUL (s[0], ca);
-                                               g += NR_PREMUL (s[1], ca);
-                                               b += NR_PREMUL (s[2], ca);
-                                               a += ca;
+                                               ca = NR_PREMUL_112 (s[3], alpha);
+                                               r += NR_PREMUL_121 (s[0], ca);
+                                               g += NR_PREMUL_121 (s[1], ca);
+                                               b += NR_PREMUL_121 (s[2], ca);
+                                               a += NR_NORMALIZE_21(ca);
                                        }
                                }
                        }
@@ -143,11 +143,11 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h,
                                } else {
                                        unsigned int ca;
                                        /* Full composition */
-                                       ca = 65025 - (255 - a) * (255 - d[3]);
-                                       d[0] = NR_COMPOSENNN_A7 (r, a, d[0], d[3], ca);
-                                       d[1] = NR_COMPOSENNN_A7 (g, a, d[1], d[3], ca);
-                                       d[2] = NR_COMPOSENNN_A7 (b, a, d[2], d[3], ca);
-                                       d[3] = (ca + 127) / 255;
+                                       ca = NR_COMPOSEA_112(a, d[3]);
+                                       d[0] = NR_COMPOSENNN_111121 (r, a, d[0], d[3], ca);
+                                       d[1] = NR_COMPOSENNN_111121 (g, a, d[1], d[3], ca);
+                                       d[2] = NR_COMPOSENNN_111121 (b, a, d[2], d[3], ca);
+                                       d[3] = NR_NORMALIZE_21(ca);
                                }
                        }
                        /* Advance pointers */
@@ -193,19 +193,19 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h
                                        const unsigned char *s;
                                        unsigned int a;
                                        s = spx + sy * srs + sx * 4;
-                                       a = NR_PREMUL (s[3], alpha);
+                                       a = NR_PREMUL_112 (s[3], alpha);
                                        if (a != 0) {
-                                               if ((a == 255) || (d[3] == 0)) {
+                                               if ((a == 255*255) || (d[3] == 0)) {
                                                        /* Transparent BG, premul src */
-                                                       d[0] = NR_PREMUL (s[0], a);
-                                                       d[1] = NR_PREMUL (s[1], a);
-                                                       d[2] = NR_PREMUL (s[2], a);
-                                                       d[3] = a;
+                                                       d[0] = NR_PREMUL_121 (s[0], a);
+                                                       d[1] = NR_PREMUL_121 (s[1], a);
+                                                       d[2] = NR_PREMUL_121 (s[2], a);
+                                                       d[3] = NR_NORMALIZE_21(a);
                                                } else {
-                                                       d[0] = NR_COMPOSENPP (s[0], a, d[0], d[3]);
-                                                       d[1] = NR_COMPOSENPP (s[1], a, d[1], d[3]);
-                                                       d[2] = NR_COMPOSENPP (s[2], a, d[2], d[3]);
-                                                       d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255;
+                                                       d[0] = NR_COMPOSENPP_1211 (s[0], a, d[0]);
+                                                       d[1] = NR_COMPOSENPP_1211 (s[1], a, d[1]);
+                                                       d[2] = NR_COMPOSENPP_1211 (s[2], a, d[2]);
+                                                       d[3] = NR_COMPOSEA_211(a, d[3]);
                                                }
                                        }
                                }
@@ -259,10 +259,10 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h
                                                const unsigned char *s;
                                                unsigned int ca;
                                                s = spx + sy * srs + sx * 4;
-                                               ca = s[3] * alpha;
-                                               r += s[0] * ca;
-                                               g += s[1] * ca;
-                                               b += s[2] * ca;
+                                               ca = NR_PREMUL_112(s[3], alpha);
+                                               r += NR_PREMUL_123(s[0], ca);
+                                               g += NR_PREMUL_123(s[1], ca);
+                                               b += NR_PREMUL_123(s[2], ca);
                                                a += ca;
                                        }
                                }
@@ -279,10 +279,10 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h
                                        d[2] = b;
                                        d[3] = a;
                                } else {
-                                       d[0] = NR_COMPOSEPPP (r, a, d[0], d[3]);
-                                       d[1] = NR_COMPOSEPPP (g, a, d[1], d[3]);
-                                       d[2] = NR_COMPOSEPPP (b, a, d[2], d[3]);
-                                       d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255;
+                                       d[0] = NR_COMPOSEPPP_1111 (r, a, d[0]);
+                                       d[1] = NR_COMPOSEPPP_1111 (g, a, d[1]);
+                                       d[2] = NR_COMPOSEPPP_1111 (b, a, d[2]);
+                                       d[3] = NR_COMPOSEA_111(a, d[3]);
                                }
                        }
                        /* Advance pointers */
index f0e9c5e4e09268f5e69d46c0d2d0e28d04a3df17..3b99678e26b40255080f69d65f685be95e78284f 100644 (file)
@@ -31,26 +31,26 @@ void nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, c
 #endif /* __cplusplus */
 #endif
 
+// Naming: nr_RESULT_BACKGROUND_FOREGROUND_extra
+
 void
 nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
+       unsigned int r, c;
 
-       for (r = 0; r < h; r++) {
+       for (r = h; r > 0; r--) {
                if (alpha == 0) {
-                       memset (px, 0x0, 4 * w);
+                       memset(px, 0x0, 4 * w);
                } else if (alpha == 255) {
-                       memcpy (px, spx, 4 * w);
+                       memcpy(px, spx, 4 * w);
                } else {
-                       const unsigned char *s;
-                       unsigned char *d;
-                       d = px;
-                       s = spx;
-                       for (c = 0; c < w; c++) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
                                *d++ = *s++;
                                *d++ = *s++;
                                *d++ = *s++;
-                               *d++ = NR_PREMUL (*s, alpha);
+                               *d++ = NR_PREMUL_111(*s, alpha);
                                s++;
                        }
                }
@@ -62,50 +62,68 @@ nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const u
 void
 nr_R8G8B8A8_N_EMPTY_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
+       unsigned int r, c;
 
-       for (r = 0; r < h; r++) {
+       for (r = h; r > 0; r--) {
                if (alpha == 0) {
-                       memset (px, 0x0, 4 * w);
+                       memset(px, 0x0, 4 * w);
                } else {
-                       const unsigned char *s;
-                       unsigned char *d;
-                       s = spx;
-                       d = px;
-                       for (c = 0; c < w; c++) {
-                               unsigned int a;
-                               a = NR_PREMUL (s[3], alpha);
-                               d[0] = s[0];
-                               d[1] = s[1];
-                               d[2] = s[2];
-                               d[3] = a;
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               if (s[3] == 0) {
+                                       d[3] = 0;
+                               } else if (s[3] == 255) {
+                                       memcpy(d, s, 4);
+                               } else {
+                                       d[0] = NR_DEMUL_111(s[0], s[3]);
+                                       d[1] = NR_DEMUL_111(s[1], s[3]);
+                                       d[2] = NR_DEMUL_111(s[2], s[3]);
+                                       d[3] = NR_PREMUL_111(s[3], alpha);
+                               }
                                d += 4;
                                s += 4;
                        }
-                       px += rs;
-                       spx += srs;
                }
+               px += rs;
+               spx += srs;
        }
 }
 
 void
 nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
+       unsigned int r, c;
 
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (c = 0; c < w; c++) {
-                       unsigned int a;
-                       a = (s[3] * alpha + 127) / 255;
-                       d[0] = (s[0] * a + 127) / 255;
-                       d[1] = (s[1] * a + 127) / 255;
-                       d[2] = (s[2] * a + 127) / 255;
-                       d[3] = a;
-                       d += 4;
-                       s += 4;
+       for (r = h; r > 0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               if (alpha == 0) {
+                       memset(px, 0x0, 4 * w);
+               } else if (alpha == 255) {
+                       for (c = w; c > 0; c--) {
+                               d[0] = NR_PREMUL_111(s[0], s[3]);
+                               d[1] = NR_PREMUL_111(s[1], s[3]);
+                               d[2] = NR_PREMUL_111(s[2], s[3]);
+                               d[3] = s[3];
+                               d += 4;
+                               s += 4;
+                       }
+               } else {
+                       for (c = w; c > 0; c--) {
+                               if (s[3] == 0) {
+                                       memset(d, 0, 4);
+                               } else {
+                                       unsigned int a;
+                                       a = NR_PREMUL_112(s[3], alpha);
+                                       d[0] = NR_PREMUL_121(s[0], a);
+                                       d[1] = NR_PREMUL_121(s[1], a);
+                                       d[2] = NR_PREMUL_121(s[2], a);
+                                       d[3] = NR_NORMALIZE_21(a);
+                               }
+                               d += 4;
+                               s += 4;
+                       }
                }
                px += rs;
                spx += srs;
@@ -115,26 +133,24 @@ nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const u
 void
 nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (c = 0; c < w; c++) {
-                       if (alpha == 255) {
-                               d[0] = s[0];
-                               d[1] = s[1];
-                               d[2] = s[2];
-                               d[3] = s[3];
-                       } else {
-                               d[0] = NR_PREMUL (s[0], alpha);
-                               d[1] = NR_PREMUL (s[1], alpha);
-                               d[2] = NR_PREMUL (s[2], alpha);
-                               d[3] = NR_PREMUL (s[3], alpha);
+       unsigned int r, c;
+
+       for (r = h; r > 0; r--) {
+               if (alpha == 0) {
+                       memset(px, 0x0, 4 * w);
+               } else if (alpha == 255) {
+                       memcpy(px, spx, 4 * w);
+               } else {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               d[0] = NR_PREMUL_111(s[0], alpha);
+                               d[1] = NR_PREMUL_111(s[1], alpha);
+                               d[2] = NR_PREMUL_111(s[2], alpha);
+                               d[3] = NR_PREMUL_111(s[3], alpha);
+                               d += 4;
+                               s += 4;
                        }
-                       d += 4;
-                       s += 4;
                }
                px += rs;
                spx += srs;
@@ -144,167 +160,280 @@ nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const u
 void
 nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (c = 0; c < w; c++) {
-                       unsigned int a;
-                       a = NR_PREMUL (s[3], alpha);
-                       if (a == 0) {
-                               /* Transparent FG, NOP */
-                       } else if ((a == 255) || (d[3] == 0)) {
-                               /* Full coverage, COPY */
-                               d[0] = s[0];
-                               d[1] = s[1];
-                               d[2] = s[2];
-                               d[3] = a;
-                       } else {
-                               unsigned int ca;
-                               /* Full composition */
-                               ca = 65025 - (255 - a) * (255 - d[3]);
-                               d[0] = NR_COMPOSENNN_A7 (s[0], a, d[0], d[3], ca);
-                               d[1] = NR_COMPOSENNN_A7 (s[1], a, d[1], d[3], ca);
-                               d[2] = NR_COMPOSENNN_A7 (s[2], a, d[2], d[3], ca);
-                               d[3] = (ca + 127) / 255;
+       unsigned int r, c;
+
+       if (alpha == 0) {
+               /* NOP */
+       } else if (alpha == 255) {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               if (s[3] == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if ((s[3] == 255) || (d[3] == 0)) {
+                                       /* Full coverage, COPY */
+                                       memcpy(d, s, 4);
+                               } else {
+                                       /* Full composition */
+                                       unsigned int ca;
+                                       ca = NR_COMPOSEA_112(s[3], d[3]);
+                                       d[0] = NR_COMPOSENNN_111121(s[0], s[3], d[0], d[3], ca);
+                                       d[1] = NR_COMPOSENNN_111121(s[1], s[3], d[1], d[3], ca);
+                                       d[2] = NR_COMPOSENNN_111121(s[2], s[3], d[2], d[3], ca);
+                                       d[3] = NR_NORMALIZE_21(ca);
+                               }
+                               d += 4;
+                               s += 4;
                        }
-                       d += 4;
-                       s += 4;
+                       px += rs;
+                       spx += srs;
+               }
+       } else {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               unsigned int a;
+                               a = NR_PREMUL_112(s[3], alpha);
+                               if (a == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if ((a == 255*255) || (d[3] == 0)) {
+                                       /* Full coverage, COPY */
+                                       d[0] = s[0];
+                                       d[1] = s[1];
+                                       d[2] = s[2];
+                                       d[3] = NR_NORMALIZE_21(a);
+                               } else {
+                                       /* Full composition */
+                                       unsigned int ca;
+                                       ca = NR_COMPOSEA_213(a, d[3]);
+                                       d[0] = NR_COMPOSENNN_121131(s[0], a, d[0], d[3], ca);
+                                       d[1] = NR_COMPOSENNN_121131(s[1], a, d[1], d[3], ca);
+                                       d[2] = NR_COMPOSENNN_121131(s[2], a, d[2], d[3], ca);
+                                       d[3] = NR_NORMALIZE_31(ca);
+                               }
+                               d += 4;
+                               s += 4;
+                       }
+                       px += rs;
+                       spx += srs;
                }
-               px += rs;
-               spx += srs;
        }
 }
 
 void
 nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (c = 0; c < w; c++) {
-                       unsigned int a;
-                       a = NR_PREMUL (s[3], alpha);
-                       if (a == 0) {
-                               /* Transparent FG, NOP */
-                       } else if ((a == 255) || (d[3] == 0)) {
-                               /* Full coverage, demul src */
-                               d[0] = (s[0] * 255 + (s[3] >> 1)) / s[3];
-                               d[1] = (s[1] * 255 + (s[3] >> 1)) / s[3];
-                               d[2] = (s[2] * 255 + (s[3] >> 1)) / s[3];
-                               d[3] = a;
-                       } else {
-                               if (alpha == 255) {
-                                       unsigned int ca;
+       unsigned int r, c;
+
+       if (alpha == 0) {
+               /* NOP */
+       } else if (alpha == 255) {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               if (s[3] == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if (s[3] == 255) {
+                                       /* Opaque FG, COPY */
+                                       //   dc' = ((1 - sa) * da*dc + sc)/da' = sc/da' = sc
+                                       //   da' = 1 - (1 - sa) * (1 - da) = 1 - 0 * (1 - da) = 1
+                                       memcpy(d, s, 4);
+                               } else if (d[3] == 0) {
+                                       /* Transparent BG, demul src */
+                                       //   dc' = ((1 - sa) * da*dc + sc)/da' = sc/da' = sc/sa
+                                       //   da' = 1 - (1 - sa) * (1 - da) = 1 - (1 - sa) = sa
+                                       d[0] = NR_DEMUL_111(s[0], s[3]);
+                                       d[1] = NR_DEMUL_111(s[1], s[3]);
+                                       d[2] = NR_DEMUL_111(s[2], s[3]);
+                                       d[3] = s[3];
+                               } else {
                                        /* Full composition */
-                                       ca = 65025 - (255 - s[3]) * (255 - d[3]);
-                                       d[0] = NR_COMPOSEPNN_A7 (s[0], s[3], d[0], d[3], ca);
-                                       d[1] = NR_COMPOSEPNN_A7 (s[1], s[3], d[1], d[3], ca);
-                                       d[2] = NR_COMPOSEPNN_A7 (s[2], s[3], d[2], d[3], ca);
-                                       d[3] = (65025 - (255 - s[3]) * (255 - d[3]) + 127) / 255;
+                                       //   dc' = ((1 - sa) * da*dc + sc)/da'
+                                       //   da' = 1 - (1 - sa) * (1 - da)
+                                       unsigned int da = NR_COMPOSEA_112(s[3], d[3]);
+                                       d[0] = NR_COMPOSEPNN_111121(s[0], s[3], d[0], d[3], da);
+                                       d[1] = NR_COMPOSEPNN_111121(s[1], s[3], d[1], d[3], da);
+                                       d[2] = NR_COMPOSEPNN_111121(s[2], s[3], d[2], d[3], da);
+                                       d[3] = NR_NORMALIZE_21(da);
+                               }
+                               d += 4;
+                               s += 4;
+                       }
+                       px += rs;
+                       spx += srs;
+               }
+       } else {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               unsigned int a;
+                               a = NR_PREMUL_112(s[3], alpha);
+                               if (a == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if (d[3] == 0) {
+                                       /* Transparent BG, demul src */
+                                       //   dc' = ((1 - alpha*sa) * da*dc + alpha*sc)/da' = alpha*sc/da' = alpha*sc/(alpha*sa) = sc/sa
+                                       //   da' = 1 - (1 - alpha*sa) * (1 - da) = 1 - (1 - alpha*sa) = alpha*sa
+                                       d[0] = NR_DEMUL_111(s[0], s[3]);
+                                       d[1] = NR_DEMUL_111(s[1], s[3]);
+                                       d[2] = NR_DEMUL_111(s[2], s[3]);
+                                       d[3] = NR_NORMALIZE_21(a);
                                } else {
-                                       // calculate premultiplied from two premultiplieds:
-                                       d[0] = NR_COMPOSEPPP(NR_PREMUL (s[0], alpha), a, NR_PREMUL (d[0], d[3]), 0); // last parameter not used
-                                       d[1] = NR_COMPOSEPPP(NR_PREMUL (s[1], alpha), a, NR_PREMUL (d[1], d[3]), 0);
-                                       d[2] = NR_COMPOSEPPP(NR_PREMUL (s[2], alpha), a, NR_PREMUL (d[2], d[3]), 0);
-                                       // total opacity:
-                                       d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255;
-                                       // un-premultiply channels:
-                                       d[0] = d[0]*255/d[3];
-                                       d[1] = d[1]*255/d[3];
-                                       d[2] = d[2]*255/d[3];
+                                       //   dc' = ((1 - alpha*sa) * da*dc + alpha*sc)/da'
+                                       //   da' = 1 - (1 - alpha*sa) * (1 - da)
+                                       unsigned int da = NR_COMPOSEA_213(a, d[3]);
+                                       d[0] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[0], alpha), a, d[0], d[3], da);
+                                       d[1] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[1], alpha), a, d[1], d[3], da);
+                                       d[2] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[2], alpha), a, d[2], d[3], da);
+                                       d[3] = NR_NORMALIZE_31(da);
                                }
+                               d += 4;
+                               s += 4;
                        }
-                       d += 4;
-                       s += 4;
+                       px += rs;
+                       spx += srs;
                }
-               px += rs;
-               spx += srs;
        }
 }
 
 void
 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (c = 0; c < w; c++) {
-                       unsigned int a;
-                       a = NR_PREMUL (s[3], alpha);
-                       if (a == 0) {
-                               /* Transparent FG, NOP */
-                       } else if ((a == 255) || (d[3] == 0)) {
-                               /* Transparent BG, premul src */
-                               d[0] = NR_PREMUL (s[0], a);
-                               d[1] = NR_PREMUL (s[1], a);
-                               d[2] = NR_PREMUL (s[2], a);
-                               d[3] = a;
-                       } else {
-                               d[0] = NR_COMPOSENPP (s[0], a, d[0], d[3]);
-                               d[1] = NR_COMPOSENPP (s[1], a, d[1], d[3]);
-                               d[2] = NR_COMPOSENPP (s[2], a, d[2], d[3]);
-                               d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255;
+       unsigned int r, c;
+
+       if (alpha == 0) {
+               /* NOP */
+       } else if (alpha == 255) {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               if (s[3] == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if (s[3] == 255) {
+                                       /* Opaque FG, COPY */
+                                       //   dc' = (1 - sa) * dc + sa*sc = sa*sc = sc
+                                       //   da' = 1 - (1 - sa) * (1 - da) = 1 - 0 * (1 - da) = 1 (= sa)
+                                       memcpy(d, s, 4);
+                               } else if (d[3] == 0) {
+                                       /* Transparent BG, premul src */
+                                       //   dc' = (1 - sa) * dc + sa*sc = sa*sc
+                                       //   da' = 1 - (1 - sa) * (1 - da) = 1 - (1 - sa) = sa
+                                       d[0] = NR_PREMUL_111(s[0], s[3]);
+                                       d[1] = NR_PREMUL_111(s[1], s[3]);
+                                       d[2] = NR_PREMUL_111(s[2], s[3]);
+                                       d[3] = s[3];
+                               } else {
+                                       //   dc' = (1 - sa) * dc + sa*sc
+                                       //   da' = 1 - (1 - sa) * (1 - da)
+                                       d[0] = NR_COMPOSENPP_1111(s[0], s[3], d[0]);
+                                       d[1] = NR_COMPOSENPP_1111(s[1], s[3], d[1]);
+                                       d[2] = NR_COMPOSENPP_1111(s[2], s[3], d[2]);
+                                       d[3] = NR_COMPOSEA_111(s[3], d[3]);
+                               }
+                               d += 4;
+                               s += 4;
                        }
-                       d += 4;
-                       s += 4;
+                       px += rs;
+                       spx += srs;
+               }
+       } else {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               unsigned int a;
+                               a = NR_PREMUL_112 (s[3], alpha);
+                               if (a == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if (d[3] == 0) {
+                                       /* Transparent BG, premul src */
+                                       //   dc' = (1 - alpha*sa) * dc + alpha*sa*sc = alpha*sa*sc
+                                       //   da' = 1 - (1 - alpha*sa) * (1 - da) = 1 - (1 - alpha*sa) = alpha*sa
+                                       d[0] = NR_PREMUL_121(s[0], a);
+                                       d[1] = NR_PREMUL_121(s[1], a);
+                                       d[2] = NR_PREMUL_121(s[2], a);
+                                       d[3] = NR_NORMALIZE_21(a);
+                               } else {
+                                       //   dc' = (1 - alpha*sa) * dc + alpha*sa*sc
+                                       //   da' = 1 - (1 - alpha*sa) * (1 - da)
+                                       d[0] = NR_COMPOSENPP_1211(s[0], a, d[0]);
+                                       d[1] = NR_COMPOSENPP_1211(s[1], a, d[1]);
+                                       d[2] = NR_COMPOSENPP_1211(s[2], a, d[2]);
+                                       d[3] = NR_COMPOSEA_211(a, d[3]);
+                               }
+                               d += 4;
+                               s += 4;
+                       }
+                       px += rs;
+                       spx += srs;
                }
-               px += rs;
-               spx += srs;
        }
 }
 
 void
 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (c = 0; c < w; c++) {
-                       unsigned int a;
-                       a = NR_PREMUL (s[3], alpha);
-                       if (a == 0) {
-                               /* Transparent FG, NOP */
-                       } else if ((a == 255) || (d[3] == 0)) {
-                               /* Transparent BG, COPY */
-                               d[0] = NR_PREMUL (s[0], alpha);
-                               d[1] = NR_PREMUL (s[1], alpha);
-                               d[2] = NR_PREMUL (s[2], alpha);
-                               d[3] = NR_PREMUL (s[3], alpha);
-                       } else {
-                               if (alpha == 255) {
-                                       /* Simple */
-                                       d[0] = NR_COMPOSEPPP (s[0], s[3], d[0], d[3]);
-                                       d[1] = NR_COMPOSEPPP (s[1], s[3], d[1], d[3]);
-                                       d[2] = NR_COMPOSEPPP (s[2], s[3], d[2], d[3]);
-                                       d[3] = (65025 - (255 - s[3]) * (255 - d[3]) + 127) / 255;
+       unsigned int r, c;
+
+       if (alpha == 0) {
+               /* Transparent FG, NOP */
+       } else if (alpha == 255) {
+               /* Simple */
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               if (s[3] == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if ((s[3] == 255) || (d[3] == 0)) {
+                                       /* Opaque FG or transparent BG, COPY */
+                                       memcpy(d, s, 4);
                                } else {
-                                       unsigned int c;
-                                       c = NR_PREMUL (s[0], alpha);
-                                       d[0] = NR_COMPOSEPPP (c, a, d[0], d[3]);
-                                       c = NR_PREMUL (s[1], alpha);
-                                       d[1] = NR_COMPOSEPPP (c, a, d[1], d[3]);
-                                       c = NR_PREMUL (s[2], alpha);
-                                       d[2] = NR_COMPOSEPPP (c, a, d[2], d[3]);
-                                       d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255;
+                                       d[0] = NR_COMPOSEPPP_1111(s[0], s[3], d[0]);
+                                       d[1] = NR_COMPOSEPPP_1111(s[1], s[3], d[1]);
+                                       d[2] = NR_COMPOSEPPP_1111(s[2], s[3], d[2]);
+                                       d[3] = NR_COMPOSEA_111(s[3], d[3]);
                                }
+                               d += 4;
+                               s += 4;
                        }
-                       d += 4;
-                       s += 4;
+                       px += rs;
+                       spx += srs;
+               }
+       } else {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               if (s[3] == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if (d[3] == 0) {
+                                       /* Transparent BG, scale premultiplied src by alpha */
+                                       d[0] = NR_PREMUL_111(s[0], alpha);
+                                       d[1] = NR_PREMUL_111(s[1], alpha);
+                                       d[2] = NR_PREMUL_111(s[2], alpha);
+                                       d[3] = NR_PREMUL_111(s[3], alpha);
+                               } else {
+                                       //   dc' = (1 - alpha*sa) * dc + alpha*sc
+                                       //   da' = 1 - (1 - alpha*sa) * (1 - da)
+                                       unsigned int a;
+                                       a = NR_PREMUL_112(s[3], alpha);
+                                       d[0] = NR_COMPOSEPPP_2211(NR_PREMUL_112(alpha, s[0]), a, d[0]);
+                                       d[1] = NR_COMPOSEPPP_2211(NR_PREMUL_112(alpha, s[1]), a, d[1]);
+                                       d[2] = NR_COMPOSEPPP_2211(NR_PREMUL_112(alpha, s[2]), a, d[2]);
+                                       d[3] = NR_COMPOSEA_211(a, d[3]);
+                               }
+                               d += 4;
+                               s += 4;
+                       }
+                       px += rs;
+                       spx += srs;
                }
-               px += rs;
-               spx += srs;
        }
 }
 
@@ -313,18 +442,17 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, co
 void
 nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int x, y;
-
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (x = 0; x < w; x++) {
+       unsigned int r, c;
+
+       for (r = h; r > 0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               const unsigned char *m = mpx;
+               for (c = w; c > 0; c--) {
                        d[0] = s[0];
                        d[1] = s[1];
                        d[2] = s[2];
-                       d[3] = (s[3] * m[0] + 127) / 255;
+                       d[3] = NR_PREMUL_111(s[3], m[0]);
                        d += 4;
                        s += 4;
                        m += 1;
@@ -338,23 +466,26 @@ nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, cons
 void
 nr_R8G8B8A8_N_EMPTY_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int x, y;
-
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (x = 0; x < w; x++) {
+       unsigned int r, c;
+
+       for (r = h; r > 0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               const unsigned char *m = mpx;
+               for (c = w; c > 0; c--) {
                        unsigned int a;
-                       a = NR_PREMUL (s[3], m[0]);
+                       a = NR_PREMUL_112 (s[3], m[0]);
                        if (a == 0) {
                                d[3] = 0;
+                       } else if (a == 255*255) {
+                               memcpy(d, s, 4);
                        } else {
-                               d[0] = (s[0] * 255 + (a >> 1)) / a;
-                               d[1] = (s[1] * 255 + (a >> 1)) / a;
-                               d[2] = (s[2] * 255 + (a >> 1)) / a;
-                               d[3] = a;
+                               //   dc' = ((1 - m*sa) * da*dc + m*sc)/da' = m*sc/da' = m*sc/(m*sa) = sc/sa
+                               //   da' = 1 - (1 - m*sa) * (1 - da) = 1 - (1 - m*sa) = m*sa
+                               d[0] = NR_DEMUL_111(s[0], s[3]);
+                               d[1] = NR_DEMUL_111(s[1], s[3]);
+                               d[2] = NR_DEMUL_111(s[2], s[3]);
+                               d[3] = NR_NORMALIZE_21(a);
                        }
                        d += 4;
                        s += 4;
@@ -369,20 +500,25 @@ nr_R8G8B8A8_N_EMPTY_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, cons
 void
 nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (c = 0; c < w; c++) {
+       unsigned int r, c;
+
+       for (r = h; r > 0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               const unsigned char *m = mpx;
+               for (c = w; c > 0; c--) {
                        unsigned int a;
-                       a = NR_PREMUL (s[3], m[0]);
-                       d[0] = NR_PREMUL (s[0], a);
-                       d[1] = NR_PREMUL (s[1], a);
-                       d[2] = NR_PREMUL (s[2], a);
-                       d[3] = a;
+                       a = NR_PREMUL_112(s[3], m[0]);
+                       if (a == 0) {
+                               memset(d, 0, 4);
+                       } else if (a == 255*255) {
+                               memcpy(d, s, 4);
+                       } else {
+                               d[0] = NR_PREMUL_121(s[0], a);
+                               d[1] = NR_PREMUL_121(s[1], a);
+                               d[2] = NR_PREMUL_121(s[2], a);
+                               d[3] = NR_NORMALIZE_21(a);
+                       }
                        d += 4;
                        s += 4;
                        m += 1;
@@ -396,25 +532,17 @@ nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, cons
 void
 nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (c = 0; c < w; c++) {
-                       if (m[0] == 255) {
-                               d[0] = s[0];
-                               d[1] = s[1];
-                               d[2] = s[2];
-                               d[3] = s[3];
-                       } else {
-                               d[0] = NR_PREMUL (s[0], m[0]);
-                               d[1] = NR_PREMUL (s[1], m[0]);
-                               d[2] = NR_PREMUL (s[2], m[0]);
-                               d[3] = NR_PREMUL (s[3], m[0]);
-                       }
+       unsigned int r, c;
+
+       for (r = h; r > 0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               const unsigned char *m = mpx;
+               for (c = w; c > 0; c--) {
+                       d[0] = NR_PREMUL_111(s[0], m[0]);
+                       d[1] = NR_PREMUL_111(s[1], m[0]);
+                       d[2] = NR_PREMUL_111(s[2], m[0]);
+                       d[3] = NR_PREMUL_111(s[3], m[0]);
                        d += 4;
                        s += 4;
                        m += 1;
@@ -428,32 +556,31 @@ nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, cons
 void
 nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (c = 0; c < w; c++) {
+       unsigned int r, c;
+
+       for (r = h; r > 0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               const unsigned char *m = mpx;
+               for (c = w; c > 0; c--) {
                        unsigned int a;
-                       a = NR_PREMUL (s[3], m[0]);
+                       a = NR_PREMUL_112(s[3], m[0]);
                        if (a == 0) {
                                /* Transparent FG, NOP */
-                       } else if ((a == 255) || (d[3] == 0)) {
+                       } else if ((a == 255*255) || (d[3] == 0)) {
                                /* Full coverage, COPY */
                                d[0] = s[0];
                                d[1] = s[1];
                                d[2] = s[2];
-                               d[3] = a;
+                               d[3] = NR_NORMALIZE_21(a);
                        } else {
-                               unsigned int ca;
                                /* Full composition */
-                               ca = 65025 - (255 - a) * (255 - d[3]);
-                               d[0] = NR_COMPOSENNN_A7 (s[0], a, d[0], d[3], ca);
-                               d[1] = NR_COMPOSENNN_A7 (s[1], a, d[1], d[3], ca);
-                               d[2] = NR_COMPOSENNN_A7 (s[2], a, d[2], d[3], ca);
-                               d[3] = (ca + 127) / 255;
+                               unsigned int ca;
+                               ca = NR_COMPOSEA_213(a, d[3]);
+                               d[0] = NR_COMPOSENNN_121131(s[0], a, d[0], d[3], ca);
+                               d[1] = NR_COMPOSENNN_121131(s[1], a, d[1], d[3], ca);
+                               d[2] = NR_COMPOSENNN_121131(s[2], a, d[2], d[3], ca);
+                               d[3] = NR_NORMALIZE_31(ca);
                        }
                        d += 4;
                        s += 4;
@@ -468,45 +595,45 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs,
 void
 nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (c = 0; c < w; c++) {
+       unsigned int r, c;
+
+       for (r = h; r > 0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               const unsigned char *m = mpx;
+               for (c = w; c > 0; c--) {
                        unsigned int a;
-                       a = NR_PREMUL (s[3], m[0]);
+                       a = NR_PREMUL_112(s[3], m[0]);
                        if (a == 0) {
                                /* Transparent FG, NOP */
-                       } else if ((a == 255) || (d[3] == 0)) {
+                       } else if (a == 255*255) {
+                               /* Opaque FG, COPY */
+                               memcpy(d, s, 4);
+                       } else if (d[3] == 0) {
                                /* Full coverage, demul src */
-                               d[0] = (s[0] * 255 + (s[3] >> 1)) / s[3];
-                               d[1] = (s[1] * 255 + (s[3] >> 1)) / s[3];
-                               d[2] = (s[2] * 255 + (s[3] >> 1)) / s[3];
-                               d[3] = a;
+                               //   dc' = ((1 - m*sa) * da*dc + m*sc)/da' = m*sc/da' = m*sc/(m*sa) = sc/sa
+                               //   da' = 1 - (1 - m*sa) * (1 - da) = 1 - (1 - m*sa) = m*sa
+                               d[0] = NR_DEMUL_111(s[0], s[3]);
+                               d[1] = NR_DEMUL_111(s[1], s[3]);
+                               d[2] = NR_DEMUL_111(s[2], s[3]);
+                               d[3] = NR_NORMALIZE_21(a);
+                       } else if (m[0] == 255) {
+                               /* Full composition */
+                               //   dc' = ((1 - m*sa) * da*dc + m*sc)/da' = ((1 - sa) * da*dc + sc)/da'
+                               //   da' = 1 - (1 - m*sa) * (1 - da) = 1 - (1 - sa) * (1 - da)
+                               unsigned int da = NR_COMPOSEA_112(s[3], d[3]);
+                               d[0] = NR_COMPOSEPNN_111121(s[0], s[3], d[0], d[3], da);
+                               d[1] = NR_COMPOSEPNN_111121(s[1], s[3], d[1], d[3], da);
+                               d[2] = NR_COMPOSEPNN_111121(s[2], s[3], d[2], d[3], da);
+                               d[3] = NR_NORMALIZE_21(da);
                        } else {
-                               if (m[0] == 255) {
-                                       unsigned int ca;
-                                       /* Full composition */
-                                       ca = 65025 - (255 - s[3]) * (255 - d[3]);
-                                       d[0] = NR_COMPOSEPNN_A7 (s[0], s[3], d[0], d[3], ca);
-                                       d[1] = NR_COMPOSEPNN_A7 (s[1], s[3], d[1], d[3], ca);
-                                       d[2] = NR_COMPOSEPNN_A7 (s[2], s[3], d[2], d[3], ca);
-                                       d[3] = (65025 - (255 - s[3]) * (255 - d[3]) + 127) / 255;
-                               } else {
-                                       // calculate premultiplied from two premultiplieds:
-                                       d[0] = NR_COMPOSEPPP(NR_PREMUL (s[0], m[0]), a, NR_PREMUL (d[0], d[3]), 0); // last parameter not used
-                                       d[1] = NR_COMPOSEPPP(NR_PREMUL (s[1], m[0]), a, NR_PREMUL (d[1], d[3]), 0);
-                                       d[2] = NR_COMPOSEPPP(NR_PREMUL (s[2], m[0]), a, NR_PREMUL (d[2], d[3]), 0);
-                                       // total opacity:
-                                       d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255;
-                                       // un-premultiply channels:
-                                       d[0] = d[0]*255/d[3];
-                                       d[1] = d[1]*255/d[3];
-                                       d[2] = d[2]*255/d[3];
-                               }
+                               //   dc' = ((1 - m*sa) * da*dc + m*sc)/da'
+                               //   da' = 1 - (1 - m*sa) * (1 - da)
+                               unsigned int da = NR_COMPOSEA_213(a, d[3]);
+                               d[0] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[0], m[0]), a, d[0], d[3], da);
+                               d[1] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[1], m[0]), a, d[1], d[3], da);
+                               d[2] = NR_COMPOSEPNN_221131(NR_PREMUL_112(s[2], m[0]), a, d[2], d[3], da);
+                               d[3] = NR_NORMALIZE_31(da);
                        }
                        d += 4;
                        s += 4;
@@ -521,29 +648,24 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs,
 void
 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (c = 0; c < w; c++) {
+       unsigned int r, c;
+
+       for (r = h; r>0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               const unsigned char *m = mpx;
+               for (c = w; c>0; c--) {
                        unsigned int a;
-                       a = NR_PREMUL (s[3], m[0]);
+                       a = NR_PREMUL_112(s[3], m[0]);
                        if (a == 0) {
                                /* Transparent FG, NOP */
-                       } else if ((a == 255) || (d[3] == 0)) {
-                               /* Transparent BG, premul src */
-                               d[0] = NR_PREMUL (s[0], a);
-                               d[1] = NR_PREMUL (s[1], a);
-                               d[2] = NR_PREMUL (s[2], a);
-                               d[3] = a;
+                       } else if (a == 255*255) {
+                               memcpy(d, s, 4);
                        } else {
-                               d[0] = NR_COMPOSENPP (s[0], a, d[0], d[3]);
-                               d[1] = NR_COMPOSENPP (s[1], a, d[1], d[3]);
-                               d[2] = NR_COMPOSENPP (s[2], a, d[2], d[3]);
-                               d[3] = (65025 - (255 - a) * (255 - d[3]) + 127) / 255;
+                               d[0] = NR_COMPOSENPP_1211(s[0], a, d[0]);
+                               d[1] = NR_COMPOSENPP_1211(s[1], a, d[1]);
+                               d[2] = NR_COMPOSENPP_1211(s[2], a, d[2]);
+                               d[3] = NR_COMPOSEA_211(a, d[3]);
                        }
                        d += 4;
                        s += 4;
@@ -558,41 +680,35 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs,
 void
 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (c = 0; c < w; c++) {
+       unsigned int r, c;
+
+       for (r = h; r > 0; r--) {
+               unsigned char *d = px;
+               const unsigned char *s = spx;
+               const unsigned char *m = mpx;
+               for (c = w; c > 0; c--) {
                        unsigned int a;
-                       a = NR_PREMUL (s[3], m[0]);
+                       a = NR_PREMUL_112 (s[3], m[0]);
                        if (a == 0) {
                                /* Transparent FG, NOP */
-                       } else if ((a == 255) || (d[3] == 0)) {
+                       } else if (a == 255*255) {
+                               /* Opaque FG, COPY */
+                               memcpy(d, s, 4);
+                       } else if (d[3] == 0) {
                                /* Transparent BG, COPY */
-                               d[0] = NR_PREMUL (s[0], m[0]);
-                               d[1] = NR_PREMUL (s[1], m[0]);
-                               d[2] = NR_PREMUL (s[2], m[0]);
-                               d[3] = NR_PREMUL (s[3], m[0]);
+                               //   dc' = (1 - m*sa) * dc + m*sc = m*sc
+                               //   da' = 1 - (1 - m*sa) * (1 - da) = 1 - (1 - m*sa)  = m*sa
+                               d[0] = NR_PREMUL_111 (s[0], m[0]);
+                               d[1] = NR_PREMUL_111 (s[1], m[0]);
+                               d[2] = NR_PREMUL_111 (s[2], m[0]);
+                               d[3] = NR_NORMALIZE_21(a);
                        } else {
-                               if (m[0] == 255) {
-                                       /* Simple */
-                                       d[0] = NR_COMPOSEPPP (s[0], s[3], d[0], d[3]);
-                                       d[1] = NR_COMPOSEPPP (s[1], s[3], d[1], d[3]);
-                                       d[2] = NR_COMPOSEPPP (s[2], s[3], d[2], d[3]);
-                                       d[3] = NR_A7_NORMALIZED(s[3], d[3]);
-                               } else {
-                                       unsigned int c;
-                                       c = NR_PREMUL (s[0], m[0]);
-                                       d[0] = NR_COMPOSEPPP (c, a, d[0], d[3]);
-                                       c = NR_PREMUL (s[1], m[0]);
-                                       d[1] = NR_COMPOSEPPP (c, a, d[1], d[3]);
-                                       c = NR_PREMUL (s[2], m[0]);
-                                       d[2] = NR_COMPOSEPPP (c, a, d[2], d[3]);
-                                       d[3] = NR_A7_NORMALIZED(a, d[3]);
-                               }
+                               //   dc' = (1 - m*sa) * dc + m*sc
+                               //   da' = 1 - (1 - m*sa) * (1 - da)
+                               d[0] = NR_COMPOSEPPP_2211 (NR_PREMUL_112 (s[0], m[0]), a, d[0]);
+                               d[1] = NR_COMPOSEPPP_2211 (NR_PREMUL_112 (s[1], m[0]), a, d[1]);
+                               d[2] = NR_COMPOSEPPP_2211 (NR_PREMUL_112 (s[2], m[0]), a, d[2]);
+                               d[3] = NR_COMPOSEA_211(a, d[3]);
                        }
                        d += 4;
                        s += 4;
@@ -604,78 +720,97 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs,
        }
 }
 
+/* FINAL DST MASK COLOR */
+
 void
-nr_R8G8B8A8_N_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba)
+nr_R8G8B8A8_N_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba)
 {
        unsigned int r, g, b, a;
-       int x, y;
+       unsigned int x, y;
 
        r = NR_RGBA32_R (rgba);
        g = NR_RGBA32_G (rgba);
        b = NR_RGBA32_B (rgba);
        a = NR_RGBA32_A (rgba);
 
-       if (a == 0) return;
-
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (x = 0; x < w; x++) {
-                       d[0] = r;
-                       d[1] = g;
-                       d[2] = b;
-                       d[3] = NR_PREMUL (s[0], a);
-                       d += 4;
-                       s += 1;
+       for (y = h; y > 0; y--) {
+               if (a == 0) {
+                       memset(px, 0, w*4);
+               } else {
+                       unsigned char *d = px;
+                       const unsigned char *m = mpx;
+                       for (x = w; x > 0; x--) {
+                               d[0] = r;
+                               d[1] = g;
+                               d[2] = b;
+                               d[3] = NR_PREMUL_111 (m[0], a);
+                               d += 4;
+                               m += 1;
+                       }
                }
                px += rs;
-               spx += srs;
+               mpx += mrs;
        }
 }
 
 void
-nr_R8G8B8A8_P_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba)
+nr_R8G8B8A8_P_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba)
 {
        unsigned int r, g, b, a;
-       int x, y;
+       unsigned int x, y;
 
        r = NR_RGBA32_R (rgba);
        g = NR_RGBA32_G (rgba);
        b = NR_RGBA32_B (rgba);
        a = NR_RGBA32_A (rgba);
 
-       if (a == 0) return;
-
 #ifdef WITH_MMX
        if (NR_PIXOPS_MMX) {
                unsigned char c[4];
-               c[0] = NR_PREMUL (r, a);
-               c[1] = NR_PREMUL (g, a);
-               c[2] = NR_PREMUL (b, a);
+               c[0] = NR_PREMUL_111 (r, a);
+               c[1] = NR_PREMUL_111 (g, a);
+               c[2] = NR_PREMUL_111 (b, a);
                c[3] = a;
                /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
-               nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP (px, w, h, rs, spx, srs, c);
+               nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP (px, w, h, rs, mpx, mrs, c);
                return;
        }
 #endif
 
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (x = 0; x < w; x++) {
-                       unsigned int ca;
-                       ca = s[0] * a;
-                       d[0] = (r * ca + 32512) / 65025;
-                       d[1] = (g * ca + 32512) / 65025;
-                       d[2] = (b * ca + 32512) / 65025;
-                       d[3] = (ca + 127) / 255;
-                       d += 4;
-                       s += 1;
+       if ( a != 255 ){
+               // Pre-premultiply color values
+               r *= a;
+               g *= a;
+               b *= a;
+       }
+
+       for (y = h; y > 0; y--) {
+               unsigned char *d = px;
+               const unsigned char *m = mpx;
+               if (a == 0) {
+                       memset(px, 0, w*4);
+               } else if (a == 255) {
+                       for (x = w; x > 0; x--) {
+                               d[0] = NR_PREMUL_111(m[0], r);
+                               d[1] = NR_PREMUL_111(m[0], g);
+                               d[2] = NR_PREMUL_111(m[0], b);
+                               d[3] = m[0];
+                               d += 4;
+                               m += 1;
+                       }
+               } else {
+                       for (x = w; x > 0; x--) {
+                               // Color values are already premultiplied with a
+                               d[0] = NR_PREMUL_121(m[0], r);
+                               d[1] = NR_PREMUL_121(m[0], g);
+                               d[2] = NR_PREMUL_121(m[0], b);
+                               d[3] = NR_PREMUL_111(m[0], a);
+                               d += 4;
+                               m += 1;
+                       }
                }
                px += rs;
-               spx += srs;
+               mpx += mrs;
        }
 }
 
@@ -683,75 +818,118 @@ void
 nr_R8G8B8_R8G8B8_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba)
 {
        unsigned int r, g, b, a;
-       int x, y;
+       unsigned int x, y;
 
        r = NR_RGBA32_R (rgba);
        g = NR_RGBA32_G (rgba);
        b = NR_RGBA32_B (rgba);
        a = NR_RGBA32_A (rgba);
 
-       if (a == 0) return;
-
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *m;
-               d = (unsigned char *) px;
-               m = (unsigned char *) mpx;
-               for (x = 0; x < w; x++) {
-                       unsigned int alpha;
-                       alpha = NR_PREMUL (a, m[0]);
-                       d[0] = NR_COMPOSEN11 (r, alpha, d[0]);
-                       d[1] = NR_COMPOSEN11 (g, alpha, d[1]);
-                       d[2] = NR_COMPOSEN11 (b, alpha, d[2]);
-                       d += 3;
-                       m += 1;
+       if (a == 0) {
+               /* NOP */
+       } else if (a == 255) {
+               for (y = h; y > 0; y--) {
+                       unsigned char *d = px;
+                       const unsigned char *m = mpx;
+                       for (x = w; x > 0; x--) {
+                               d[0] = NR_COMPOSEN11_1111 (r, m[0], d[0]);
+                               d[1] = NR_COMPOSEN11_1111 (g, m[0], d[1]);
+                               d[2] = NR_COMPOSEN11_1111 (b, m[0], d[2]);
+                               d += 3;
+                               m += 1;
+                       }
+                       px += rs;
+                       mpx += mrs;
+               }
+       } else {
+               for (y = h; y > 0; y--) {
+                       unsigned char *d = px;
+                       const unsigned char *m = mpx;
+                       for (x = w; x > 0; x--) {
+                               //   dc' = (1 - m*sa) * dc + m*sa*sc
+                               unsigned int alpha;
+                               alpha = NR_PREMUL_112 (a, m[0]);
+                               d[0] = NR_COMPOSEN11_1211 (r, alpha, d[0]);
+                               d[1] = NR_COMPOSEN11_1211 (g, alpha, d[1]);
+                               d[2] = NR_COMPOSEN11_1211 (b, alpha, d[2]);
+                               d += 3;
+                               m += 1;
+                       }
+                       px += rs;
+                       mpx += mrs;
                }
-               px += rs;
-               mpx += mrs;
        }
 }
 
 void
-nr_R8G8B8A8_N_R8G8B8A8_N_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba)
+nr_R8G8B8A8_N_R8G8B8A8_N_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba)
 {
        unsigned int r, g, b, a;
-       int x, y;
+       unsigned int x, y;
 
        r = NR_RGBA32_R (rgba);
        g = NR_RGBA32_G (rgba);
        b = NR_RGBA32_B (rgba);
        a = NR_RGBA32_A (rgba);
 
-       if (a == 0) return;
-
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (x = 0; x < w; x++) {
-                       unsigned int ca;
-                       ca = NR_PREMUL (s[0], a);
-                       if (ca == 0) {
-                               /* Transparent FG, NOP */
-                       } else if ((ca == 255) || (d[3] == 0)) {
-                               /* Full coverage, COPY */
-                               d[0] = r;
-                               d[1] = g;
-                               d[2] = b;
-                               d[3] = ca;
-                       } else {
-                               unsigned int da;
-                               /* Full composition */
-                               da = 65025 - (255 - ca) * (255 - d[3]);
-                               d[0] = NR_COMPOSENNN_A7 (r, ca, d[0], d[3], da);
-                               d[1] = NR_COMPOSENNN_A7 (g, ca, d[1], d[3], da);
-                               d[2] = NR_COMPOSENNN_A7 (b, ca, d[2], d[3], da);
-                               d[3] = (da + 127) / 255;
+       if (a == 0) {
+               /* NOP */
+       } else if (a == 255) {
+               for (y = h; y > 0; y--) {
+                       unsigned char *d = px;
+                       const unsigned char *m = mpx;
+                       for (x = w; x > 0; x--) {
+                               if (m[0] == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if (m[0] == 255 || d[3] == 0) {
+                                       /* Full coverage, COPY */
+                                       d[0] = r;
+                                       d[1] = g;
+                                       d[2] = b;
+                                       d[3] = m[0];
+                               } else {
+                                       /* Full composition */
+                                       unsigned int da = NR_COMPOSEA_112(m[0], d[3]);
+                                       d[0] = NR_COMPOSENNN_111121(r, m[0], d[0], d[3], da);
+                                       d[1] = NR_COMPOSENNN_111121(g, m[0], d[1], d[3], da);
+                                       d[2] = NR_COMPOSENNN_111121(b, m[0], d[2], d[3], da);
+                                       d[3] = NR_NORMALIZE_21(da);
+                               }
+                               d += 4;
+                               m += 1;
                        }
-                       d += 4;
-                       s += 1;
+                       px += rs;
+                       mpx += mrs;
+               }
+       } else {
+               for (y = h; y > 0; y--) {
+                       unsigned char *d = px;
+                       const unsigned char *m = mpx;
+                       for (x = w; x > 0; x--) {
+                               unsigned int ca;
+                               ca = NR_PREMUL_112 (m[0], a);
+                               if (ca == 0) {
+                                       /* Transparent FG, NOP */
+                               } else if (d[3] == 0) {
+                                       /* Transparent BG, COPY */
+                                       d[0] = r;
+                                       d[1] = g;
+                                       d[2] = b;
+                                       d[3] = NR_NORMALIZE_21(ca);
+                               } else {
+                                       /* Full composition */
+                                       unsigned int da = NR_COMPOSEA_213(ca, d[3]);
+                                       d[0] = NR_COMPOSENNN_121131(r, ca, d[0], d[3], da);
+                                       d[1] = NR_COMPOSENNN_121131(g, ca, d[1], d[3], da);
+                                       d[2] = NR_COMPOSENNN_121131(b, ca, d[2], d[3], da);
+                                       d[3] = NR_NORMALIZE_31(da);
+                               }
+                               d += 4;
+                               m += 1;
+                       }
+                       px += rs;
+                       mpx += mrs;
                }
-               px += rs;
-               spx += srs;
        }
 }
 
@@ -759,9 +937,7 @@ void
 nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba)
 {
        unsigned int r, g, b, a;
-       int x, y;
-
-       if (!(rgba & 0xff)) return;
+       unsigned int x, y;
 
        r = NR_RGBA32_R (rgba);
        g = NR_RGBA32_G (rgba);
@@ -769,11 +945,11 @@ nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, con
        a = NR_RGBA32_A (rgba);
 
 #ifdef WITH_MMX
-       if (NR_PIXOPS_MMX) {
+       if (NR_PIXOPS_MMX && a != 0) {
                unsigned char c[4];
-               c[0] = NR_PREMUL (r, a);
-               c[1] = NR_PREMUL (g, a);
-               c[2] = NR_PREMUL (b, a);
+               c[0] = NR_PREMUL_111 (r, a);
+               c[1] = NR_PREMUL_111 (g, a);
+               c[2] = NR_PREMUL_111 (b, a);
                c[3] = a;
                /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
                nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP (px, w, h, rs, spx, srs, c);
@@ -781,33 +957,55 @@ nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, con
        }
 #endif
 
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *s;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               for (x = 0; x < w; x++) {
-                       unsigned int ca;
-                       ca = NR_PREMUL (s[0], a);
-                       if (ca == 0) {
-                               /* Transparent FG, NOP */
-                       } else if ((ca == 255) || (d[3] == 0)) {
-                               /* Full coverage, COPY */
-                               d[0] = NR_PREMUL (r, ca);
-                               d[1] = NR_PREMUL (g, ca);
-                               d[2] = NR_PREMUL (b, ca);
-                               d[3] = ca;
-                       } else {
-                               /* Full composition */
-                               d[0] = NR_COMPOSENPP (r, ca, d[0], d[3]);
-                               d[1] = NR_COMPOSENPP (g, ca, d[1], d[3]);
-                               d[2] = NR_COMPOSENPP (b, ca, d[2], d[3]);
-                               d[3] = (65025 - (255 - ca) * (255 - d[3]) + 127) / 255;
+       if (a == 0) {
+               /* Transparent FG, NOP */
+       } else if (a == 255) {
+               /* Simple */
+               for (y = h; y > 0; y--) {
+                       unsigned char *d, *s;
+                       d = (unsigned char *) px;
+                       s = (unsigned char *) spx;
+                       for (x = w; x > 0; x--) {
+                               if (s[0] == 0) {
+                                       /* Transparent FG, NOP */
+                               } else {
+                                       /* Full composition */
+                                       unsigned int invca = 255-s[0]; // By swapping the arguments GCC can better optimize these calls
+                                       d[0] = NR_COMPOSENPP_1111(d[0], invca, r);
+                                       d[1] = NR_COMPOSENPP_1111(d[1], invca, g);
+                                       d[2] = NR_COMPOSENPP_1111(d[2], invca, b);
+                                       d[3] = NR_COMPOSEA_111(s[0], d[3]);
+                               }
+                               d += 4;
+                               s += 1;
                        }
-                       d += 4;
-                       s += 1;
+                       px += rs;
+                       spx += srs;
+               }
+       } else {
+               for (y = h; y > 0; y--) {
+                       unsigned char *d, *s;
+                       d = (unsigned char *) px;
+                       s = (unsigned char *) spx;
+                       for (x = w; x > 0; x--) {
+                               unsigned int ca;
+                               ca = NR_PREMUL_112 (s[0], a);
+                               if (ca == 0) {
+                                       /* Transparent FG, NOP */
+                               } else {
+                                       /* Full composition */
+                                       unsigned int invca = 255*255-ca; // By swapping the arguments GCC can better optimize these calls
+                                       d[0] = NR_COMPOSENPP_1211(d[0], invca, r);
+                                       d[1] = NR_COMPOSENPP_1211(d[1], invca, g);
+                                       d[2] = NR_COMPOSENPP_1211(d[2], invca, b);
+                                       d[3] = NR_COMPOSEA_211(ca, d[3]);
+                               }
+                               d += 4;
+                               s += 1;
+                       }
+                       px += rs;
+                       spx += srs;
                }
-               px += rs;
-               spx += srs;
        }
 }
 
@@ -816,25 +1014,24 @@ nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, con
 void
 nr_R8G8B8_R8G8B8_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
-
-       if (alpha == 0) return;
+       unsigned int r, c;
 
 #ifdef WITH_MMX
-       if (NR_PIXOPS_MMX) {
+       if (NR_PIXOPS_MMX && alpha != 0) {
                /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
                nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P (px, w, h, rs, spx, srs, alpha);
                return;
        }
 #endif
 
-       for (r = 0; r < h; r++) {
-               const unsigned char *s;
-               unsigned char *d;
-               if (alpha == 255) {
-                       d = px;
-                       s = spx;
-                       for (c = 0; c < w; c++) {
+       if (alpha == 0) {
+               /* NOP */
+       } else if (alpha == 255) {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               //   dc' = (1 - alpha*sa) * dc + alpha*sc = (1 - sa) * dc + sc
                                if (s[3] == 0) {
                                        /* NOP */
                                } else if (s[3] == 255) {
@@ -842,50 +1039,54 @@ nr_R8G8B8_R8G8B8_R8G8B8A8_P (unsigned char *px, int w, int h, int rs, const unsi
                                        d[1] = s[1];
                                        d[2] = s[2];
                                } else {
-                                       d[0] = NR_COMPOSEP11 (s[0], s[3], d[0]);
-                                       d[1] = NR_COMPOSEP11 (s[1], s[3], d[1]);
-                                       d[2] = NR_COMPOSEP11 (s[2], s[3], d[2]);
+                                       d[0] = NR_COMPOSEP11_1111(s[0], s[3], d[0]);
+                                       d[1] = NR_COMPOSEP11_1111(s[1], s[3], d[1]);
+                                       d[2] = NR_COMPOSEP11_1111(s[2], s[3], d[2]);
                                }
                                d += 3;
                                s += 4;
                        }
-               } else {
-                       d = px;
-                       s = spx;
-                       for (c = 0; c < w; c++) {
+                       px += rs;
+                       spx += srs;
+               }
+       } else {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
                                unsigned int a;
-                               a = NR_PREMUL (s[3], alpha);
+                               a = NR_PREMUL_112(s[3], alpha);
+                               //   dc' = (1 - alpha*sa) * dc + alpha*sc
                                if (a == 0) {
                                        /* NOP */
                                } else {
-                                       d[0] = NR_COMPOSEP11 (s[0], a, d[0]);
-                                       d[1] = NR_COMPOSEP11 (s[1], a, d[1]);
-                                       d[2] = NR_COMPOSEP11 (s[2], a, d[2]);
+                                       d[0] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[0], alpha), a, d[0]);
+                                       d[1] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[1], alpha), a, d[1]);
+                                       d[2] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[2], alpha), a, d[2]);
                                }
                                /* a == 255 is impossible, because alpha < 255 */
                                d += 3;
                                s += 4;
                        }
+                       px += rs;
+                       spx += srs;
                }
-               px += rs;
-               spx += srs;
        }
 }
 
 void
 nr_R8G8B8_R8G8B8_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned int alpha)
 {
-       int r, c;
-
-       for (r = 0; r < h; r++) {
-               const unsigned char *s;
-               unsigned char *d;
-               if (alpha == 0) {
-                       /* NOP */
-               } else if (alpha == 255) {
-                       d = px;
-                       s = spx;
-                       for (c = 0; c < w; c++) {
+       unsigned int r, c;
+
+       if (alpha == 0) {
+               /* NOP */
+       } else if (alpha == 255) {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
+                               //   dc' = (1 - alpha*sa) * dc + alpha*sa*sc = (1 - sa) * dc + sa*sc
                                if (s[3] == 0) {
                                        /* NOP */
                                } else if (s[3] == 255) {
@@ -893,57 +1094,62 @@ nr_R8G8B8_R8G8B8_R8G8B8A8_N (unsigned char *px, int w, int h, int rs, const unsi
                                        d[1] = s[1];
                                        d[2] = s[2];
                                } else {
-                                       d[0] = NR_COMPOSEN11 (s[0], s[3], d[0]);
-                                       d[1] = NR_COMPOSEN11 (s[1], s[3], d[1]);
-                                       d[2] = NR_COMPOSEN11 (s[2], s[3], d[2]);
+                                       d[0] = NR_COMPOSEN11_1111(s[0], s[3], d[0]);
+                                       d[1] = NR_COMPOSEN11_1111(s[1], s[3], d[1]);
+                                       d[2] = NR_COMPOSEN11_1111(s[2], s[3], d[2]);
                                }
                                d += 3;
                                s += 4;
                        }
-               } else {
-                       d = px;
-                       s = spx;
-                       for (c = 0; c < w; c++) {
+                       px += rs;
+                       spx += srs;
+               }
+       } else {
+               for (r = h; r > 0; r--) {
+                       unsigned char *d = px;
+                       const unsigned char *s = spx;
+                       for (c = w; c > 0; c--) {
                                unsigned int a;
-                               a = NR_PREMUL (s[3], alpha);
+                               a = NR_PREMUL_112(s[3], alpha);
+                               //   dc' = (1 - alpha*sa) * dc + alpha*sa*sc
                                if (a == 0) {
                                        /* NOP */
                                } else {
-                                       d[0] = NR_COMPOSEN11 (s[0], a, d[0]);
-                                       d[1] = NR_COMPOSEN11 (s[1], a, d[1]);
-                                       d[2] = NR_COMPOSEN11 (s[2], a, d[2]);
+                                       d[0] = NR_COMPOSEN11_1211(s[0], a, d[0]);
+                                       d[1] = NR_COMPOSEN11_1211(s[1], a, d[1]);
+                                       d[2] = NR_COMPOSEN11_1211(s[2], a, d[2]);
                                }
                                /* a == 255 is impossible, because alpha < 255 */
                                d += 3;
                                s += 4;
                        }
+                       px += rs;
+                       spx += srs;
                }
-               px += rs;
-               spx += srs;
        }
 }
 
 void
 nr_R8G8B8_R8G8B8_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int x, y;
-
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (x = 0; x < w; x++) {
+       unsigned int x, y;
+
+       for (y = h; y > 0; y--) {
+               unsigned char* d = px;
+               const unsigned char* s = spx;
+               const unsigned char* m = mpx;
+               for (x = w; x > 0; x--) {
                        unsigned int a;
-                       a = NR_PREMUL (s[3], m[0]);
-                       if (a != 0) {
-                               unsigned int r, g, b;
-                               r = NR_PREMUL (s[0], m[0]);
-                               d[0] = NR_COMPOSEP11 (r, a, d[0]);
-                               g = NR_PREMUL (s[1], m[0]);
-                               d[1] = NR_COMPOSEP11 (g, a, d[1]);
-                               b = NR_PREMUL (s[2], m[0]);
-                               d[2] = NR_COMPOSEP11 (b, a, d[2]);
+                       a = NR_PREMUL_112(s[3], m[0]);
+                       if (a == 0) {
+                               /* NOP */
+                       } else if (a == 255*255) {
+                               memcpy(d, s, 3);
+                       } else {
+                               //   dc' = (1 - m*sa) * dc + m*sc
+                               d[0] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[0], m[0]), a, d[0]);
+                               d[1] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[1], m[0]), a, d[1]);
+                               d[2] = NR_COMPOSEP11_2211(NR_PREMUL_112(s[2], m[0]), a, d[2]);
                        }
                        d += 3;
                        s += 4;
@@ -958,20 +1164,24 @@ nr_R8G8B8_R8G8B8_R8G8B8A8_P_A8 (unsigned char *px, int w, int h, int rs, const u
 void
 nr_R8G8B8_R8G8B8_R8G8B8A8_N_A8 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, const unsigned char *mpx, int mrs)
 {
-       int x, y;
-
-       for (y = 0; y < h; y++) {
-               unsigned char *d, *s, *m;
-               d = (unsigned char *) px;
-               s = (unsigned char *) spx;
-               m = (unsigned char *) mpx;
-               for (x = 0; x < w; x++) {
+       unsigned int x, y;
+
+       for (y = h; y > 0; y--) {
+               unsigned char* d = px;
+               const unsigned char* s = spx;
+               const unsigned char* m = mpx;
+               for (x = w; x > 0; x--) {
                        unsigned int a;
-                       a = NR_PREMUL (s[3], m[0]);
-                       if (a != 0) {
-                               d[0] = NR_COMPOSEP11 (s[0], a, d[0]);
-                               d[1] = NR_COMPOSEP11 (s[1], a, d[1]);
-                               d[2] = NR_COMPOSEP11 (s[2], a, d[2]);
+                       a = NR_PREMUL_112(s[3], m[0]);
+                       if (a == 0) {
+                               /* NOP */
+                       } else if (a == 255*255) {
+                               memcpy(d, s, 3);
+                       } else {
+                               //   dc' = (1 - m*sa) * dc + m*sa*sc
+                               d[0] = NR_COMPOSEN11_1211(s[0], a, d[0]);
+                               d[1] = NR_COMPOSEN11_1211(s[1], a, d[1]);
+                               d[2] = NR_COMPOSEN11_1211(s[2], a, d[2]);
                        }
                        d += 3;
                        s += 4;
index ccdb52cb02279158e959a34d497c39a2e645947c..4cecfac602b0b97fc02581fffb743f2270d073aa 100644 (file)
@@ -55,9 +55,9 @@ void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P_A8 (unsigned char *p, int w, int h, int
 void nr_R8G8B8A8_N_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba);
 void nr_R8G8B8A8_P_EMPTY_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba);
 
-void nr_R8G8B8_R8G8B8_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba);
-void nr_R8G8B8A8_N_R8G8B8A8_N_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba);
-void nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *spx, int srs, unsigned long rgba);
+void nr_R8G8B8_R8G8B8_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba);
+void nr_R8G8B8A8_N_R8G8B8A8_N_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba);
+void nr_R8G8B8A8_P_R8G8B8A8_P_A8_RGBA32 (unsigned char *px, int w, int h, int rs, const unsigned char *mpx, int mrs, unsigned long rgba);
 
 /* RGB */
 
index 4553eb598d84a4f2a8437903e8e50b14b241465a..7383d6c762ee19c8d64eebbe69b661b6afca57b5 100644 (file)
@@ -161,10 +161,10 @@ nr_rgradient_render_generic_symmetric(NRRGradientRenderer *rgr, NRPixBlock *pb)
                     idx = (int) CLAMP(pos, 0, (double) NRG_MASK);
                 }
                 unsigned char const *s = rgr->vector + 4 * idx;
-                d[0] = NR_COMPOSENPP(s[0], s[3], d[0], d[3]);
-                d[1] = NR_COMPOSENPP(s[1], s[3], d[1], d[3]);
-                d[2] = NR_COMPOSENPP(s[2], s[3], d[2], d[3]);
-                d[3] = (255*255 - (255 - s[3]) * (255 - d[3]) + 127) / 255;
+                d[0] = NR_COMPOSENPP_1111(s[0], s[3], d[0]);
+                d[1] = NR_COMPOSENPP_1111(s[1], s[3], d[1]);
+                d[2] = NR_COMPOSENPP_1111(s[2], s[3], d[2]);
+                d[3] = NR_COMPOSEA_111(s[3], d[3]);
                 d += 4;
                 gx += dx;
                 gy += dy;
@@ -193,11 +193,11 @@ nr_rgradient_render_generic_symmetric(NRRGradientRenderer *rgr, NRPixBlock *pb)
                     d[2] = s[2];
                     d[3] = 255;
                 } else if (s[3] != 0) {
-                    unsigned ca = 255*255 - (255 - s[3]) * (255 - d[3]);
-                    d[0] = NR_COMPOSENNN_A7(s[0], s[3], d[0], d[3], ca);
-                    d[1] = NR_COMPOSENNN_A7(s[1], s[3], d[1], d[3], ca);
-                    d[2] = NR_COMPOSENNN_A7(s[2], s[3], d[2], d[3], ca);
-                    d[3] = (ca + 127) / 255;
+                    unsigned ca = NR_COMPOSEA_112(s[3], d[3]);
+                    d[0] = NR_COMPOSENNN_111121(s[0], s[3], d[0], d[3], ca);
+                    d[1] = NR_COMPOSENNN_111121(s[1], s[3], d[1], d[3], ca);
+                    d[2] = NR_COMPOSENNN_111121(s[2], s[3], d[2], d[3], ca);
+                    d[3] = NR_NORMALIZE_21(ca);
                 }
                 d += 4;
                 gx += dx;
index b4e25638f383e2535f10e981deb4eb2d68ffd736..77178658420133d515a6b2941a879d607e069d33 100644 (file)
@@ -52,28 +52,28 @@ nr_pixblock_render_gray_noise (NRPixBlock *pb, NRPixBlock *mask)
                                v = v ^ noise[seed];
                                switch (pb->mode) {
                                case NR_PIXBLOCK_MODE_A8:
-                                       d[0] = (65025 - (255 - m[0]) * (255 - d[0]) + 127) / 255;
+                                       d[0] = NR_COMPOSEA_111(m[0], d[0]);
                                        break;
                                case NR_PIXBLOCK_MODE_R8G8B8:
-                                       d[0] = NR_COMPOSEN11 (v, m[0], d[0]);
-                                       d[1] = NR_COMPOSEN11 (v, m[0], d[1]);
-                                       d[2] = NR_COMPOSEN11 (v, m[0], d[2]);
+                                       d[0] = NR_COMPOSEN11_1111 (v, m[0], d[0]);
+                                       d[1] = NR_COMPOSEN11_1111 (v, m[0], d[1]);
+                                       d[2] = NR_COMPOSEN11_1111 (v, m[0], d[2]);
                                        break;
                                case NR_PIXBLOCK_MODE_R8G8B8A8N:
                                        if (m[0] != 0) {
                                                unsigned int ca;
-                                               ca = NR_A7 (m[0], d[3]);
-                                               d[0] = NR_COMPOSENNN_A7 (v, m[0], d[0], d[3], ca);
-                                               d[1] = NR_COMPOSENNN_A7 (v, m[0], d[1], d[3], ca);
-                                               d[2] = NR_COMPOSENNN_A7 (v, m[0], d[2], d[3], ca);
-                                               d[3] = (ca + 127) / 255;
+                                               ca = NR_COMPOSEA_112(m[0], d[3]);
+                                               d[0] = NR_COMPOSENNN_111121 (v, m[0], d[0], d[3], ca);
+                                               d[1] = NR_COMPOSENNN_111121 (v, m[0], d[1], d[3], ca);
+                                               d[2] = NR_COMPOSENNN_111121 (v, m[0], d[2], d[3], ca);
+                                               d[3] = NR_NORMALIZE_21(ca);
                                        }
                                        break;
                                case NR_PIXBLOCK_MODE_R8G8B8A8P:
-                                       d[0] = NR_COMPOSENPP (v, m[0], d[0], d[3]);
-                                       d[1] = NR_COMPOSENPP (v, m[0], d[1], d[3]);
-                                       d[2] = NR_COMPOSENPP (v, m[0], d[2], d[3]);
-                                       d[3] = (NR_A7 (d[3], m[0]) + 127) / 255;
+                                       d[0] = NR_COMPOSENPP_1111 (v, m[0], d[0]);
+                                       d[1] = NR_COMPOSENPP_1111 (v, m[0], d[1]);
+                                       d[2] = NR_COMPOSENPP_1111 (v, m[0], d[2]);
+                                       d[3] = NR_COMPOSEA_111(d[3], m[0]);
                                        break;
                                default:
                                        break;
index c778c0c7f626980c001dcede4c5e7525009c1716..109ed69dc819c318c2b1c10abae8838e248a24d1 100644 (file)
@@ -47,14 +47,14 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP
                                d[2] = s[2];
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8N:
-                               d[0] = NR_COMPOSEN11 (s[0], s[3], 255);
-                               d[1] = NR_COMPOSEN11 (s[1], s[3], 255);
-                               d[2] = NR_COMPOSEN11 (s[2], s[3], 255);
+                               d[0] = NR_COMPOSEN11_1111 (s[0], s[3], 255);
+                               d[1] = NR_COMPOSEN11_1111 (s[1], s[3], 255);
+                               d[2] = NR_COMPOSEN11_1111 (s[2], s[3], 255);
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8P:
-                               d[0] = NR_COMPOSEP11 (s[0], s[3], 255);
-                               d[1] = NR_COMPOSEP11 (s[1], s[3], 255);
-                               d[2] = NR_COMPOSEP11 (s[2], s[3], 255);
+                               d[0] = NR_COMPOSEP11_1111 (s[0], s[3], 255);
+                               d[1] = NR_COMPOSEP11_1111 (s[1], s[3], 255);
+                               d[2] = NR_COMPOSEP11_1111 (s[2], s[3], 255);
                                break;
                        default:
                                break;
@@ -82,9 +82,9 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP
                                        d[1] = 255;
                                        d[2] = 255;
                                } else {
-                                       d[0] = (s[0] * 255) / s[3];
-                                       d[1] = (s[1] * 255) / s[3];
-                                       d[2] = (s[2] * 255) / s[3];
+                                       d[0] = NR_DEMUL_111(s[0], s[3]);
+                                       d[1] = NR_DEMUL_111(s[0], s[3]);
+                                       d[2] = NR_DEMUL_111(s[0], s[3]);
                                }
                                d[3] = s[3];
                                break;
@@ -103,9 +103,9 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP
                                d[3] = 255;
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8N:
-                               d[0] = NR_PREMUL (s[0], s[3]);
-                               d[1] = NR_PREMUL (s[1], s[3]);
-                               d[2] = NR_PREMUL (s[2], s[3]);
+                               d[0] = NR_PREMUL_111 (s[0], s[3]);
+                               d[1] = NR_PREMUL_111 (s[1], s[3]);
+                               d[2] = NR_PREMUL_111 (s[2], s[3]);
                                d[3] = s[3];
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8P:
@@ -132,10 +132,10 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP
                                d[0] = 255;
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8N:
-                               d[0] = NR_A7_NORMALIZED(s[3],d[0]);
+                               d[0] = NR_COMPOSEA_111(s[3], d[0]);
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8P:
-                               d[0] = NR_A7_NORMALIZED(s[3],d[0]);
+                               d[0] = NR_COMPOSEA_111(s[3], d[0]);
                                break;
                        default:
                                break;
@@ -151,14 +151,14 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP
                                d[2] = s[2];
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8N:
-                               d[0] = NR_COMPOSEN11 (s[0], s[3], d[0]);
-                               d[1] = NR_COMPOSEN11 (s[1], s[3], d[1]);
-                               d[2] = NR_COMPOSEN11 (s[2], s[3], d[2]);
+                               d[0] = NR_COMPOSEN11_1111 (s[0], s[3], d[0]);
+                               d[1] = NR_COMPOSEN11_1111 (s[1], s[3], d[1]);
+                               d[2] = NR_COMPOSEN11_1111 (s[2], s[3], d[2]);
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8P:
-                               d[0] = NR_COMPOSEP11 (s[0], s[3], d[0]);
-                               d[1] = NR_COMPOSEP11 (s[1], s[3], d[1]);
-                               d[2] = NR_COMPOSEP11 (s[2], s[3], d[2]);
+                               d[0] = NR_COMPOSEP11_1111 (s[0], s[3], d[0]);
+                               d[1] = NR_COMPOSEP11_1111 (s[1], s[3], d[1]);
+                               d[2] = NR_COMPOSEP11_1111 (s[2], s[3], d[2]);
                                break;
                        default:
                                break;
@@ -176,21 +176,21 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP
                        case NR_PIXBLOCK_MODE_R8G8B8A8N:
                                if (s[3] != 0) {
                                        unsigned int ca;
-                                       ca = NR_A7 (s[3], d[3]);
-                                       d[0] = NR_COMPOSENNN_A7 (s[0], s[3], d[0], d[3], ca);
-                                       d[1] = NR_COMPOSENNN_A7 (s[1], s[3], d[1], d[3], ca);
-                                       d[2] = NR_COMPOSENNN_A7 (s[2], s[3], d[2], d[3], ca);
-                                       d[3] = (ca + 127) / 255;
+                                       ca = NR_COMPOSEA_112(s[3], d[3]);
+                                       d[0] = NR_COMPOSENNN_111121 (s[0], s[3], d[0], d[3], ca);
+                                       d[1] = NR_COMPOSENNN_111121 (s[1], s[3], d[1], d[3], ca);
+                                       d[2] = NR_COMPOSENNN_111121 (s[2], s[3], d[2], d[3], ca);
+                                       d[3] = NR_NORMALIZE_21(ca);
                                }
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8P:
                                if (s[3] != 0) {
                                        unsigned int ca;
-                                       ca = NR_A7 (s[3], d[3]);
-                                       d[0] = NR_COMPOSEPNN_A7 (s[0], s[3], d[0], d[3], ca);
-                                       d[1] = NR_COMPOSEPNN_A7 (s[1], s[3], d[0], d[3], ca);
-                                       d[2] = NR_COMPOSEPNN_A7 (s[2], s[3], d[0], d[3], ca);
-                                       d[3] = (ca + 127) / 255;
+                                       ca = NR_COMPOSEA_112(s[3], d[3]);
+                                       d[0] = NR_COMPOSEPNN_111121 (s[0], s[3], d[0], d[3], ca);
+                                       d[1] = NR_COMPOSEPNN_111121 (s[1], s[3], d[0], d[3], ca);
+                                       d[2] = NR_COMPOSEPNN_111121 (s[2], s[3], d[0], d[3], ca);
+                                       d[3] = NR_NORMALIZE_21(ca);
                                }
                                break;
                        default:
@@ -207,16 +207,16 @@ nr_compose_pixblock_pixblock_pixel (NRPixBlock *dpb, unsigned char *d, const NRP
                                d[2] = s[2];
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8N:
-                               d[0] = NR_COMPOSENPP (s[0], s[3], d[0], d[3]);
-                               d[1] = NR_COMPOSENPP (s[1], s[3], d[1], d[3]);
-                               d[2] = NR_COMPOSENPP (s[2], s[3], d[2], d[3]);
-                               d[3] = NR_A7_NORMALIZED(s[3],d[3]);
+                               d[0] = NR_COMPOSENPP_1111 (s[0], s[3], d[0]);
+                               d[1] = NR_COMPOSENPP_1111 (s[1], s[3], d[1]);
+                               d[2] = NR_COMPOSENPP_1111 (s[2], s[3], d[2]);
+                               d[3] = NR_COMPOSEA_111(s[3], d[3]);
                                break;
                        case NR_PIXBLOCK_MODE_R8G8B8A8P:
-                               d[0] = NR_COMPOSEPPP (s[0], s[3], d[0], d[3]);
-                               d[1] = NR_COMPOSEPPP (s[1], s[3], d[1], d[3]);
-                               d[2] = NR_COMPOSEPPP (s[2], s[3], d[2], d[3]);
-                               d[3] = NR_A7_NORMALIZED(s[3],d[3]);
+                               d[0] = NR_COMPOSEPPP_1111 (s[0], s[3], d[0]);
+                               d[1] = NR_COMPOSEPPP_1111 (s[1], s[3], d[1]);
+                               d[2] = NR_COMPOSEPPP_1111 (s[2], s[3], d[2]);
+                               d[3] = NR_COMPOSEA_111(s[3], d[3]);
                                break;
                        default:
                                break;
index ba7fbc41ad0850cea7738e549aaef84f3883c8e8..2c41f8dbf053006aaa1c39d7e7ae4a7a3a12d7cc 100644 (file)
 #define NR_RGBA32_B(v) (unsigned char) (((v) >> 8) & 0xff)
 #define NR_RGBA32_A(v) (unsigned char) ((v) & 0xff)
 
-#define FAST_DIVIDE_BY_255(v) ((((v) << 8) + (v) + 257) >> 16)
+// FAST_DIVIDE assumes that 0<=num<=256*denom
+//   (this covers the case that num=255*denom+denom/2, which is used by DIV_ROUND)
+template<unsigned int divisor> static inline unsigned int FAST_DIVIDE(unsigned int v) { return v/divisor; }
+template<> static inline unsigned int FAST_DIVIDE<255>(unsigned int v) { return ((v+1)*0x101) >> 16; }
+template<> static inline unsigned int FAST_DIVIDE<255*255>(unsigned int v) { v=(v+1)<<1; v=v+(v>>7)+((v*0x3)>>16)+(v>>22); return (v>>16)>>1; }
+// FAST_DIV_ROUND assumes that 0<=num<=255*denom (DIV_ROUND should work upto num=2^32-1-(denom/2),
+// but FAST_DIVIDE_BY_255 already fails at num=65790=258*255, which is not too far above 255.5*255)
+template<unsigned int divisor> static inline unsigned int FAST_DIV_ROUND(unsigned int v) { return FAST_DIVIDE<divisor>(v+(divisor)/2); }
+static inline unsigned int DIV_ROUND(unsigned int v, unsigned int divisor) { return (v+divisor/2)/divisor; }
 
-#define NR_A7(fa,ba) (65025 - (255 - fa) * (255 - ba))
-#define NR_COMPOSENNN_A7(fc,fa,bc,ba,a) (((255 - (fa)) * (bc) * (ba) + (fa) * (fc) * 255 + 127) / a)
-#define NR_COMPOSEPNN_A7(fc,fa,bc,ba,a) (((255 - (fa)) * (bc) * (ba) + (fc) * 65025 + 127) / a)
-#define NR_COMPOSENNP(fc,fa,bc,ba) (((255 - (fa)) * (bc) * (ba) + (fa) * (fc) * 255 + 32512) / 65025)
-#define NR_COMPOSEPNP(fc,fa,bc,ba) (((255 - (fa)) * (bc) * (ba) + (fc) * 65025 + 32512) / 65025)
-#define INK_COMPOSE(f,a,b) ( ( ((guchar) b) * ((guchar) (0xff - a)) + ((guchar) ((b ^ ~f) + b/4 - (b>127? 63 : 0))) * ((guchar) a) ) >>8)
-#define NR_PREMUL(c,a) (FAST_DIVIDE_BY_255(((c) * (a) + 127)))
-#define NR_PREMUL_SINGLE(c) (FAST_DIVIDE_BY_255((c) + 127))
+#define INK_COMPOSE(f,a,b) ( ( ((guchar) (b)) * ((guchar) (0xff - (a))) + ((guchar) (((b) ^ ~(f)) + (b)/4 - ((b)>127? 63 : 0))) * ((guchar) (a)) ) >>8)
 
-#if 0
+// Naming: NR_OP_io
+//   OP = operation, for example: NORMALIZE, COMPOSEA, COMPOSENNN, PREMUL, etc.
+//   io = one digit per input (in parameter order) followed by one digit for the output; a digit k means the range 0..(2^8-1)^k
+//        for example, 213 means 0<=a<=255^2, 0<=b<=255, 0<=output<=255^3
 
-#define NR_A7_NORMALIZED(fa,ba) (FAST_DIVIDE_BY_255((65025 - (255 - (fa)) * (255 - (ba))) + 127))
-#define NR_COMPOSENPP(fc,fa,bc,ba) (FAST_DIVIDE_BY_255((255 - (fa)) * (bc) + (fa) * (fc) + 127))
-#define NR_COMPOSEPPP(fc,fa,bc,ba) (FAST_DIVIDE_BY_255((255 - (fa)) * (bc) + (fc) * 255 + 127))
-#define NR_COMPOSEP11(fc,fa,bc) (FAST_DIVIDE_BY_255((255 - (fa)) * (bc) + (fc) * 255 + 127))
-#define NR_COMPOSEN11(fc,fa,bc) (FAST_DIVIDE_BY_255((255 - (fa)) * (bc) + (fc) * (fa) + 127))
+// Normalize: NR_NORMALIZE_k1 scales a value of range 0..255^k back to 0..255 by a rounded division by 255^(k-1) (k=1 is the identity)
+static inline unsigned int NR_NORMALIZE_11(unsigned int v) { return v; }
+static inline unsigned int NR_NORMALIZE_21(unsigned int v) { return FAST_DIV_ROUND<255>(v); }
+static inline unsigned int NR_NORMALIZE_31(unsigned int v) { return FAST_DIV_ROUND<255*255>(v); }
+static inline unsigned int NR_NORMALIZE_41(unsigned int v) { return FAST_DIV_ROUND<255*255*255>(v); }
 
-#else
+// Compose alpha channel using (1 - (1-a)*(1-b)); _213/_112 return the unnormalized value, _211/_111 normalize it to 0..255
+//   Note that these could also be rewritten as NR_COMPOSENPP(255, a, b), which is slightly slower, but could help if someone
+//   decides to use SSE or something similar (it would allow the four components to be treated the same way).
+static inline unsigned int NR_COMPOSEA_213(unsigned int a, unsigned int b) { return 255*255*255 - (255*255-a)*(255-b); }
+static inline unsigned int NR_COMPOSEA_112(unsigned int a, unsigned int b) { return 255*255 - (255-a)*(255-b); }
+static inline unsigned int NR_COMPOSEA_211(unsigned int a, unsigned int b) { return NR_NORMALIZE_31(NR_COMPOSEA_213(a, b)); }
+static inline unsigned int NR_COMPOSEA_111(unsigned int a, unsigned int b) { return NR_NORMALIZE_21(NR_COMPOSEA_112(a, b)); }
 
-inline int NR_A7_NORMALIZED(int fa,int ba) {int temp=(65025 - (255 - (fa)) * (255 - (ba))) + 127; return FAST_DIVIDE_BY_255(temp);}
-inline int NR_COMPOSENPP(int fc,int fa,int bc,int ba) {int temp=(255 - (fa)) * (bc) + (fa) * (fc) + 127; return FAST_DIVIDE_BY_255(temp);}
-inline int NR_COMPOSEPPP(int fc,int fa,int bc,int ba) {int temp=(255 - (fa)) * (bc) + (fc) * 255 + 127; return FAST_DIVIDE_BY_255(temp);}
-inline int NR_COMPOSEP11(int fc,int fa,int bc) {int temp=(255 - (fa)) * (bc) + (fc) * 255 + 127; return FAST_DIVIDE_BY_255(temp);}
-inline int NR_COMPOSEN11(int fc,int fa,int bc) {int temp=(255 - (fa)) * (bc) + (fc) * (fa) + 127; return FAST_DIVIDE_BY_255(temp);}
+// Operation: (1 - fa) * bc * ba + fa * fc   (fa*fc is multiplied by 255 so that both terms share the output range)
+static inline unsigned int NR_COMPOSENNP_12114(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { return (255*255 - fa) * ba * bc + 255 * fa * fc; }
+static inline unsigned int NR_COMPOSENNP_11113(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { return (255 - fa) * ba * bc + 255 * fa * fc; }
+static inline unsigned int NR_COMPOSENNP_11111(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { return NR_NORMALIZE_31(NR_COMPOSENNP_11113(fc, fa, bc, ba)); }
+
+// Operation: (1 - fa) * bc * ba + fc   (fc is multiplied by 255*255 so that both terms share the output range)
+static inline unsigned int NR_COMPOSEPNP_22114(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { return (255*255 - fa) * ba * bc + 255*255 * fc; }
+static inline unsigned int NR_COMPOSEPNP_11113(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { return (255 - fa) * ba * bc + 255*255 * fc; }
+static inline unsigned int NR_COMPOSEPNP_22111(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { return NR_NORMALIZE_41(NR_COMPOSEPNP_22114(fc, fa, bc, ba)); }
+static inline unsigned int NR_COMPOSEPNP_11111(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba) { return NR_NORMALIZE_31(NR_COMPOSEPNP_11113(fc, fa, bc, ba)); }
+
+// Operation: ((1 - fa) * bc * ba + fa * fc)/a   (a is the divisor passed to DIV_ROUND, so it must be non-zero)
+//   Reuses the non-normalized versions of NR_COMPOSENNP; the rounded division by a brings the result back to range 1
+static inline unsigned int NR_COMPOSENNN_121131(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba, unsigned int a) { return DIV_ROUND(NR_COMPOSENNP_12114(fc, fa, bc, ba), a); }
+static inline unsigned int NR_COMPOSENNN_111121(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba, unsigned int a) { return DIV_ROUND(NR_COMPOSENNP_11113(fc, fa, bc, ba), a); }
+
+// Operation: ((1 - fa) * bc * ba + fc)/a   (a is the divisor passed to DIV_ROUND, so it must be non-zero)
+//   Reuses the non-normalized versions of NR_COMPOSEPNP; the rounded division by a brings the result back to range 1
+static inline unsigned int NR_COMPOSEPNN_221131(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba, unsigned int a) { return DIV_ROUND(NR_COMPOSEPNP_22114(fc, fa, bc, ba), a); }
+static inline unsigned int NR_COMPOSEPNN_111121(unsigned int fc, unsigned int fa, unsigned int bc, unsigned int ba, unsigned int a) { return DIV_ROUND(NR_COMPOSEPNP_11113(fc, fa, bc, ba), a); }
+
+// Operation: (1 - fa) * bc + fa * fc
+//   (1-fa)*bc+fa*fc = bc-fa*bc+fa*fc = bc+fa*(fc-bc); the difference may wrap around (unsigned), but the total is still correct modulo 2^32
+// For some reason it's faster to leave the initial 255*bc term in the non-normalized version instead of factoring it out...
+static inline unsigned int NR_COMPOSENPP_1213(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*255*bc + fa*(fc-bc); }
+static inline unsigned int NR_COMPOSENPP_1123(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*bc + fa*(255*fc-bc); }
+static inline unsigned int NR_COMPOSENPP_1112(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*bc + fa*(fc-bc); }
+static inline unsigned int NR_COMPOSENPP_1211(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_31(NR_COMPOSENPP_1213(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSENPP_1121(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_31(NR_COMPOSENPP_1123(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSENPP_1111(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21(NR_COMPOSENPP_1112(fc, fa, bc)); }
+
+// Operation: (1 - fa) * bc + fc
+//   (1-fa)*bc+fc = bc-fa*bc+fc = (bc+fc)-fa*bc
+// This rewritten form results in faster code (found out through testing)
+static inline unsigned int NR_COMPOSEPPP_2224(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*255*(bc+fc) - fa*bc; }
+  // NR_COMPOSEPPP_2224 assumes that fa and fc have a common component (fa=a*x and fc=c*x), because then the maximum value is: 
+  //   (255*255-255*x)*255*255 + 255*x*255*255 = 255*255*( (255*255-255*x) + 255*x ) = 255*255*255*( (255-x)+x ) = 255*255*255*255
+static inline unsigned int NR_COMPOSEPPP_2213(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*(255*bc+fc) - fa*bc; }
+static inline unsigned int NR_COMPOSEPPP_1213(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*255*(bc+fc) - fa*bc; }
+static inline unsigned int NR_COMPOSEPPP_1112(unsigned int fc, unsigned int fa, unsigned int bc) { return 255*(bc+fc) - fa*bc; }
+static inline unsigned int NR_COMPOSEPPP_2221(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_41(NR_COMPOSEPPP_2224(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSEPPP_2211(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_31(NR_COMPOSEPPP_2213(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSEPPP_1211(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21(NR_COMPOSEPPP_1213(fc, fa, bc)); }
+static inline unsigned int NR_COMPOSEPPP_1111(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21(NR_COMPOSEPPP_1112(fc, fa, bc)); }
+
+#define NR_COMPOSEN11_1211 NR_COMPOSENPP_1211
+#define NR_COMPOSEN11_1111 NR_COMPOSENPP_1111
+//inline unsigned int NR_COMPOSEN11_1111(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21((255 - fa) * bc + fa * fc ); }
+
+#define NR_COMPOSEP11_2211 NR_COMPOSEPPP_2211
+#define NR_COMPOSEP11_1211 NR_COMPOSEPPP_1211
+#define NR_COMPOSEP11_1111 NR_COMPOSEPPP_1111
+//inline unsigned int NR_COMPOSEP11_1111(unsigned int fc, unsigned int fa, unsigned int bc) { return NR_NORMALIZE_21((255 - fa) * bc + fc * 255); }
+
+// Premultiply using c*a; variants whose last digit is 1 normalize the product back to 0..255, the others return the raw product
+static inline unsigned int NR_PREMUL_134(unsigned int c, unsigned int a) { return c * a; }
+static inline unsigned int NR_PREMUL_224(unsigned int c, unsigned int a) { return c * a; }
+static inline unsigned int NR_PREMUL_123(unsigned int c, unsigned int a) { return c * a; }
+static inline unsigned int NR_PREMUL_112(unsigned int c, unsigned int a) { return c * a; }
+static inline unsigned int NR_PREMUL_314(unsigned int c, unsigned int a) { return NR_PREMUL_134(c, a); }
+static inline unsigned int NR_PREMUL_213(unsigned int c, unsigned int a) { return NR_PREMUL_123(c, a); }
+static inline unsigned int NR_PREMUL_131(unsigned int c, unsigned int a) { return NR_NORMALIZE_41(NR_PREMUL_134(c, a)); }
+static inline unsigned int NR_PREMUL_221(unsigned int c, unsigned int a) { return NR_NORMALIZE_41(NR_PREMUL_224(c, a)); }
+static inline unsigned int NR_PREMUL_121(unsigned int c, unsigned int a) { return NR_NORMALIZE_31(NR_PREMUL_123(c, a)); }
+static inline unsigned int NR_PREMUL_111(unsigned int c, unsigned int a) { return NR_NORMALIZE_21(NR_PREMUL_112(c, a)); }
+static inline unsigned int NR_PREMUL_311(unsigned int c, unsigned int a) { return NR_NORMALIZE_41(NR_PREMUL_314(c, a)); }
+static inline unsigned int NR_PREMUL_211(unsigned int c, unsigned int a) { return NR_NORMALIZE_31(NR_PREMUL_213(c, a)); }
+
+// Demultiply using c/a (rounded division via DIV_ROUND, rescaled to the 0..255 output range); a must be non-zero
+static inline unsigned int NR_DEMUL_131(unsigned int c, unsigned int a) { return DIV_ROUND(255 * 255 * 255 * c, a); }
+static inline unsigned int NR_DEMUL_231(unsigned int c, unsigned int a) { return DIV_ROUND(255 * 255 * c, a); }
+static inline unsigned int NR_DEMUL_121(unsigned int c, unsigned int a) { return DIV_ROUND(255 * 255 * c, a); }
+static inline unsigned int NR_DEMUL_331(unsigned int c, unsigned int a) { return DIV_ROUND(255 * c, a); }
+static inline unsigned int NR_DEMUL_221(unsigned int c, unsigned int a) { return DIV_ROUND(255 * c, a); }
+static inline unsigned int NR_DEMUL_111(unsigned int c, unsigned int a) { return DIV_ROUND(255 * c, a); }
+static inline unsigned int NR_DEMUL_431(unsigned int c, unsigned int a) { return DIV_ROUND(c, a); }
+static inline unsigned int NR_DEMUL_321(unsigned int c, unsigned int a) { return DIV_ROUND(c, a); }
+static inline unsigned int NR_DEMUL_211(unsigned int c, unsigned int a) { return DIV_ROUND(c, a); }
+static inline unsigned int NR_DEMUL_421(unsigned int c, unsigned int a) { return DIV_ROUND(c, 255 * a); }
+static inline unsigned int NR_DEMUL_311(unsigned int c, unsigned int a) { return DIV_ROUND(c, 255 * a); }
+static inline unsigned int NR_DEMUL_411(unsigned int c, unsigned int a) { return DIV_ROUND(c, 255 * 255 * a); }
 
-#endif
 
 #endif
 
index d56a6bdff2201c9ab3d137f0e87bd39018b09f9f..edcfe009793253313fd4f3e5ff71ffe1624ee8a0 100644 (file)
@@ -26,6 +26,8 @@ extract-uri-test.cpp
 helper/units-test.cpp
 inkview.cpp
 libnr/in-svg-plane-test.cpp
+libnr/nr-compose-reference.cpp
+libnr/nr-compose-test.cpp
 libnr/nr-matrix-test.cpp
 libnr/nr-point-fns-test.cpp
 libnr/nr-rotate-fns-test.cpp