Code

Fix for bug #455302 and bug #165529, also partially fixes bounding box of various...
author: Jasper van de Gronde <jasper.vandegronde@gmail.com>
Sun, 18 Apr 2010 14:17:05 +0000 (16:17 +0200)
committer: Jasper van de Gronde <jasper.vandegronde@gmail.com>
Sun, 18 Apr 2010 14:17:05 +0000 (16:17 +0200)
src/display/nr-arena-glyphs.cpp
src/display/nr-arena-image.cpp
src/display/nr-arena-shape.cpp
src/display/nr-filter-displacement-map.cpp
src/display/pixblock-transform.cpp
src/libnr/nr-compose-transform.cpp

index ff320bd8125eb49d018697e9548c449c4a6a0134..33b08a91c34952c3eb56c6c5d952ed1e2596e163 100644 (file)
@@ -211,10 +211,10 @@ nr_arena_glyphs_update(NRArenaItem *item, NRRectL */*area*/, NRGC *gc, guint /*s
     }
     if (nr_rect_d_test_empty(bbox)) return NR_ARENA_ITEM_STATE_ALL;
 
-    item->bbox.x0 = (gint32)(bbox.x0 - 1.0);
-    item->bbox.y0 = (gint32)(bbox.y0 - 1.0);
-    item->bbox.x1 = (gint32)(bbox.x1 + 1.0);
-    item->bbox.y1 = (gint32)(bbox.y1 + 1.0);
+    item->bbox.x0 = static_cast<NR::ICoord>(floor(bbox.x0));
+    item->bbox.y0 = static_cast<NR::ICoord>(floor(bbox.y0));
+    item->bbox.x1 = static_cast<NR::ICoord>(ceil (bbox.x1));
+    item->bbox.y1 = static_cast<NR::ICoord>(ceil (bbox.y1));
 
     return NR_ARENA_ITEM_STATE_ALL;
 }
index f45a2da4f8c8c44e2a05b7733731aadf6e5303aa..4939431689555e6ddb958a0caac58cbb0eaffc6f 100644 (file)
@@ -151,10 +151,10 @@ nr_arena_image_update( NRArenaItem *item, NRRectL */*area*/, NRGC *gc, unsigned
 
         nr_rect_d_matrix_transform (&bbox, &bbox, gc->transform);
 
-        item->bbox.x0 = (int) floor (bbox.x0);
-        item->bbox.y0 = (int) floor (bbox.y0);
-        item->bbox.x1 = (int) ceil (bbox.x1);
-        item->bbox.y1 = (int) ceil (bbox.y1);
+        item->bbox.x0 = static_cast<NR::ICoord>(floor(bbox.x0)); // Floor gives the coordinate in which the point resides
+        item->bbox.y0 = static_cast<NR::ICoord>(floor(bbox.y0));
+        item->bbox.x1 = static_cast<NR::ICoord>(ceil (bbox.x1)); // Ceil gives the first coordinate beyond the point
+        item->bbox.y1 = static_cast<NR::ICoord>(ceil (bbox.y1));
     } else {
         item->bbox.x0 = (int) gc->transform[4];
         item->bbox.y0 = (int) gc->transform[5];
@@ -211,13 +211,7 @@ nr_arena_image_render( cairo_t *ct, NRArenaItem *item, NRRectL */*area*/, NRPixB
         } else if (pb->mode == NR_PIXBLOCK_MODE_R8G8B8A8P) {
             nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (dpx, dw, dh, drs, spx, sw, sh, srs, d2s, Falpha, nr_arena_image_x_sample, nr_arena_image_y_sample);
         } else if (pb->mode == NR_PIXBLOCK_MODE_R8G8B8A8N) {
-
-            //FIXME: The _N_N_N_ version gives a gray border around images, see bug 906376
-            // This mode is only used when exporting, screen rendering always has _P_P_P_, so I decided to simply replace it for now
-            // Feel free to propose a better fix
-
-            //nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (dpx, dw, dh, drs, spx, sw, sh, srs, d2s, Falpha, nr_arena_image_x_sample, nr_arena_image_y_sample);
-            nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (dpx, dw, dh, drs, spx, sw, sh, srs, d2s, Falpha, nr_arena_image_x_sample, nr_arena_image_y_sample);
+            nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (dpx, dw, dh, drs, spx, sw, sh, srs, d2s, Falpha, nr_arena_image_x_sample, nr_arena_image_y_sample);
         }
 
         pb->empty = FALSE;
index e2a9e958064128a00df160849ce4fba434611b56..a3b295a4efcae5b18b255eaec1be3ce2df1f2a6b 100644 (file)
@@ -254,12 +254,11 @@ nr_arena_shape_update(NRArenaItem *item, NRRectL *area, NRGC *gc, guint state, g
         if (state & NR_ARENA_ITEM_STATE_BBOX) {
             if (shape->curve) {
                 boundingbox = bounds_exact_transformed(shape->curve->get_pathvector(), gc->transform);
-                /// \todo  just write item->bbox = boundingbox
                 if (boundingbox) {
-                    item->bbox.x0 = (gint32)((*boundingbox)[0][0] - 1.0F);
-                    item->bbox.y0 = (gint32)((*boundingbox)[1][0] - 1.0F);
-                    item->bbox.x1 = (gint32)((*boundingbox)[0][1] + 1.9999F);
-                    item->bbox.y1 = (gint32)((*boundingbox)[1][1] + 1.9999F);
+                    item->bbox.x0 = static_cast<NR::ICoord>(floor((*boundingbox)[0][0])); // Floor gives the coordinate in which the point resides
+                    item->bbox.y0 = static_cast<NR::ICoord>(floor((*boundingbox)[1][0]));
+                    item->bbox.x1 = static_cast<NR::ICoord>(ceil ((*boundingbox)[0][1])); // Ceil gives the first coordinate beyond the point
+                    item->bbox.y1 = static_cast<NR::ICoord>(ceil ((*boundingbox)[1][1]));
                 } else {
                     item->bbox = NR_RECT_L_EMPTY;
                 }
@@ -300,10 +299,10 @@ nr_arena_shape_update(NRArenaItem *item, NRRectL *area, NRGC *gc, guint state, g
 
     /// \todo  just write item->bbox = boundingbox
     if (boundingbox) {
-        shape->approx_bbox.x0 = (gint32)((*boundingbox)[0][0] - 1.0F);
-        shape->approx_bbox.y0 = (gint32)((*boundingbox)[1][0] - 1.0F);
-        shape->approx_bbox.x1 = (gint32)((*boundingbox)[0][1] + 1.9999F);
-        shape->approx_bbox.y1 = (gint32)((*boundingbox)[1][1] + 1.9999F);
+        shape->approx_bbox.x0 = static_cast<NR::ICoord>(floor((*boundingbox)[0][0]));
+        shape->approx_bbox.y0 = static_cast<NR::ICoord>(floor((*boundingbox)[1][0]));
+        shape->approx_bbox.x1 = static_cast<NR::ICoord>(ceil ((*boundingbox)[0][1]));
+        shape->approx_bbox.y1 = static_cast<NR::ICoord>(ceil ((*boundingbox)[1][1]));
     } else {
         shape->approx_bbox = NR_RECT_L_EMPTY;
     }
@@ -349,10 +348,10 @@ nr_arena_shape_update(NRArenaItem *item, NRRectL *area, NRGC *gc, guint state, g
 
         /// \todo  just write shape->approx_bbox = boundingbox
         if (boundingbox) {
-            shape->approx_bbox.x0 = (gint32)((*boundingbox)[0][0] - 1.0F);
-            shape->approx_bbox.y0 = (gint32)((*boundingbox)[1][0] - 1.0F);
-            shape->approx_bbox.x1 = (gint32)((*boundingbox)[0][1] + 1.9999F);
-            shape->approx_bbox.y1 = (gint32)((*boundingbox)[1][1] + 1.9999F);
+            shape->approx_bbox.x0 = static_cast<NR::ICoord>(floor((*boundingbox)[0][0]));
+            shape->approx_bbox.y0 = static_cast<NR::ICoord>(floor((*boundingbox)[1][0]));
+            shape->approx_bbox.x1 = static_cast<NR::ICoord>(ceil ((*boundingbox)[0][1]));
+            shape->approx_bbox.y1 = static_cast<NR::ICoord>(ceil ((*boundingbox)[1][1]));
         } else {
             shape->approx_bbox = NR_RECT_L_EMPTY;
         }
@@ -362,10 +361,10 @@ nr_arena_shape_update(NRArenaItem *item, NRRectL *area, NRGC *gc, guint state, g
         return NR_ARENA_ITEM_STATE_ALL;
 
     /// \todo  just write item->bbox = boundingbox
-    item->bbox.x0 = (gint32)((*boundingbox)[0][0] - 1.0F);
-    item->bbox.y0 = (gint32)((*boundingbox)[1][0] - 1.0F);
-    item->bbox.x1 = (gint32)((*boundingbox)[0][1] + 1.0F);
-    item->bbox.y1 = (gint32)((*boundingbox)[1][1] + 1.0F);
+    item->bbox.x0 = static_cast<NR::ICoord>(floor((*boundingbox)[0][0]));
+    item->bbox.y0 = static_cast<NR::ICoord>(floor((*boundingbox)[1][0]));
+    item->bbox.x1 = static_cast<NR::ICoord>(ceil ((*boundingbox)[0][1]));
+    item->bbox.y1 = static_cast<NR::ICoord>(ceil ((*boundingbox)[1][1]));
 
     item->render_opacity = TRUE;
     if ( shape->_fill.paint.type() == NRArenaShape::Paint::SERVER ) {
index 4de5e658cd3909a111ca661f1eeaaf77894abe0c..a983fb840d22e2b2be1db4b7c632f1549503dc89 100644 (file)
@@ -43,9 +43,8 @@ struct pixel_t {
 
 static inline pixel_t pixelValue(NRPixBlock const* pb, int x, int y) {
     if ( x < pb->area.x0 || x >= pb->area.x1 || y < pb->area.y0 || y >= pb->area.y1 ) return pixel_t::blank(); // This assumes anything outside the defined range is (0,0,0,0)
-    pixel_t const* data = reinterpret_cast<pixel_t const*>(NR_PIXBLOCK_PX(pb));
-    int offset = (x-pb->area.x0) + (pb->area.x1-pb->area.x0)*(y-pb->area.y0);
-    return data[offset];
+    pixel_t const* rowData = reinterpret_cast<pixel_t const*>(NR_PIXBLOCK_PX(pb) + (y-pb->area.y0)*pb->rs);
+    return rowData[x-pb->area.x0];
 }
 
 template<bool PREMULTIPLIED>
@@ -74,18 +73,9 @@ static pixel_t interpolatePixels(NRPixBlock const* pb, double x, double y) {
      * We might as well avoid premultiplication in this case, which still gives us a fully
      * transparent result, but with interpolated RGB parts. */
 
-    /* First calculate interpolated alpha value. */
-    unsigned ra = 0;
-    if (!PREMULTIPLIED) {
-        unsigned const y0 = sf*p00[3] + xf*(p01[3]-p00[3]); // range [0,a*sf]
-        unsigned const y1 = sf*p10[3] + xf*(p11[3]-p10[3]);
-        ra = sf*y0 + yf*(y1-y0); // range [0,a*sf*sf]
-    }
-
     pixel_t r;
-    if (ra == 0) {
-        /* Either premultiplied or the interpolated alpha value is zero,
-         * so do simple interpolation. */
+    if (PREMULTIPLIED) {
+        /* Premultiplied, so do simple interpolation. */
         for (unsigned i = 0; i != 4; ++i) {
             // y0,y1 have range [0,a*sf]
             unsigned const y0 = sf*p00[i] + xf*((unsigned int)p01[i]-(unsigned int)p00[i]);
@@ -95,21 +85,39 @@ static pixel_t interpolatePixels(NRPixBlock const* pb, double x, double y) {
             r[i] = (ri + sf2h)>>(2*sfl); // range [0,a]
         }
     } else {
-        /* Do premultiplication ourselves. */
-        for (unsigned i = 0; i != 3; ++i) {
-            // Premultiplied versions.  Range [0,255*a].
-            unsigned const c00 = p00[i]*p00[3];
-            unsigned const c01 = p01[i]*p01[3];
-            unsigned const c10 = p10[i]*p10[3];
-            unsigned const c11 = p11[i]*p11[3];
-
-            // Interpolation.
-            unsigned const y0 = sf*c00 + xf*(c01-c00); // range [0,255*a*sf]
-            unsigned const y1 = sf*c10 + xf*(c11-c10); // range [0,255*a*sf]
-            unsigned const ri = sf*y0 + yf*(y1-y0); // range [0,255*a*sf*sf]
-            r[i] = (ri + ra/2) / ra;  // range [0,255]
+        /* First calculate interpolated alpha value. */
+        unsigned const y0 = sf*p00[3] + xf*((unsigned int)p01[3]-(unsigned int)p00[3]); // range [0,a*sf]
+        unsigned const y1 = sf*p10[3] + xf*((unsigned int)p11[3]-(unsigned int)p10[3]);
+        unsigned const ra = sf*y0 + yf*(y1-y0); // range [0,a*sf*sf]
+
+        if (ra==0) {
+            /* Fully transparent, so do simple interpolation. */
+            for (unsigned i = 0; i != 3; ++i) {
+                // y0,y1 have range [0,255*sf]
+                unsigned const y0 = sf*p00[i] + xf*((unsigned int)p01[i]-(unsigned int)p00[i]);
+                unsigned const y1 = sf*p10[i] + xf*((unsigned int)p11[i]-(unsigned int)p10[i]);
+
+                unsigned const ri = sf*y0 + yf*(y1-y0); // range [0,255*sf*sf]
+                r[i] = (ri + sf2h)>>(2*sfl); // range [0,255]
+            }
+            r[3] = 0;
+        } else {
+            /* Do premultiplication ourselves. */
+            for (unsigned i = 0; i != 3; ++i) {
+                // Premultiplied versions.  Range [0,255*a].
+                unsigned const c00 = p00[i]*p00[3];
+                unsigned const c01 = p01[i]*p01[3];
+                unsigned const c10 = p10[i]*p10[3];
+                unsigned const c11 = p11[i]*p11[3];
+
+                // Interpolation.
+                unsigned const y0 = sf*c00 + xf*(c01-c00); // range [0,255*a*sf]
+                unsigned const y1 = sf*c10 + xf*(c11-c10); // range [0,255*a*sf]
+                unsigned const ri = sf*y0 + yf*(y1-y0); // range [0,255*a*sf*sf]
+                r[i] = (ri + ra/2) / ra;  // range [0,255]
+            }
+            r[3] = (ra + sf2h)>>(2*sfl); // range [0,a]
         }
-        r[3] = (ra + sf2h)>>(2*sfl); // range [0,a]
     }
 
     return r;
@@ -117,19 +125,17 @@ static pixel_t interpolatePixels(NRPixBlock const* pb, double x, double y) {
 
 template<bool MAP_PREMULTIPLIED, bool DATA_PREMULTIPLIED>
 static void performDisplacement(NRPixBlock const* texture, NRPixBlock const* map, int Xchannel, int Ychannel, NRPixBlock* out, double scalex, double scaley) {
-    pixel_t *out_data = reinterpret_cast<pixel_t*>(NR_PIXBLOCK_PX(out));
-
     bool Xneedsdemul = MAP_PREMULTIPLIED && Xchannel<3;
     bool Yneedsdemul = MAP_PREMULTIPLIED && Ychannel<3;
     if (!Xneedsdemul) scalex /= 255.0;
     if (!Yneedsdemul) scaley /= 255.0;
 
     for (int yout=out->area.y0; yout < out->area.y1; yout++){
+        pixel_t const* mapRowData = reinterpret_cast<pixel_t const*>(NR_PIXBLOCK_PX(map) + (yout-map->area.y0)*map->rs);
+        pixel_t* outRowData = reinterpret_cast<pixel_t*>(NR_PIXBLOCK_PX(out) + (yout-out->area.y0)*out->rs);
         for (int xout=out->area.x0; xout < out->area.x1; xout++){
-            int xmap = xout;
-            int ymap = yout;
+            pixel_t const mapValue = mapRowData[xout-map->area.x0];
 
-            pixel_t mapValue = pixelValue(map, xmap, ymap);
             double xtex = xout + (Xneedsdemul ? // Although the value of the pixel corresponds to the MIDDLE of the pixel, no +0.5 is needed because we're interpolating pixels anyway (so to get the actual pixel locations 0.5 would have to be subtracted again).
                 (mapValue[3]==0?0:(scalex * (mapValue[Xchannel] - mapValue[3]*0.5) / mapValue[3])) :
                 (scalex * (mapValue[Xchannel] - 127.5)));
@@ -137,7 +143,7 @@ static void performDisplacement(NRPixBlock const* texture, NRPixBlock const* map
                 (mapValue[3]==0?0:(scaley * (mapValue[Ychannel] - mapValue[3]*0.5) / mapValue[3])) :
                 (scaley * (mapValue[Ychannel] - 127.5)));
 
-            out_data[(xout-out->area.x0) + (out->area.x1-out->area.x0)*(yout-out->area.y0)] = interpolatePixels<DATA_PREMULTIPLIED>(texture, xtex, ytex);
+            outRowData[xout-out->area.x0] = interpolatePixels<DATA_PREMULTIPLIED>(texture, xtex, ytex);
         }
     }
 }
@@ -152,8 +158,14 @@ int FilterDisplacementMap::render(FilterSlot &slot, FilterUnits const &units) {
         return 1;
     }
 
-    //TODO: check whether do we really need this check:
-    if (map->area.x1 <= map->area.x0 || map->area.y1 <=  map->area.y0) return 0; //nothing to do!
+    NR::IRect area = units.get_pixblock_filterarea_paraller();
+    int x0 = std::max(map->area.x0,area.min()[NR::X]);
+    int y0 = std::max(map->area.y0,area.min()[NR::Y]);
+    int x1 = std::min(map->area.x1,area.max()[NR::X]);
+    int y1 = std::min(map->area.y1,area.max()[NR::Y]);
+
+    //TODO: check whether we really need this check:
+    if (x1 <= x0 || y1 <= y0) return 0; //nothing to do!
 
     if (texture->mode != NR_PIXBLOCK_MODE_R8G8B8A8N && texture->mode != NR_PIXBLOCK_MODE_R8G8B8A8P) {
         g_warning("Source images without an alpha channel are not supported by feDisplacementMap at the moment.");
@@ -161,13 +173,7 @@ int FilterDisplacementMap::render(FilterSlot &slot, FilterUnits const &units) {
     }
 
     NRPixBlock *out = new NRPixBlock;
-    
-    out->area.x0 = map->area.x0;
-    out->area.y0 = map->area.y0;
-    out->area.x1 = map->area.x1;
-    out->area.y1 = map->area.y1;
-
-    nr_pixblock_setup_fast(out, texture->mode, out->area.x0, out->area.y0, out->area.x1, out->area.y1, true);
+    nr_pixblock_setup_fast(out, texture->mode, x0, y0, x1, y1, true);
 
     // convert to a suitable format
     bool free_map_on_exit = false;
index 73b467d5a8f4da558f0af1bd25d9373b74e15839..af05a9b8845a109696fa54feffe76feebf7b58b5 100644 (file)
@@ -148,6 +148,11 @@ void transform_bicubic(NRPixBlock *to, NRPixBlock *from, Geom::Matrix const &tra
         nr_blit_pixblock_pixblock(from, o_from);
         free_from_on_exit = true;
     }
+
+    if (from->mode != NR_PIXBLOCK_MODE_R8G8B8A8P) {
+        // TODO: Fix this... (The problem is that for interpolation non-premultiplied colors should be premultiplied...)
+        g_warning("transform_bicubic does not properly support non-premultiplied images");
+    }
     
     // Precalculate sizes of source and destination pixblocks
     int from_width = from->area.x1 - from->area.x0;
index 6e03faf2ff7a48a1c0a329c03a84533319fdd1e7..bc05c7b519d6feab284d1dedd87bf6da526982d5 100644 (file)
@@ -66,18 +66,25 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h,
 
        if (alpha == 0) return;
 
+    // Both alpha and color components are stored temporarily with a range of [0,255^2], so more supersampling and we get an overflow
+    if (xd+yd>16) {
+        xd = 8;
+        yd = 8;
+    }
+
        xsize = (1 << xd);
        ysize = (1 << yd);
        size = xsize * ysize;
        dbits = xd + yd;
+    unsigned int rounding_fix = size/2;
 
        /* Set up fixed point matrix */
-       FFs_x_x = (long) (d2s[0] * (1 << FBITS) + 0.5);
-       FFs_x_y = (long) (d2s[1] * (1 << FBITS) + 0.5);
-       FFs_y_x = (long) (d2s[2] * (1 << FBITS) + 0.5);
-       FFs_y_y = (long) (d2s[3] * (1 << FBITS) + 0.5);
-       FFs__x = (long) (d2s[4] * (1 << FBITS) + 0.5);
-       FFs__y = (long) (d2s[5] * (1 << FBITS) + 0.5);
+       FFs_x_x = (long) floor(d2s[0] * (1 << FBITS) + 0.5);
+       FFs_x_y = (long) floor(d2s[1] * (1 << FBITS) + 0.5);
+       FFs_y_x = (long) floor(d2s[2] * (1 << FBITS) + 0.5);
+       FFs_y_y = (long) floor(d2s[3] * (1 << FBITS) + 0.5);
+       FFs__x = (long) floor(d2s[4] * (1 << FBITS) + 0.5);
+       FFs__y = (long) floor(d2s[5] * (1 << FBITS) + 0.5);
 
        FFs_x_x_S = FFs_x_x >> xd;
        FFs_x_y_S = FFs_x_y >> xd;
@@ -114,35 +121,40 @@ nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h,
                                        sy = (FFsy + FF_sy_S[i]) >> FBITS;
                                        if ((sy >= 0) && (sy < sh)) {
                                                const unsigned char *s;
-                                               unsigned int ca;
                                                s = spx + sy * srs + sx * 4;
-                                               ca = NR_PREMUL_112 (s[3], alpha);
-                                               r += NR_PREMUL_121 (s[0], ca);
-                                               g += NR_PREMUL_121 (s[1], ca);
-                                               b += NR_PREMUL_121 (s[2], ca);
-                                               a += NR_NORMALIZE_21(ca);
+                                               r += NR_PREMUL_112 (s[0], s[3]);
+                                               g += NR_PREMUL_112 (s[1], s[3]);
+                                               b += NR_PREMUL_112 (s[2], s[3]);
+                                               a += s[3];
                                        }
                                }
                        }
-                       a >>= dbits;
+                       a = (a*alpha + rounding_fix) >> dbits;
+            // Compare to nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P
                        if (a != 0) {
-                               r = r >> dbits;
-                               g = g >> dbits;
-                               b = b >> dbits;
-                               if (a == 255) {
-                                       /* Transparent BG, premul src */
-                                       d[0] = r;
-                                       d[1] = g;
-                                       d[2] = b;
-                                       d[3] = a;
+                               r = (r + rounding_fix) >> dbits;
+                               g = (g + rounding_fix) >> dbits;
+                               b = (b + rounding_fix) >> dbits;
+                if (a == 255) {
+                                       /* Full coverage, demul src */
+                                       d[0] = NR_NORMALIZE_21(r);
+                                       d[1] = NR_NORMALIZE_21(g);
+                                       d[2] = NR_NORMALIZE_21(b);
+                                       d[3] = NR_NORMALIZE_21(a);
+                } else if (d[3] == 0) {
+                    /* Only foreground, demul src */
+                    d[0] = NR_DEMUL_221(r,a);
+                    d[1] = NR_DEMUL_221(g,a);
+                    d[2] = NR_DEMUL_221(b,a);
+                    d[3] = NR_NORMALIZE_21(a);
                                } else {
                                        unsigned int ca;
                                        /* Full composition */
-                                       ca = NR_COMPOSEA_112(a, d[3]);
-                                       d[0] = NR_COMPOSENNN_111121 (r, a, d[0], d[3], ca);
-                                       d[1] = NR_COMPOSENNN_111121 (g, a, d[1], d[3], ca);
-                                       d[2] = NR_COMPOSENNN_111121 (b, a, d[2], d[3], ca);
-                                       d[3] = NR_NORMALIZE_21(ca);
+                                       ca = NR_COMPOSEA_213(a, d[3]);
+                                       d[0] = NR_COMPOSEPNN_221131 (r, a, d[0], d[3], ca);
+                                       d[1] = NR_COMPOSEPNN_221131 (g, a, d[1], d[3], ca);
+                                       d[2] = NR_COMPOSEPNN_221131 (b, a, d[2], d[3], ca);
+                                       d[3] = NR_NORMALIZE_31(ca);
                                }
                        }
                        /* Advance pointers */
@@ -227,9 +239,7 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h
        int x, y;
 
        size = (1 << dbits);
-    unsigned alpha_rounding_fix = size * 255;
-    unsigned rgb_rounding_fix = size * (255 * 256);
-    if (alpha > 127) ++alpha;
+    unsigned int rounding_fix = size/2;
 
        d0 = px;
        FFsx0 = FFd2s[4];
@@ -252,32 +262,30 @@ nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h
                                        sy = (long (FFsy >> (FBITS_HP - FBITS)) + FF_S[2 * i + 1]) >> FBITS;
                                        if ((sy >= 0) && (sy < sh)) {
                                                const unsigned char *s;
-                                               unsigned int ca;
                                                s = spx + sy * srs + sx * 4;
-                                               ca = NR_PREMUL_112(s[3], alpha);
-                                               r += NR_PREMUL_123(s[0], ca);
-                                               g += NR_PREMUL_123(s[1], ca);
-                                               b += NR_PREMUL_123(s[2], ca);
-                                               a += ca;
+                                               r += NR_PREMUL_112(s[0], s[3]);
+                                               g += NR_PREMUL_112(s[1], s[3]);
+                                               b += NR_PREMUL_112(s[2], s[3]);
+                                               a += s[3];
                                        }
                                }
                        }
-                       a = (a + alpha_rounding_fix) >> (8 + dbits);
+                       a = (a*alpha + rounding_fix) >> dbits;
                        if (a != 0) {
-                               r = (r + rgb_rounding_fix) >> (16 + dbits);
-                               g = (g + rgb_rounding_fix) >> (16 + dbits);
-                               b = (b + rgb_rounding_fix) >> (16 + dbits);
+                               r = (r + rounding_fix) >> dbits;
+                               g = (g + rounding_fix) >> dbits;
+                               b = (b + rounding_fix) >> dbits;
                                if ((a == 255) || (d[3] == 0)) {
                                        /* Transparent BG, premul src */
-                                       d[0] = r;
-                                       d[1] = g;
-                                       d[2] = b;
-                                       d[3] = a;
+                                       d[0] = NR_NORMALIZE_21(r);
+                                       d[1] = NR_NORMALIZE_21(g);
+                                       d[2] = NR_NORMALIZE_21(b);
+                                       d[3] = NR_NORMALIZE_21(a);
                                } else {
-                                       d[0] = NR_COMPOSEPPP_1111 (r, a, d[0]);
-                                       d[1] = NR_COMPOSEPPP_1111 (g, a, d[1]);
-                                       d[2] = NR_COMPOSEPPP_1111 (b, a, d[2]);
-                                       d[3] = NR_COMPOSEA_111(a, d[3]);
+                                       d[0] = NR_COMPOSEPPP_2211 (r, a, d[0]);
+                                       d[1] = NR_COMPOSEPPP_2211 (g, a, d[1]);
+                                       d[2] = NR_COMPOSEPPP_2211 (b, a, d[2]);
+                                       d[3] = NR_COMPOSEA_211(a, d[3]);
                                }
                        }
                        /* Advance pointers */
@@ -302,11 +310,17 @@ void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, in
 
        if (alpha == 0) return;
 
-       dbits = xd + yd;
+    // Both alpha and color components are stored temporarily with a range of [0,255^2], so more supersampling and we get an overflow
+    if (xd+yd>16) {
+        xd = 8;
+        yd = 8;
+    }
+
+    dbits = xd + yd;
 
        for (i = 0; i < 6; i++) {
-               FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5);
-               FFd2s_HP[i] = (long long) (d2s[i] * (1 << FBITS_HP) + 0.5);;
+               FFd2s[i] = (long) floor(d2s[i] * (1 << FBITS) + 0.5);
+               FFd2s_HP[i] = (long long) floor(d2s[i] * (1 << FBITS_HP) + 0.5);;
        }
 
        if (dbits == 0) {