Code

Patch by Diederik for 168384. Tested by myself and others and works well, additionall...
[inkscape.git] / src / libnr / nr-compose-transform.cpp
1 #define __NR_COMPOSE_TRANSFORM_C__
3 /*
4  * Pixel buffer rendering library
5  *
6  * Authors:
7  *   Lauris Kaplinski <lauris@kaplinski.com>
8  *
9  * This code is in public domain
10  */
12 #ifdef HAVE_CONFIG_H
13 # include "config.h"
14 #endif
16 #include "nr-pixops.h"
17 #include "nr-matrix.h"
#ifdef WITH_MMX
/*
 * MMX variants of the premultiplied compositors. Only the declarations appear
 * here (no definitions are visible in this part of the file); they get C
 * linkage when this file is compiled as C++.
 */
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* fixme: */
int nr_have_mmx (void);

/* Variant without sub-sampling: takes the six-entry fixed point matrix only. */
void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (
        unsigned char *px, int w, int h, int rs,
        const unsigned char *spx, int sw, int sh, int srs,
        const long *FFd2s, unsigned int alpha);

/* Variant with sub-sampling: additionally takes the offset table FF_S and dbits. */
void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (
        unsigned char *px, int w, int h, int rs,
        const unsigned char *spx, int sw, int sh, int srs,
        const long *FFd2s, const long *FF_S, unsigned int alpha, int dbits);

#ifdef __cplusplus
}
#endif /* __cplusplus */

/* Evaluates to nonzero when nr_have_mmx() does; used to gate the MMX calls. */
#define NR_PIXOPS_MMX (1 && nr_have_mmx ())

#endif /* WITH_MMX */
38 /* fixme: Implement missing (Lauris) */
39 /* fixme: PREMUL colors before calculating average (Lauris) */
41 /* Fixed point precision */
42 #define FBITS 12
43 #define FBITS_HP 18 // In some places we need a higher precision
45 void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
46                                                const unsigned char *spx, int sw, int sh, int srs,
47                                                const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
48 void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, int h, int rs,
49                                                const unsigned char *spx, int sw, int sh, int srs,
50                                                const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
51 void nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
52                                                const unsigned char *spx, int sw, int sh, int srs,
53                                                const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
54 void nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, int h, int rs,
55                                                const unsigned char *spx, int sw, int sh, int srs,
56                                                const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
58 void
59 nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
60                                                const unsigned char *spx, int sw, int sh, int srs,
61                                                const NR::Matrix &d2s, unsigned int alpha, int xd, int yd)
62 {
63         int xsize, ysize, size, dbits;
64         long FFs_x_x, FFs_x_y, FFs_y_x, FFs_y_y, FFs__x, FFs__y;
65         long FFs_x_x_S, FFs_x_y_S, FFs_y_x_S, FFs_y_y_S;
66         /* Subpixel positions */
67         int FF_sx_S[256];
68         int FF_sy_S[256];
69         unsigned char *d0;
70         int FFsx0, FFsy0;
71         int x, y;
73         if (alpha == 0) return;
75         xsize = (1 << xd);
76         ysize = (1 << yd);
77         size = xsize * ysize;
78         dbits = xd + yd;
80         /* Set up fixed point matrix */
81         FFs_x_x = (long) (d2s[0] * (1 << FBITS) + 0.5);
82         FFs_x_y = (long) (d2s[1] * (1 << FBITS) + 0.5);
83         FFs_y_x = (long) (d2s[2] * (1 << FBITS) + 0.5);
84         FFs_y_y = (long) (d2s[3] * (1 << FBITS) + 0.5);
85         FFs__x = (long) (d2s[4] * (1 << FBITS) + 0.5);
86         FFs__y = (long) (d2s[5] * (1 << FBITS) + 0.5);
88         FFs_x_x_S = FFs_x_x >> xd;
89         FFs_x_y_S = FFs_x_y >> xd;
90         FFs_y_x_S = FFs_y_x >> yd;
91         FFs_y_y_S = FFs_y_y >> yd;
93         /* Set up subpixel matrix */
94         /* fixme: We can calculate that in floating point (Lauris) */
95         for (y = 0; y < ysize; y++) {
96                 for (x = 0; x < xsize; x++) {
97                         FF_sx_S[y * xsize + x] = FFs_x_x_S * x + FFs_y_x_S * y;
98                         FF_sy_S[y * xsize + x] = FFs_x_y_S * x + FFs_y_y_S * y;
99                 }
100         }
102         d0 = px;
103         FFsx0 = FFs__x;
104         FFsy0 = FFs__y;
106         for (y = 0; y < h; y++) {
107                 unsigned char *d;
108                 long FFsx, FFsy;
109                 d = d0;
110                 FFsx = FFsx0;
111                 FFsy = FFsy0;
112                 for (x = 0; x < w; x++) {
113                         unsigned int r, g, b, a;
114                         long sx, sy;
115                         int i;
116                         r = g = b = a = 0;
117                         for (i = 0; i < size; i++) {
118                                 sx = (FFsx + FF_sx_S[i]) >> FBITS;
119                                 if ((sx >= 0) && (sx < sw)) {
120                                         sy = (FFsy + FF_sy_S[i]) >> FBITS;
121                                         if ((sy >= 0) && (sy < sh)) {
122                                                 const unsigned char *s;
123                                                 unsigned int ca;
124                                                 s = spx + sy * srs + sx * 4;
125                                                 ca = NR_PREMUL_112 (s[3], alpha);
126                                                 r += NR_PREMUL_121 (s[0], ca);
127                                                 g += NR_PREMUL_121 (s[1], ca);
128                                                 b += NR_PREMUL_121 (s[2], ca);
129                                                 a += NR_NORMALIZE_21(ca);
130                                         }
131                                 }
132                         }
133                         a >>= dbits;
134                         if (a != 0) {
135                                 r = r >> dbits;
136                                 g = g >> dbits;
137                                 b = b >> dbits;
138                                 if (a == 255) {
139                                         /* Transparent BG, premul src */
140                                         d[0] = r;
141                                         d[1] = g;
142                                         d[2] = b;
143                                         d[3] = a;
144                                 } else {
145                                         unsigned int ca;
146                                         /* Full composition */
147                                         ca = NR_COMPOSEA_112(a, d[3]);
148                                         d[0] = NR_COMPOSENNN_111121 (r, a, d[0], d[3], ca);
149                                         d[1] = NR_COMPOSENNN_111121 (g, a, d[1], d[3], ca);
150                                         d[2] = NR_COMPOSENNN_111121 (b, a, d[2], d[3], ca);
151                                         d[3] = NR_NORMALIZE_21(ca);
152                                 }
153                         }
154                         /* Advance pointers */
155                         FFsx += FFs_x_x;
156                         FFsy += FFs_x_y;
157                         d += 4;
158                 }
159                 FFsx0 += FFs_y_x;
160                 FFsy0 += FFs_y_y;
161                 d0 += rs;
162         }
165 void nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, int h, int rs,
166                                                     const unsigned char *spx, int sw, int sh, int srs,
167                                                     const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
169 static void
170 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs,
171                                                  const unsigned char *spx, int sw, int sh, int srs,
172                                                  const long long *FFd2s, unsigned int alpha)
174     unsigned char *d0;
175         long long FFsx0, FFsy0;
176         int x, y;
178         d0 = px;
179         FFsx0 = FFd2s[4];
180         FFsy0 = FFd2s[5];
182         for (y = 0; y < h; y++) {
183                 unsigned char *d;
184                 long long FFsx, FFsy;
185                 d = d0;
186                 FFsx = FFsx0;
187                 FFsy = FFsy0;
188                 for (x = 0; x < w; x++) {
189                         long sx, sy;
190                         sx = long(FFsx >> FBITS_HP);
191                         if ((sx >= 0) && (sx < sw)) {
192                                 sy = long(FFsy >> FBITS_HP);
193                                 if ((sy >= 0) && (sy < sh)) {
194                                         const unsigned char *s;
195                                         unsigned int a;
196                                         s = spx + sy * srs + sx * 4;
197                                         a = NR_PREMUL_112 (s[3], alpha);
198                                         if (a != 0) {
199                                                 if ((a == 255*255) || (d[3] == 0)) {
200                                                         /* Transparent BG, premul src */
201                                                         d[0] = NR_PREMUL_121 (s[0], a);
202                                                         d[1] = NR_PREMUL_121 (s[1], a);
203                                                         d[2] = NR_PREMUL_121 (s[2], a);
204                                                         d[3] = NR_NORMALIZE_21(a);
205                                                 } else {
206                                                         d[0] = NR_COMPOSENPP_1211 (s[0], a, d[0]);
207                                                         d[1] = NR_COMPOSENPP_1211 (s[1], a, d[1]);
208                                                         d[2] = NR_COMPOSENPP_1211 (s[2], a, d[2]);
209                                                         d[3] = NR_COMPOSEA_211(a, d[3]);
210                                                 }
211                                         }
212                                 }
213                         }
214                         /* Advance pointers */
215                         FFsx += FFd2s[0];
216                         FFsy += FFd2s[1];
217                         d += 4;
218                 }
219                 FFsx0 += FFd2s[2];
220                 FFsy0 += FFd2s[3];
221                 d0 += rs;
222         }
225 static void
226 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs,
227                                                  const unsigned char *spx, int sw, int sh, int srs,
228                                                  const long long *FFd2s, const long *FF_S, unsigned int alpha, int dbits)
230         int size;
231         unsigned char *d0;
232         long long FFsx0, FFsy0;
233         int x, y;
235         size = (1 << dbits);
236     unsigned alpha_rounding_fix = size * 255;
237     unsigned rgb_rounding_fix = size * (255 * 256);
238     if (alpha > 127) ++alpha;
240         d0 = px;
241         FFsx0 = FFd2s[4];
242         FFsy0 = FFd2s[5];
244         for (y = 0; y < h; y++) {
245                 unsigned char *d;
246                 long long FFsx, FFsy;
247                 d = d0;
248                 FFsx = FFsx0;
249                 FFsy = FFsy0;
250                 for (x = 0; x < w; x++) {
251                         unsigned int r, g, b, a;
252                         int i;
253                         r = g = b = a = 0;
254                         for (i = 0; i < size; i++) {
255                                 long sx, sy;
256                                 sx = (FFsx >> FBITS_HP) + (FF_S[2 * i] >> FBITS);
257                                 if ((sx >= 0) && (sx < sw)) {
258                                         sy = (FFsy >> FBITS_HP) + (FF_S[2 * i + 1] >> FBITS);
259                                         if ((sy >= 0) && (sy < sh)) {
260                                                 const unsigned char *s;
261                                                 unsigned int ca;
262                                                 s = spx + sy * srs + sx * 4;
263                                                 ca = NR_PREMUL_112(s[3], alpha);
264                                                 r += NR_PREMUL_123(s[0], ca);
265                                                 g += NR_PREMUL_123(s[1], ca);
266                                                 b += NR_PREMUL_123(s[2], ca);
267                                                 a += ca;
268                                         }
269                                 }
270                         }
271                         a = (a + alpha_rounding_fix) >> (8 + dbits);
272                         if (a != 0) {
273                                 r = (r + rgb_rounding_fix) >> (16 + dbits);
274                                 g = (g + rgb_rounding_fix) >> (16 + dbits);
275                                 b = (b + rgb_rounding_fix) >> (16 + dbits);
276                                 if ((a == 255) || (d[3] == 0)) {
277                                         /* Transparent BG, premul src */
278                                         d[0] = r;
279                                         d[1] = g;
280                                         d[2] = b;
281                                         d[3] = a;
282                                 } else {
283                                         d[0] = NR_COMPOSEPPP_1111 (r, a, d[0]);
284                                         d[1] = NR_COMPOSEPPP_1111 (g, a, d[1]);
285                                         d[2] = NR_COMPOSEPPP_1111 (b, a, d[2]);
286                                         d[3] = NR_COMPOSEA_111(a, d[3]);
287                                 }
288                         }
289                         /* Advance pointers */
290                         FFsx += FFd2s[0];
291                         FFsy += FFd2s[1];
292                         d += 4;
293                 }
294                 FFsx0 += FFd2s[2];
295                 FFsy0 += FFd2s[3];
296                 d0 += rs;
297         }
300 void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
301                                                     const unsigned char *spx, int sw, int sh, int srs,
302                                                     const NR::Matrix &d2s, unsigned int alpha, int xd, int yd)
304         int dbits;
305         long FFd2s[6];
306         long long FFd2s_HP[6]; // with higher precision
307         int i;
309         if (alpha == 0) return;
311         dbits = xd + yd;
313         for (i = 0; i < 6; i++) {
314                 FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5);
315                 FFd2s_HP[i] = (long long) (d2s[i] * (1 << FBITS_HP) + 0.5);;
316         }
318         if (dbits == 0) {
319 #ifdef WITH_MMX
320                 if (NR_PIXOPS_MMX) {
321                         /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
322                         nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha);
323                         return;
324                 }
325 #endif
326                 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, alpha);
327         } else {
328                 int xsize, ysize;
329                 long FFs_x_x_S, FFs_x_y_S, FFs_y_x_S, FFs_y_y_S;
330                 long FF_S[2 * 256];
331                 int x, y;
333                 xsize = (1 << xd);
334                 ysize = (1 << yd);
336                 FFs_x_x_S = FFd2s[0] >> xd;
337                 FFs_x_y_S = FFd2s[1] >> xd;
338                 FFs_y_x_S = FFd2s[2] >> yd;
339                 FFs_y_y_S = FFd2s[3] >> yd;
341                 /* Set up subpixel matrix */
342                 /* fixme: We can calculate that in floating point (Lauris) */
343                 for (y = 0; y < ysize; y++) {
344                         for (x = 0; x < xsize; x++) {
345                                 FF_S[2 * (y * xsize + x)] = FFs_x_x_S * x + FFs_y_x_S * y;
346                                 FF_S[2 * (y * xsize + x) + 1] = FFs_x_y_S * x + FFs_y_y_S * y;
347                         }
348                 }
350 #ifdef WITH_MMX
351                 if (NR_PIXOPS_MMX) {
352                         /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
353                         nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits);
354                         return;
355                 }
356 #endif
357                 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, FF_S, alpha, dbits);
358         }
361 void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, int h, int rs,
362                                                     const unsigned char *spx, int sw, int sh, int srs,
363                                                     const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);