1 #define __NR_COMPOSE_TRANSFORM_C__
3 /*
4 * Pixel buffer rendering library
5 *
6 * Authors:
7 * Lauris Kaplinski <lauris@kaplinski.com>
8 *
9 * This code is in public domain
10 */
12 #ifdef HAVE_CONFIG_H
13 # include "config.h"
14 #endif
16 #include "nr-pixops.h"
17 #include "nr-matrix.h"
#ifdef WITH_MMX

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/*
 * MMX entry points. They are only declared here; no definition is visible
 * in this part of the file. Under C++ they get C linkage.
 */

/* fixme: */
/* Evaluated through NR_PIXOPS_MMX (below) before an MMX composer is called */
int nr_have_mmx (void);

/*
 * Called instead of the C single-sample composer when NR_PIXOPS_MMX is
 * nonzero. FFd2s is an array of long, not long long like the C version takes.
 */
void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (
	unsigned char *px, int w, int h, int rs,
	const unsigned char *spx, int sw, int sh, int srs,
	const long *FFd2s,
	unsigned int alpha);

/*
 * Called instead of the C multi-sample composer when NR_PIXOPS_MMX is
 * nonzero. FF_S is the table of per-sample offsets, dbits the log2 of the
 * number of samples per destination pixel.
 */
void nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (
	unsigned char *px, int w, int h, int rs,
	const unsigned char *spx, int sw, int sh, int srs,
	const long *FFd2s, const long *FF_S,
	unsigned int alpha, int dbits);

/* Nonzero when nr_have_mmx () reports a nonzero value */
#define NR_PIXOPS_MMX (1 && nr_have_mmx ())

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* WITH_MMX */
38 /* fixme: Implement missing (Lauris) */
39 /* fixme: PREMUL colors before calculating average (Lauris) */
41 /* Fixed point precision */
42 #define FBITS 12
43 #define FBITS_HP 18 // In some places we need a higher precision
45 void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
46 const unsigned char *spx, int sw, int sh, int srs,
47 const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
48 void nr_R8G8B8A8_N_EMPTY_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, int h, int rs,
49 const unsigned char *spx, int sw, int sh, int srs,
50 const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
51 void nr_R8G8B8A8_P_EMPTY_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
52 const unsigned char *spx, int sw, int sh, int srs,
53 const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
54 void nr_R8G8B8A8_P_EMPTY_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, int h, int rs,
55 const unsigned char *spx, int sw, int sh, int srs,
56 const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
58 void
59 nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
60 const unsigned char *spx, int sw, int sh, int srs,
61 const NR::Matrix &d2s, unsigned int alpha, int xd, int yd)
62 {
63 int xsize, ysize, size, dbits;
64 long FFs_x_x, FFs_x_y, FFs_y_x, FFs_y_y, FFs__x, FFs__y;
65 long FFs_x_x_S, FFs_x_y_S, FFs_y_x_S, FFs_y_y_S;
66 /* Subpixel positions */
67 int FF_sx_S[256];
68 int FF_sy_S[256];
69 unsigned char *d0;
70 int FFsx0, FFsy0;
71 int x, y;
73 if (alpha == 0) return;
75 xsize = (1 << xd);
76 ysize = (1 << yd);
77 size = xsize * ysize;
78 dbits = xd + yd;
80 /* Set up fixed point matrix */
81 FFs_x_x = (long) (d2s[0] * (1 << FBITS) + 0.5);
82 FFs_x_y = (long) (d2s[1] * (1 << FBITS) + 0.5);
83 FFs_y_x = (long) (d2s[2] * (1 << FBITS) + 0.5);
84 FFs_y_y = (long) (d2s[3] * (1 << FBITS) + 0.5);
85 FFs__x = (long) (d2s[4] * (1 << FBITS) + 0.5);
86 FFs__y = (long) (d2s[5] * (1 << FBITS) + 0.5);
88 FFs_x_x_S = FFs_x_x >> xd;
89 FFs_x_y_S = FFs_x_y >> xd;
90 FFs_y_x_S = FFs_y_x >> yd;
91 FFs_y_y_S = FFs_y_y >> yd;
93 /* Set up subpixel matrix */
94 /* fixme: We can calculate that in floating point (Lauris) */
95 for (y = 0; y < ysize; y++) {
96 for (x = 0; x < xsize; x++) {
97 FF_sx_S[y * xsize + x] = FFs_x_x_S * x + FFs_y_x_S * y;
98 FF_sy_S[y * xsize + x] = FFs_x_y_S * x + FFs_y_y_S * y;
99 }
100 }
102 d0 = px;
103 FFsx0 = FFs__x;
104 FFsy0 = FFs__y;
106 for (y = 0; y < h; y++) {
107 unsigned char *d;
108 long FFsx, FFsy;
109 d = d0;
110 FFsx = FFsx0;
111 FFsy = FFsy0;
112 for (x = 0; x < w; x++) {
113 unsigned int r, g, b, a;
114 long sx, sy;
115 int i;
116 r = g = b = a = 0;
117 for (i = 0; i < size; i++) {
118 sx = (FFsx + FF_sx_S[i]) >> FBITS;
119 if ((sx >= 0) && (sx < sw)) {
120 sy = (FFsy + FF_sy_S[i]) >> FBITS;
121 if ((sy >= 0) && (sy < sh)) {
122 const unsigned char *s;
123 unsigned int ca;
124 s = spx + sy * srs + sx * 4;
125 ca = NR_PREMUL_112 (s[3], alpha);
126 r += NR_PREMUL_121 (s[0], ca);
127 g += NR_PREMUL_121 (s[1], ca);
128 b += NR_PREMUL_121 (s[2], ca);
129 a += NR_NORMALIZE_21(ca);
130 }
131 }
132 }
133 a >>= dbits;
134 if (a != 0) {
135 r = r >> dbits;
136 g = g >> dbits;
137 b = b >> dbits;
138 if (a == 255) {
139 /* Transparent BG, premul src */
140 d[0] = r;
141 d[1] = g;
142 d[2] = b;
143 d[3] = a;
144 } else {
145 unsigned int ca;
146 /* Full composition */
147 ca = NR_COMPOSEA_112(a, d[3]);
148 d[0] = NR_COMPOSENNN_111121 (r, a, d[0], d[3], ca);
149 d[1] = NR_COMPOSENNN_111121 (g, a, d[1], d[3], ca);
150 d[2] = NR_COMPOSENNN_111121 (b, a, d[2], d[3], ca);
151 d[3] = NR_NORMALIZE_21(ca);
152 }
153 }
154 /* Advance pointers */
155 FFsx += FFs_x_x;
156 FFsy += FFs_x_y;
157 d += 4;
158 }
159 FFsx0 += FFs_y_x;
160 FFsy0 += FFs_y_y;
161 d0 += rs;
162 }
163 }
165 void nr_R8G8B8A8_N_R8G8B8A8_N_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, int h, int rs,
166 const unsigned char *spx, int sw, int sh, int srs,
167 const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);
169 static void
170 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (unsigned char *px, int w, int h, int rs,
171 const unsigned char *spx, int sw, int sh, int srs,
172 const long long *FFd2s, unsigned int alpha)
173 {
174 unsigned char *d0;
175 long long FFsx0, FFsy0;
176 int x, y;
178 d0 = px;
179 FFsx0 = FFd2s[4];
180 FFsy0 = FFd2s[5];
182 for (y = 0; y < h; y++) {
183 unsigned char *d;
184 long long FFsx, FFsy;
185 d = d0;
186 FFsx = FFsx0;
187 FFsy = FFsy0;
188 for (x = 0; x < w; x++) {
189 long sx, sy;
190 sx = long(FFsx >> FBITS_HP);
191 if ((sx >= 0) && (sx < sw)) {
192 sy = long(FFsy >> FBITS_HP);
193 if ((sy >= 0) && (sy < sh)) {
194 const unsigned char *s;
195 unsigned int a;
196 s = spx + sy * srs + sx * 4;
197 a = NR_PREMUL_112 (s[3], alpha);
198 if (a != 0) {
199 if ((a == 255*255) || (d[3] == 0)) {
200 /* Transparent BG, premul src */
201 d[0] = NR_PREMUL_121 (s[0], a);
202 d[1] = NR_PREMUL_121 (s[1], a);
203 d[2] = NR_PREMUL_121 (s[2], a);
204 d[3] = NR_NORMALIZE_21(a);
205 } else {
206 d[0] = NR_COMPOSENPP_1211 (s[0], a, d[0]);
207 d[1] = NR_COMPOSENPP_1211 (s[1], a, d[1]);
208 d[2] = NR_COMPOSENPP_1211 (s[2], a, d[2]);
209 d[3] = NR_COMPOSEA_211(a, d[3]);
210 }
211 }
212 }
213 }
214 /* Advance pointers */
215 FFsx += FFd2s[0];
216 FFsy += FFd2s[1];
217 d += 4;
218 }
219 FFsx0 += FFd2s[2];
220 FFsy0 += FFd2s[3];
221 d0 += rs;
222 }
223 }
225 static void
226 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (unsigned char *px, int w, int h, int rs,
227 const unsigned char *spx, int sw, int sh, int srs,
228 const long long *FFd2s, const long *FF_S, unsigned int alpha, int dbits)
229 {
230 int size;
231 unsigned char *d0;
232 long long FFsx0, FFsy0;
233 int x, y;
235 size = (1 << dbits);
236 unsigned alpha_rounding_fix = size * 255;
237 unsigned rgb_rounding_fix = size * (255 * 256);
238 if (alpha > 127) ++alpha;
240 d0 = px;
241 FFsx0 = FFd2s[4];
242 FFsy0 = FFd2s[5];
244 for (y = 0; y < h; y++) {
245 unsigned char *d;
246 long long FFsx, FFsy;
247 d = d0;
248 FFsx = FFsx0;
249 FFsy = FFsy0;
250 for (x = 0; x < w; x++) {
251 unsigned int r, g, b, a;
252 int i;
253 r = g = b = a = 0;
254 for (i = 0; i < size; i++) {
255 long sx, sy;
256 sx = (FFsx >> FBITS_HP) + (FF_S[2 * i] >> FBITS);
257 if ((sx >= 0) && (sx < sw)) {
258 sy = (FFsy >> FBITS_HP) + (FF_S[2 * i + 1] >> FBITS);
259 if ((sy >= 0) && (sy < sh)) {
260 const unsigned char *s;
261 unsigned int ca;
262 s = spx + sy * srs + sx * 4;
263 ca = NR_PREMUL_112(s[3], alpha);
264 r += NR_PREMUL_123(s[0], ca);
265 g += NR_PREMUL_123(s[1], ca);
266 b += NR_PREMUL_123(s[2], ca);
267 a += ca;
268 }
269 }
270 }
271 a = (a + alpha_rounding_fix) >> (8 + dbits);
272 if (a != 0) {
273 r = (r + rgb_rounding_fix) >> (16 + dbits);
274 g = (g + rgb_rounding_fix) >> (16 + dbits);
275 b = (b + rgb_rounding_fix) >> (16 + dbits);
276 if ((a == 255) || (d[3] == 0)) {
277 /* Transparent BG, premul src */
278 d[0] = r;
279 d[1] = g;
280 d[2] = b;
281 d[3] = a;
282 } else {
283 d[0] = NR_COMPOSEPPP_1111 (r, a, d[0]);
284 d[1] = NR_COMPOSEPPP_1111 (g, a, d[1]);
285 d[2] = NR_COMPOSEPPP_1111 (b, a, d[2]);
286 d[3] = NR_COMPOSEA_111(a, d[3]);
287 }
288 }
289 /* Advance pointers */
290 FFsx += FFd2s[0];
291 FFsy += FFd2s[1];
292 d += 4;
293 }
294 FFsx0 += FFd2s[2];
295 FFsy0 += FFd2s[3];
296 d0 += rs;
297 }
298 }
300 void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM (unsigned char *px, int w, int h, int rs,
301 const unsigned char *spx, int sw, int sh, int srs,
302 const NR::Matrix &d2s, unsigned int alpha, int xd, int yd)
303 {
304 int dbits;
305 long FFd2s[6];
306 long long FFd2s_HP[6]; // with higher precision
307 int i;
309 if (alpha == 0) return;
311 dbits = xd + yd;
313 for (i = 0; i < 6; i++) {
314 FFd2s[i] = (long) (d2s[i] * (1 << FBITS) + 0.5);
315 FFd2s_HP[i] = (long long) (d2s[i] * (1 << FBITS_HP) + 0.5);;
316 }
318 if (dbits == 0) {
319 #ifdef WITH_MMX
320 if (NR_PIXOPS_MMX) {
321 /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
322 nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s, alpha);
323 return;
324 }
325 #endif
326 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0 (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, alpha);
327 } else {
328 int xsize, ysize;
329 long FFs_x_x_S, FFs_x_y_S, FFs_y_x_S, FFs_y_y_S;
330 long FF_S[2 * 256];
331 int x, y;
333 xsize = (1 << xd);
334 ysize = (1 << yd);
336 FFs_x_x_S = FFd2s[0] >> xd;
337 FFs_x_y_S = FFd2s[1] >> xd;
338 FFs_y_x_S = FFd2s[2] >> yd;
339 FFs_y_y_S = FFd2s[3] >> yd;
341 /* Set up subpixel matrix */
342 /* fixme: We can calculate that in floating point (Lauris) */
343 for (y = 0; y < ysize; y++) {
344 for (x = 0; x < xsize; x++) {
345 FF_S[2 * (y * xsize + x)] = FFs_x_x_S * x + FFs_y_x_S * y;
346 FF_S[2 * (y * xsize + x) + 1] = FFs_x_y_S * x + FFs_y_y_S * y;
347 }
348 }
350 #ifdef WITH_MMX
351 if (NR_PIXOPS_MMX) {
352 /* WARNING: MMX composer REQUIRES w > 0 and h > 0 */
353 nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s, FF_S, alpha, dbits);
354 return;
355 }
356 #endif
357 nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n (px, w, h, rs, spx, sw, sh, srs, FFd2s_HP, FF_S, alpha, dbits);
358 }
359 }
361 void nr_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_P_TRANSFORM (unsigned char *px, int w, int h, int rs,
362 const unsigned char *spx, int sw, int sh, int srs,
363 const NR::Matrix &d2s, unsigned int alpha, int xd, int yd);