1 .file "nr-compose-transform.c"
3 # Ensure Inkscape is execshield protected
4 .section .note.GNU-stack
5 .previous
7 .text
.align 2
.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0

.type nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0,@function

/*
 * This code is in public domain
 *
 */

/*
 * Composites an affinely transformed RGBA source over an RGBA destination
 * (premultiplied over premultiplied), one point sample per destination
 * pixel, scaled by a global alpha. 32-bit cdecl; all arguments on stack.
 *
 * Stack arguments (roles established by their use below):
 *    8(%ebp)  destination pixel base
 *   12(%ebp)  row width in pixels
 *             NOTE(review): inner loop is do/while shaped (decl/jnz at the
 *             bottom) -- assumes width > 0; a 0 would wrap %ebx. Confirm
 *             callers guarantee this.
 *   16(%ebp)  number of rows
 *   20(%ebp)  destination rowstride, bytes
 *   24(%ebp)  source pixel base
 *   28(%ebp)  source width  (clip bound on sx >> 12)
 *   32(%ebp)  source height (clip bound on sy >> 12)
 *   36(%ebp)  source rowstride, bytes
 *   40(%ebp)  transform table, 6 longs, 12 fractional bits:
 *             [0],[4]   per-pixel x/y step
 *             [8],[12]  per-row   x/y step
 *             [16],[20] initial x/y
 *   44(%ebp)  global alpha, low byte used
 *
 * Locals:
 *   -8(%ebp)  destination row pointer
 *   -12(%ebp) source x at start of current row (fixed point)
 *   -16(%ebp) source y at start of current row (fixed point)
 *   -24(%ebp) row counter
 *   -36(%ebp) current source y
 * Loop registers: %esi = current source x, %edi = destination pixel
 * pointer, %ebx = pixels remaining in the row (counts down).
 */
nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $48, %esp
pushl %edi
pushl %esi

/* Load %mm7 with [0 0 0 0] -- zero source for byte->word unpacking */
movl $0, %eax
movd %eax, %mm7

/* Load %mm6 with [128 128 128 128] -- rounding bias for the x/255 trick */
movl $0x80808080, %eax
movd %eax, %mm6
punpcklbw %mm7, %mm6

/* Load %mm5 with [255 255 255 255] -- used to form (255 - a) via pxor */
movl $0xffffffff, %eax
movd %eax, %mm5
punpcklbw %mm7, %mm5

/* Load %mm0 with [a a a a] -- global alpha broadcast to 4 word lanes */
movzbl 44(%ebp), %eax
movd %eax, %mm0
punpcklwd %mm0, %mm0
punpckldq %mm0, %mm0

/* Init row pointer and the row-start source coordinates from
 * transform[16]/[20]; row counter starts at 0 */
movl 8(%ebp), %eax
movl %eax, -8(%ebp)
movl 40(%ebp), %eax
addl $16, %eax
movl (%eax), %eax
movl %eax, -12(%ebp)
movl 40(%ebp), %eax
addl $20, %eax
movl (%eax), %eax
movl %eax, -16(%ebp)
movl $0, -24(%ebp)
.L29:
/* Row loop: while (row < height) */
movl -24(%ebp), %eax
cmpl 16(%ebp), %eax
jl .L32
jmp .L28
.L32:
/* Per-row setup: %edi = dest pixel, %esi = sx, -36 = sy, %ebx = width */
movl -8(%ebp), %edi

movl -12(%ebp), %eax
movl %eax, %esi
movl -16(%ebp), %eax
movl %eax, -36(%ebp)

movl 12(%ebp), %ebx
.for_x_0:

/* Clip test: pixel is skipped unless 0 <= sx>>12 < sw (sign checked
 * before the arithmetic shift so negatives are rejected) */
movl %esi, %ecx
cmpl $0, %ecx
js .clip_0
sarl $12, %ecx
cmpl 28(%ebp), %ecx
jge .clip_0
shll $2, %ecx

/* ... and 0 <= sy>>12 < sh */
movl -36(%ebp), %eax
cmpl $0, %eax
js .clip_0
sarl $12, %eax
cmpl 32(%ebp), %eax
jge .clip_0
imull 36(%ebp), %eax

/* %eax = source address: base + sy*srs + sx*4 */
addl %ecx, %eax
addl 24(%ebp), %eax

/* Fg -> %mm1; a fully transparent source pixel contributes nothing */
movl (%eax), %eax
testl $0xff000000, %eax
jz .clip_0
movd %eax, %mm1
punpcklbw %mm7, %mm1

/* [a a a 255] -> %mm3: source alpha replicated into R/G/B lanes via
 * the 0x10101 multiply; alpha lane forced to 255 */
shrl $24, %eax
movl $0x10101, %edx
mull %edx
orl $0xff000000, %eax
movd %eax, %mm3
punpcklbw %mm7, %mm3

/* [Fg * a] -> mm1, divided by 255 with the rounded
 * (x + 128 + ((x + 128) >> 8)) >> 8 approximation */
pmullw %mm3, %mm1
paddw %mm6, %mm1
movq %mm1, %mm4
psrlw $8, %mm4
paddw %mm4, %mm1
psrlw $8, %mm1

/* Multiply by global alpha, same /255 rounding */
pmullw %mm0, %mm1
paddw %mm6, %mm1
movq %mm1, %mm4
psrlw $8, %mm4
paddw %mm4, %mm1
psrlw $8, %mm1

/* [255 - FgA] -> mm2: broadcast the alpha word to all lanes, then
 * complement against [255 255 255 255] */
movq %mm1, %mm2
punpckhwd %mm2, %mm2
punpckhdq %mm2, %mm2
pxor %mm5, %mm2

/* Bg -> mm3 */
movd (%edi), %mm3
punpcklbw %mm7, %mm3

/* Fg + ((255 - FgA) * Bg) / 255 -- premultiplied "over" compositing */
pmullw %mm2, %mm3
paddw %mm6, %mm3
movq %mm3, %mm4
psrlw $8, %mm4
paddw %mm4, %mm3
psrlw $8, %mm3
paddw %mm1, %mm3

/* Store pixel */
packuswb %mm3, %mm3
movd %mm3, (%edi)

.clip_0:
.L37:
/* Advance source coords by the per-pixel step (transform[0]/[4]) */
movl 40(%ebp), %ecx
movl (%ecx), %edx
addl %edx, %esi
movl 4(%ecx), %edx
addl %edx, -36(%ebp)

addl $4, %edi

decl %ebx
jnz .for_x_0

.L34:
/* NOTE(review): %ecx still holds the transform pointer left over from
 * the last inner-loop iteration -- fragile but correct while the inner
 * loop always runs at least once. Advance the row-start source coords
 * by the per-row step (transform[8]/[12]). */
movl 8(%ecx), %edx
addl %edx, -12(%ebp)
movl 12(%ecx), %edx
addl %edx, -16(%ebp)

/* Next destination row; bump row counter */
movl 20(%ebp), %edx
leal -8(%ebp), %eax
addl %edx, (%eax)
leal -24(%ebp), %eax
incl (%eax)
jmp .L29
.L28:
/* Clear MMX state before returning to FP-using code */
emms
popl %esi
popl %edi
addl $48, %esp
popl %ebx
popl %ebp
ret
.Lfe2:
.size nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0,.Lfe2-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0
182 /*
183 *
184 * dbits 52(%ebp)
185 * alpha 48(%ebp)
186 * FF_S 44(%ebp)
187 *
188 * d -32(%ebp) -> %edi
189 * i -60(%ebp) -> %esi
190 * sx -64(%ebp) -> %ebx
191 * sy -68(%ebp)
192 * s -72(%ebp)
193 *
194 * %mm0 a a a a
195 * %mm1 FgA
196 * %mm2 SumFgA
197 * %mm3 a a a 255
198 * %mm4
199 */
.align 2
.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n
.type nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n,@function
/*
 * Supersampling variant of the transform above: for each destination
 * pixel it accumulates (1 << dbits) source samples taken at the offsets
 * listed in the FF_S table, averages them with a right shift by dbits,
 * multiplies by the global alpha and composites over the destination.
 *
 * Stack arguments 8..40(%ebp) match the _0 variant (dest base, width,
 * height, dest rowstride, source base, source width/height/rowstride,
 * transform table); additionally:
 *   44(%ebp)  FF_S: sample offset table, (1 << dbits) pairs of longs
 *             (dx, dy) in the same fixed-point format as the transform
 *   48(%ebp)  global alpha, low byte used
 *   52(%ebp)  dbits: log2 of the sample count, also the averaging shift
 *
 * Locals:
 *   -8(%ebp)  sample count = 1 << dbits
 *   -12(%ebp) destination row pointer
 *   -16/-20(%ebp) source x/y at start of current row
 *   -24(%ebp) column counter, -28(%ebp) row counter
 *   -32(%ebp) current destination pixel pointer
 *   -36/-40(%ebp) current source x/y base for this pixel
 *
 * NOTE(review): the per-channel sums accumulate in 16-bit word lanes, so
 * this assumes 255 * (1 << dbits) <= 65535, i.e. dbits <= 8 -- confirm
 * against callers.
 */
nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n:
pushl %ebp
movl %esp, %ebp
pushl %ebx
subl $72, %esp
pushl %edi
pushl %esi

/* Load %mm7 with [0 0 0 0] -- zero source for byte->word unpacking */
movl $0, %eax
movd %eax, %mm7

/* Load %mm6 with [128 128 128 128] -- rounding bias for the x/255 trick */
movl $0x80808080, %eax
movd %eax, %mm6
punpcklbw %mm7, %mm6

/* Load %mm5 with [255 255 255 255] -- used to form (255 - a) via pxor */
movl $0xffffffff, %eax
movd %eax, %mm5
punpcklbw %mm7, %mm5

/* Load %mm0 with [a a a a] -- global alpha broadcast to 4 word lanes */
movzbl 48(%ebp), %eax
movd %eax, %mm0
punpcklwd %mm0, %mm0
punpckldq %mm0, %mm0

/* -8(%ebp) = sample count = 1 << dbits */
movl $1, %eax
movzbl 52(%ebp), %ecx
sall %cl, %eax
movl %eax, -8(%ebp)
/* Init row pointer and row-start source coords (transform[16]/[20]) */
movl 8(%ebp), %eax
movl %eax, -12(%ebp)
movl 40(%ebp), %eax
addl $16, %eax
movl (%eax), %eax
movl %eax, -16(%ebp)
movl 40(%ebp), %eax
addl $20, %eax
movl (%eax), %eax
movl %eax, -20(%ebp)
movl $0, -28(%ebp)
.L44:
/* Row loop: while (row < height) */
movl -28(%ebp), %eax
cmpl 16(%ebp), %eax
jl .L47
jmp .exit_n
.L47:
/* Per-row setup: dest pixel pointer and source coord bases */
movl -12(%ebp), %eax
movl %eax, -32(%ebp)
movl -16(%ebp), %eax
movl %eax, -36(%ebp)
movl -20(%ebp), %eax
movl %eax, -40(%ebp)
movl $0, -24(%ebp)
.L48:
/* Column loop: while (x < width) */
movl -24(%ebp), %eax
cmpl 12(%ebp), %eax
jl .L51
jmp .L49
.L51:

/* Zero accumulator */
movq %mm7, %mm2

/* Set i to dptr (size - 1) -- byte offset of last 8-byte FF_S entry */
movl -8(%ebp), %esi
sub $1, %esi
shll $3, %esi

movl 44(%ebp), %edi
movl -36(%ebp), %ecx

.for_i_n:
/* sx = FF_S[i].dx + base; clip: 0 <= sx>>12 < sw */
movl (%edi,%esi), %ebx
addl %ecx, %ebx
/* Test negative before shift */
cmpl $0, %ebx
js .next_i_n
sarl $12, %ebx
cmpl 28(%ebp), %ebx
jge .next_i_n
/* We multiply sx by 4 here */
shll $2, %ebx

/* sy = FF_S[i].dy + base; clip: 0 <= sy>>12 < sh */
movl 4(%edi,%esi), %eax
addl -40(%ebp), %eax
/* Test negative before shift */
cmpl $0, %eax
js .next_i_n
sarl $12, %eax
cmpl 32(%ebp), %eax
jge .next_i_n
/* We multiply sy by srs here */
imull 36(%ebp), %eax

/* %eax = source address: base + sy*srs + sx*4 */
addl %ebx, %eax
addl 24(%ebp), %eax

/* Fg -> %mm1; fully transparent samples contribute nothing */
movl (%eax), %eax
testl $0xff000000, %eax
jz .next_i_n
movd %eax, %mm1
punpcklbw %mm7, %mm1

/* [a a a 255] -> %mm3: sample alpha replicated via 0x10101 multiply,
 * alpha lane forced to 255 */
shrl $24, %eax
movl $0x10101, %edx
mull %edx
orl $0xff000000, %eax
movd %eax, %mm3
punpcklbw %mm7, %mm3

/* [Fg * a] -> mm1, rounded /255 via (x + 128 + ((x+128)>>8)) >> 8 */
pmullw %mm3, %mm1
paddw %mm6, %mm1
movq %mm1, %mm4
psrlw $8, %mm4
paddw %mm4, %mm1
psrlw $8, %mm1

/* Add to accumulator */
paddw %mm1, %mm2

.next_i_n:
/* Walk FF_S backwards; borrow (CF) after going below 0 ends the loop */
subl $8, %esi
jnb .for_i_n

/* Divide components by sample size (average: sum >> dbits) */
movd 52(%ebp), %mm3
psrlw %mm3, %mm2

/* Multiply by alpha, same /255 rounding */
pmullw %mm0, %mm2
paddw %mm6, %mm2
movq %mm2, %mm4
psrlw $8, %mm4
paddw %mm4, %mm2
psrlw $8, %mm2

/* [255 - FgA] -> mm1: broadcast alpha word, complement against 255s */
movq %mm2, %mm1
punpckhwd %mm1, %mm1
punpckhdq %mm1, %mm1
pxor %mm5, %mm1

movl -32(%ebp), %edi
/* Bg -> mm3 */
movd (%edi), %mm3
punpcklbw %mm7, %mm3

/* Fg + ((255 - FgA) * Bg) / 255 -- premultiplied "over" compositing */
pmullw %mm1, %mm3
paddw %mm6, %mm3
movq %mm3, %mm4
psrlw $8, %mm4
paddw %mm4, %mm3
psrlw $8, %mm3
paddw %mm2, %mm3

/* Store pixel */
packuswb %mm3, %mm3
movd %mm3, (%edi)

.L58:
/* Advance source coords by per-pixel step (transform[0]/[4]),
 * dest pointer by 4 bytes, bump column counter */
movl 40(%ebp), %eax
movl (%eax), %edx
leal -36(%ebp), %eax
addl %edx, (%eax)
movl 40(%ebp), %eax
addl $4, %eax
movl (%eax), %edx
leal -40(%ebp), %eax
addl %edx, (%eax)
leal -32(%ebp), %eax
addl $4, (%eax)
leal -24(%ebp), %eax
incl (%eax)
jmp .L48
.L49:
/* End of row: advance row-start coords by per-row step
 * (transform[8]/[12]), dest row pointer by rowstride, bump row counter */
movl 40(%ebp), %eax
addl $8, %eax
movl (%eax), %edx
leal -16(%ebp), %eax
addl %edx, (%eax)
movl 40(%ebp), %eax
addl $12, %eax
movl (%eax), %edx
leal -20(%ebp), %eax
addl %edx, (%eax)
movl 20(%ebp), %edx
leal -12(%ebp), %eax
addl %edx, (%eax)
leal -28(%ebp), %eax
incl (%eax)
jmp .L44

.exit_n:
/* Clear MMX state before returning to FP-using code */
emms
popl %esi
popl %edi
addl $72, %esp
popl %ebx
popl %ebp
ret
.Lfe3:
.size nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n,.Lfe3-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n
414 .ident "GCC: (GNU) 3.2"