1 .file "nr-compose.c"
3 # Ensure Inkscape is execshield protected
4 .section .note.GNU-stack
5 .previous
7 .text
8 .align 2
9 .globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
10 .type nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,@function
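/*
 * This code is in public domain
 *
 * Stack arguments (offsets are valid once the prologue has set up %ebp):
 *
 * c    32(%ebp)   pointer to the foreground colour; four bytes are read
 * srs  28(%ebp)   value added to spx after every row
 * spx  24(%ebp)   pointer to the mask, read one byte per pixel
 * rs   20(%ebp)   value added to px after every row
 * h    16(%ebp)   number of rows
 * w    12(%ebp)   number of pixels per row
 * px    8(%ebp)   pointer to the destination, four bytes per pixel
 *
 * Local slots. The prologue reserves this stack space, but the code keeps
 * its working values in registers and never reads or writes the slots:
 *
 * r   -8(%ebp)
 * g  -12(%ebp)
 * b  -16(%ebp)
 * a  -20(%ebp)
 * s  -24(%ebp) -> %esi
 * d  -28(%ebp) -> %edi
 * x  -32(%ebp) -> %ebx
 * y  -36(%ebp) -> %ecx
 * ca -40(%ebp)
 *
 * MMX registers (each holds four 16-bit lanes):
 *
 * mm0 Fg     foreground colour widened to words
 * mm1 MMMM   mask byte in every lane, later reused for [255 - alpha of FgM]
 * mm2 FgM    foreground scaled by the mask
 * mm3        scratch, then background pixel and final result
 * mm4        scratch for the divide-by-255 step
 * mm5 255
 * mm6 128
 * mm7 0
 *
 */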
/*
 * nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP (px, w, h, rs, spx, srs, c)
 *
 * Blends one foreground colour through an 8-bit mask onto a buffer of
 * four-byte pixels, in place.  For every pixel whose mask byte m is
 * non-zero:
 *
 *     FgM = Fg * m / 255                       (all four channels)
 *     px  = FgM + (255 - FgM.alpha) * px / 255
 *
 * Alpha is byte 3 of a pixel.  Pixels whose mask byte is 0 are left
 * untouched.  Division by 255 is the rounded approximation
 * t = x + 128; (t + (t >> 8)) >> 8.
 *
 * Arguments are read from the stack: px 8(%ebp), w 12(%ebp),
 * h 16(%ebp), rs 20(%ebp), spx 24(%ebp), srs 28(%ebp), c 32(%ebp).
 * The px and spx argument slots are advanced by rs / srs after each
 * row, i.e. they are used as the row cursors.
 *
 * Preserves %ebx, %esi, %edi, %ebp.  Clobbers %eax, %ecx, %mm0-%mm7 and
 * flags.  Executes emms before returning.
 */
nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$36, %esp		/* reserved local area, not accessed below */
	pushl	%edi
	pushl	%esi

	/*
	 * Empty rectangle: nothing to draw.  Without this check the
	 * count-down loops below would wrap around on a zero count and run
	 * far past the end of both buffers.
	 * NOTE(review): assumes w and h are signed ints - confirm against
	 * the C prototype.
	 */
	cmpl	$0, 16(%ebp)
	jle	.Lexit
	cmpl	$0, 12(%ebp)
	jle	.Lexit

	/* %mm7 = [0 0 0 0], used as the zero source when widening bytes */
	pxor	%mm7, %mm7

	/* %mm6 = [128 128 128 128], rounding term for the /255 step */
	movl	$0x80808080, %eax
	movd	%eax, %mm6
	punpcklbw %mm7, %mm6

	/* %mm5 = [255 255 255 255], used to form 255 - alpha */
	movl	$0xffffffff, %eax
	movd	%eax, %mm5
	punpcklbw %mm7, %mm5

	/* %mm0 = foreground colour, one 16-bit lane per channel */
	movl	32(%ebp), %eax
	movd	(%eax), %mm0
	punpcklbw %mm7, %mm0

	/*
	 * A fully opaque colour gets the loop with the m == 255 shortcut.
	 * The test must look at the colour's alpha byte in memory; %al
	 * only holds the low byte of the pointer itself.
	 */
	cmpb	$0xff, 3(%eax)
	je	.Lopaque

	/* ---- general colour ------------------------------------------- */

	/* %ecx = rows remaining */
	movl	16(%ebp), %ecx
.Lfory:
	/* d = px, s = spx, x = w */
	movl	8(%ebp), %edi
	movl	24(%ebp), %esi
	movl	12(%ebp), %ebx
.Lforx:
	/* m = *s; a zero mask byte leaves the pixel alone */
	movzbl	(%esi), %eax
	testl	%eax, %eax
	jz	.Lclip

	/* %mm1 = [m m m m] */
	movd	%eax, %mm1
	punpcklwd %mm1, %mm1
	punpckldq %mm1, %mm1

	/* %mm2 = FgM = Fg * m / 255 */
	movq	%mm0, %mm2
	pmullw	%mm1, %mm2
	paddw	%mm6, %mm2
	movq	%mm2, %mm3
	psrlw	$8, %mm3
	paddw	%mm3, %mm2
	psrlw	$8, %mm2

	/* %mm1 = 255 - FgM.alpha in every lane (x ^ 255 == 255 - x) */
	movq	%mm2, %mm1
	punpckhwd %mm1, %mm1
	punpckhdq %mm1, %mm1
	pxor	%mm5, %mm1

	/* %mm3 = Bg, widened to words */
	movd	(%edi), %mm3
	punpcklbw %mm7, %mm3

	/* %mm3 = FgM + (255 - FgM.alpha) * Bg / 255 */
	pmullw	%mm1, %mm3
	paddw	%mm6, %mm3
	movq	%mm3, %mm4
	psrlw	$8, %mm4
	paddw	%mm4, %mm3
	psrlw	$8, %mm3
	paddw	%mm2, %mm3

	/* Narrow back to bytes (saturating) and store the pixel */
	packuswb %mm3, %mm3
	movd	%mm3, (%edi)

.Lclip:
	addl	$4, %edi
	incl	%esi
	decl	%ebx
	jnz	.Lforx

	/* px += rs, spx += srs */
	movl	20(%ebp), %eax
	addl	%eax, 8(%ebp)
	movl	28(%ebp), %eax
	addl	%eax, 24(%ebp)

	decl	%ecx
	jnz	.Lfory

.Lexit:
	emms				/* leave the x87 state usable for the caller */
	popl	%esi
	popl	%edi
	addl	$36, %esp
	popl	%ebx
	popl	%ebp
	ret

	/* ---- opaque colour (alpha == 255) ------------------------------ */

.Lopaque:
	/* %ecx = rows remaining */
	movl	16(%ebp), %ecx
.Lo_fory:
	/* d = px, s = spx, x = w */
	movl	8(%ebp), %edi
	movl	24(%ebp), %esi
	movl	12(%ebp), %ebx
.Lo_forx:
	/* m = *s; 0 skips the pixel, 255 stores the colour unblended */
	movzbl	(%esi), %eax
	testl	%eax, %eax
	jz	.Lo_clip
	cmpb	$0xff, %al
	je	.Lo_full

	/* %mm1 = [m m m m] */
	movd	%eax, %mm1
	punpcklwd %mm1, %mm1
	punpckldq %mm1, %mm1

	/* %mm2 = FgM = Fg * m / 255 */
	movq	%mm0, %mm2
	pmullw	%mm1, %mm2
	paddw	%mm6, %mm2
	movq	%mm2, %mm3
	psrlw	$8, %mm3
	paddw	%mm3, %mm2
	psrlw	$8, %mm2

	/* %mm1 = 255 - FgM.alpha in every lane */
	movq	%mm2, %mm1
	punpckhwd %mm1, %mm1
	punpckhdq %mm1, %mm1
	pxor	%mm5, %mm1

	/* %mm3 = Bg, widened to words */
	movd	(%edi), %mm3
	punpcklbw %mm7, %mm3

	/* %mm3 = FgM + (255 - FgM.alpha) * Bg / 255 */
	pmullw	%mm1, %mm3
	paddw	%mm6, %mm3
	movq	%mm3, %mm4
	psrlw	$8, %mm4
	paddw	%mm4, %mm3
	psrlw	$8, %mm3
	paddw	%mm2, %mm3
	jmp	.Lo_store

.Lo_full:
	/* Opaque colour under a full mask replaces the pixel outright */
	movq	%mm0, %mm3

.Lo_store:
	/* Narrow back to bytes (saturating) and store the pixel */
	packuswb %mm3, %mm3
	movd	%mm3, (%edi)

.Lo_clip:
	addl	$4, %edi
	incl	%esi
	decl	%ebx
	jnz	.Lo_forx

	/* px += rs, spx += srs */
	movl	20(%ebp), %eax
	addl	%eax, 8(%ebp)
	movl	28(%ebp), %eax
	addl	%eax, 24(%ebp)

	decl	%ecx
	jnz	.Lo_fory
	jmp	.Lexit
229 .Lfe1:
230 .size nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
231 .ident "GCC: (GNU) 3.2"