.file "nr-compose.c"

# Ensure Inkscape is execshield protected
.section .note.GNU-stack
.previous

/*
 * This code is in public domain
 *
 * Syntax: GNU as, AT&T operand order.  Target: 32-bit x86 with MMX.
 * All arguments are read from the stack relative to %ebp.
 *
 * nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
 *
 * Composites a rectangle of 4-byte premultiplied source pixels over a
 * rectangle of 3-byte destination pixels, in place.  For every pixel:
 *
 *     Fg  = src * alpha / 255          (all four source channels)
 *     dst = Fg + (255 - FgA) * dst / 255
 *
 * where FgA is the scaled byte 3 of the source pixel.  Divisions by 255
 * are done with the rounding approximation
 *     t = x + 128;  result = (t + (t >> 8)) >> 8
 *
 * Presumed C prototype (types are not visible from this file - confirm
 * against the caller):
 *     void nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P(unsigned char *px, int w, int h,
 *                                          int rs, const unsigned char *spx,
 *                                          int srs, unsigned int alpha);
 *
 * Stack arguments:
 *     px     8(%ebp)   first destination row (3 bytes per pixel)
 *     w     12(%ebp)   pixels per row
 *     h     16(%ebp)   number of rows
 *     rs    20(%ebp)   byte distance between destination rows
 *     spx   24(%ebp)   first source row (4 bytes per pixel)
 *     srs   28(%ebp)   byte distance between source rows
 *     alpha 32(%ebp)   global opacity; only the low byte is used
 *
 * The px and spx argument slots are advanced in place and serve as the
 * row cursors.  A call with w <= 0 or h <= 0 (as signed 32-bit values)
 * returns without touching memory or MMX state.
 *
 * Register use:
 *     %esi  s, source pixel cursor
 *     %edi  d, destination pixel cursor
 *     %ebx  x, pixels left in the current row
 *     %ecx  y, rows left
 *     %eax, %edx  scratch
 *
 *     mm0   [a a a a]           (global alpha, non-opaque path only)
 *     mm1   Fg scaled by alpha
 *     mm2   scratch / [255 - FgA] in every lane
 *     mm3   Bg, then the composited result
 *     mm4   scratch
 *     mm5   [255 255 255 255]
 *     mm6   [128 128 128 128]
 *     mm7   [0 0 0 0]
 *
 * Preserved: %ebx, %esi, %edi, %ebp.
 * Clobbered: %eax, %ecx, %edx, flags, mm0-mm7 (emms is executed before
 * returning whenever MMX registers were written).
 */

.equ ARG_PX, 8
.equ ARG_W, 12
.equ ARG_H, 16
.equ ARG_RS, 20
.equ ARG_SPX, 24
.equ ARG_SRS, 28
.equ ARG_ALPHA, 32

/* Byte 3 of a source pixel, as seen in a 32-bit little-endian load */
.equ SRC_ALPHA_MASK, 0xff000000

.text
.align 2
.globl nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
.type nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,@function

nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        pushl   %edi
        pushl   %esi

        /*
         * Both loops below test their counter only after the body has
         * run, so an empty rectangle must be rejected up front or the
         * counters would wrap around.
         */
        cmpl    $0, ARG_W(%ebp)
        jle     .Lreturn
        cmpl    $0, ARG_H(%ebp)
        jle     .Lreturn

        /* Load %mm7 with [0 0 0 0] */
        pxor    %mm7, %mm7

        /* Load %mm6 with [128 128 128 128] */
        movl    $0x80808080, %eax
        movd    %eax, %mm6
        punpcklbw %mm7, %mm6

        /* Load %mm5 with [255 255 255 255] */
        movl    $0xffffffff, %eax
        movd    %eax, %mm5
        punpcklbw %mm7, %mm5

        /* Full opacity takes the path that skips the Fg * a scaling */
        movzbl  ARG_ALPHA(%ebp), %eax
        cmpb    $0xff, %al
        je      .Lopaque

        /* Load %mm0 with [a a a a] */
        movd    %eax, %mm0
        punpcklwd %mm0, %mm0
        punpckldq %mm0, %mm0

        /* for (y = h; y != 0; y--) */
        movl    ARG_H(%ebp), %ecx
.Lfory:
        /* d = px, s = spx */
        movl    ARG_PX(%ebp), %edi
        movl    ARG_SPX(%ebp), %esi

        /* for (x = w; x != 0; x--) */
        movl    ARG_W(%ebp), %ebx
.Lforx:
        /* Fg -> %mm1; a source pixel with zero alpha leaves d untouched */
        /* fixme: Do we have to bother about alignment here? (Lauris) */
        movl    (%esi), %eax
        testl   $SRC_ALPHA_MASK, %eax
        jz      .Lclip
        movd    %eax, %mm1
        punpcklbw %mm7, %mm1

        /* [Fg * a / 255] -> mm1 */
        pmullw  %mm0, %mm1
        paddw   %mm6, %mm1
        movq    %mm1, %mm2
        psrlw   $8, %mm2
        paddw   %mm2, %mm1
        psrlw   $8, %mm1

        /* [255 - FgA] -> mm2: broadcast lane 3, then x ^ 255 == 255 - x */
        movq    %mm1, %mm2
        punpckhwd %mm2, %mm2
        punpckhdq %mm2, %mm2
        pxor    %mm5, %mm2

        /*
         * Bg -> mm3.  Exactly three bytes are read so that the last
         * pixel of the destination never touches the byte after it.
         */
        movzwl  (%edi), %eax
        movzbl  2(%edi), %edx
        shll    $16, %edx
        orl     %edx, %eax
        movd    %eax, %mm3
        punpcklbw %mm7, %mm3

        /* Fg + ((255 - FgA) * Bg) / 255 */
        pmullw  %mm2, %mm3
        paddw   %mm6, %mm3
        movq    %mm3, %mm4
        psrlw   $8, %mm4
        paddw   %mm4, %mm3
        psrlw   $8, %mm3
        paddw   %mm1, %mm3

        /* Store pixel: only the three colour bytes are written */
        packuswb %mm3, %mm3
        movd    %mm3, %eax
        movb    %al, 0(%edi)
        shrl    $8, %eax
        movb    %al, 1(%edi)
        shrl    $8, %eax
        movb    %al, 2(%edi)

.Lclip:
        addl    $3, %edi
        addl    $4, %esi

        decl    %ebx
        jnz     .Lforx

        /* px += rs, spx += srs */
        movl    ARG_RS(%ebp), %eax
        addl    %eax, ARG_PX(%ebp)
        movl    ARG_SRS(%ebp), %eax
        addl    %eax, ARG_SPX(%ebp)

        decl    %ecx
        jnz     .Lfory

.Lexit:
        /* Leave MMX state so the caller may use x87 again */
        emms
.Lreturn:
        popl    %esi
        popl    %edi
        popl    %ebx
        popl    %ebp
        ret

.Lopaque:
        /* for (y = h; y != 0; y--) */
        movl    ARG_H(%ebp), %ecx
.Lo_fory:
        /* d = px, s = spx */
        movl    ARG_PX(%ebp), %edi
        movl    ARG_SPX(%ebp), %esi

        /* for (x = w; x != 0; x--) */
        movl    ARG_W(%ebp), %ebx
.Lo_forx:
        /*
         * Fg -> %mm1.  Zero source alpha skips the pixel; source alpha
         * 255 stores the colour bytes straight from %eax.
         */
        /* fixme: Do we have to bother about alignment here? (Lauris) */
        movl    (%esi), %eax
        testl   $SRC_ALPHA_MASK, %eax
        jz      .Lo_clip
        cmpl    $SRC_ALPHA_MASK, %eax
        jae     .Lo_store
        movd    %eax, %mm1
        punpcklbw %mm7, %mm1

        /* [255 - FgA] -> mm2: broadcast lane 3, then x ^ 255 == 255 - x */
        movq    %mm1, %mm2
        punpckhwd %mm2, %mm2
        punpckhdq %mm2, %mm2
        pxor    %mm5, %mm2

        /*
         * Bg -> mm3.  Exactly three bytes are read so that the last
         * pixel of the destination never touches the byte after it.
         */
        movzwl  (%edi), %eax
        movzbl  2(%edi), %edx
        shll    $16, %edx
        orl     %edx, %eax
        movd    %eax, %mm3
        punpcklbw %mm7, %mm3

        /* Fg + ((255 - FgA) * Bg) / 255 */
        pmullw  %mm2, %mm3
        paddw   %mm6, %mm3
        movq    %mm3, %mm4
        psrlw   $8, %mm4
        paddw   %mm4, %mm3
        psrlw   $8, %mm3
        paddw   %mm1, %mm3

        /* Store pixel: only the three colour bytes are written */
        packuswb %mm3, %mm3
        movd    %mm3, %eax
.Lo_store:
        movb    %al, 0(%edi)
        shrl    $8, %eax
        movb    %al, 1(%edi)
        shrl    $8, %eax
        movb    %al, 2(%edi)

.Lo_clip:
        addl    $3, %edi
        addl    $4, %esi

        decl    %ebx
        jnz     .Lo_forx

        /* px += rs, spx += srs */
        movl    ARG_RS(%ebp), %eax
        addl    %eax, ARG_PX(%ebp)
        movl    ARG_SRS(%ebp), %eax
        addl    %eax, ARG_SPX(%ebp)

        decl    %ecx
        jnz     .Lo_fory

        jmp     .Lexit

.Lfe1:
.size nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,.Lfe1-nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
.ident "GCC: (GNU) 3.2"