1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
|
/* File name recorded for this object (kept exactly as in the original). */
.file "nr-compose.c"
# Ensure Inkscape is execshield protected
/*
 * Emitting a .note.GNU-stack section tells the linker that this object
 * does not need an executable stack; .previous then switches back to the
 * section that was active before the note section was opened.
 */
.section .note.GNU-stack
.previous
/* The routine itself is assembled into the code section. */
.text
.align 2
/* Export the entry point and mark it as a function symbol. */
.globl nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
.type nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,@function
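/*
* This code is in the public domain.
*
* Arguments (offsets from %ebp):
* alpha 32(%ebp)
* srs 28(%ebp)
* spx 24(%ebp)
* rs 20(%ebp)
* h 16(%ebp)
* w 12(%ebp)
* px 8(%ebp)
*
* Local slots (stack space is reserved for them, but the routine never
* accesses it; the values marked "->" live in registers instead):
* r -8(%ebp)
* g -12(%ebp)
* b -16(%ebp)
* a -20(%ebp)
* s -24(%ebp) -> %esi
* d -28(%ebp) -> %edi
* x -32(%ebp) -> %ebx
* y -36(%ebp) -> %ecx
* ca -40(%ebp)
*
* MMX registers (four 16-bit lanes each):
* mm0 [a a a a], the global alpha (not loaded when alpha is 0xff)
* mm1 Fg, scaled by a / 255 unless alpha is 0xff
* mm2 scratch, then 255 - FgA in every lane
* mm3 Bg, then the blended result
* mm4 scratch
* mm5 [255 255 255 255]
* mm6 [128 128 128 128]
* mm7 [0 0 0 0]
*
*/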
/*
 * nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
 *
 * Composites a block of 4-byte source pixels (three colour bytes followed
 * by an alpha byte) over a block of 3-byte destination pixels, in place.
 * For every source pixel whose alpha byte is non-zero:
 *
 *     Fg  = src * alpha / 255            (skipped when alpha == 0xff)
 *     dst = Fg + (255 - FgA) * dst / 255
 *
 * where x / 255 is computed as (t + (t >> 8)) >> 8 with t = x + 128.
 * The colour bytes are used as they are, i.e. treated as already
 * multiplied by the pixel's own alpha.
 *
 * Syntax: AT&T.  Target: 32-bit x86 with MMX, arguments on the stack.
 * NOTE(review): the C prototype is not visible here; presumably
 *   void f(unsigned char *px, int w, int h, int rs,
 *          const unsigned char *spx, int srs, unsigned int alpha)
 *
 * In:   8(%ebp) px     destination base pointer
 *      12(%ebp) w      pixels per row
 *      16(%ebp) h      number of rows
 *      20(%ebp) rs     byte distance between destination rows
 *      24(%ebp) spx    source base pointer
 *      28(%ebp) srs    byte distance between source rows
 *      32(%ebp) alpha  global opacity; only the low byte is read
 * Out:  nothing is returned in a register; destination memory is updated.
 * Saved and restored: %ebp, %ebx, %edi, %esi.
 * Clobbered: %eax, %ecx, %edx, flags, %mm0-%mm7 (emms is issued on exit).
 * The px and spx argument slots are advanced row by row, so they do not
 * hold their original values on return.
 */
nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	subl	$36, %esp		/* local area; kept so the frame layout is unchanged */
	pushl	%edi
	pushl	%esi

	/*
	 * Nothing to do for an empty block.  Without this guard the
	 * decl/jnz loops below would wrap around and run 2^32 times.
	 * NOTE(review): w and h are compared as signed values - confirm
	 * against the C prototype.
	 */
	cmpl	$0, 12(%ebp)		/* w <= 0 ? */
	jle	.Lreturn
	cmpl	$0, 16(%ebp)		/* h <= 0 ? */
	jle	.Lreturn

	/* %mm7 = [0 0 0 0], used to widen bytes to 16-bit lanes */
	pxor	%mm7, %mm7
	/* %mm6 = [128 128 128 128], rounding term of the /255 step */
	movl	$0x80808080, %eax
	movd	%eax, %mm6
	punpcklbw %mm7, %mm6
	/* %mm5 = [255 255 255 255], xor mask that yields 255 - x */
	movl	$0xffffffff, %eax
	movd	%eax, %mm5
	punpcklbw %mm7, %mm5

	/* Full opacity takes the shorter path that skips the Fg * a step */
	movzbl	32(%ebp), %eax
	cmpb	$0xff, %al
	je	.Lopaque

	/* %mm0 = [a a a a] */
	movd	%eax, %mm0
	punpcklwd %mm0, %mm0
	punpckldq %mm0, %mm0

	/* for (y = h; y != 0; y--) */
	movl	16(%ebp), %ecx
.Lfory:
	movl	8(%ebp), %edi		/* d = px  */
	movl	24(%ebp), %esi		/* s = spx */
	/* for (x = w; x != 0; x--) */
	movl	12(%ebp), %ebx
.Lforx:
	/* fixme: Do we have to bother about alignment here? (Lauris) */
	movl	(%esi), %eax		/* source pixel, alpha in bits 24..31 */
	testl	$0xff000000, %eax
	jz	.Lclip			/* alpha byte is zero: leave d untouched */
	movd	%eax, %mm1
	punpcklbw %mm7, %mm1		/* Fg -> four 16-bit lanes */

	/* mm1 = Fg * a / 255 */
	pmullw	%mm0, %mm1
	paddw	%mm6, %mm1
	movq	%mm1, %mm2
	psrlw	$8, %mm2
	paddw	%mm2, %mm1
	psrlw	$8, %mm1

	/* mm2 = [255 - FgA] in every lane (alpha is the top lane of mm1) */
	movq	%mm1, %mm2
	punpckhwd %mm2, %mm2
	punpckhdq %mm2, %mm2
	pxor	%mm5, %mm2

	/*
	 * mm3 = Bg.  Only the three bytes that belong to this pixel are
	 * loaded; a 4-byte load would read one byte past the end of the
	 * destination buffer on its last pixel.
	 */
	movzwl	(%edi), %eax
	movzbl	2(%edi), %edx
	shll	$16, %edx
	orl	%edx, %eax
	movd	%eax, %mm3
	punpcklbw %mm7, %mm3

	/* mm3 = Fg + ((255 - FgA) * Bg) / 255 */
	pmullw	%mm2, %mm3
	paddw	%mm6, %mm3
	movq	%mm3, %mm4
	psrlw	$8, %mm4
	paddw	%mm4, %mm3
	psrlw	$8, %mm3
	paddw	%mm1, %mm3

	/* Store the three colour bytes; the fourth lane is discarded */
	packuswb %mm3, %mm3
	movd	%mm3, %eax
	movb	%al, 0(%edi)
	shrl	$8, %eax
	movb	%al, 1(%edi)
	shrl	$8, %eax
	movb	%al, 2(%edi)
.Lclip:
	addl	$3, %edi		/* next destination pixel */
	addl	$4, %esi		/* next source pixel */
	decl	%ebx
	jnz	.Lforx

	/* px += rs; spx += srs (kept in the argument slots) */
	movl	20(%ebp), %eax
	addl	%eax, 8(%ebp)
	movl	28(%ebp), %eax
	addl	%eax, 24(%ebp)
	decl	%ecx
	jnz	.Lfory

.Lexit:
	emms				/* leave MMX state before returning */
.Lreturn:
	popl	%esi
	popl	%edi
	addl	$36, %esp
	popl	%ebx
	popl	%ebp
	ret

	/* ---- alpha == 0xff: source pixels are used without scaling ---- */
.Lopaque:
	/* for (y = h; y != 0; y--) */
	movl	16(%ebp), %ecx
.Lo_fory:
	movl	8(%ebp), %edi		/* d = px  */
	movl	24(%ebp), %esi		/* s = spx */
	/* for (x = w; x != 0; x--) */
	movl	12(%ebp), %ebx
.Lo_forx:
	/* fixme: Do we have to bother about alignment here? (Lauris) */
	movl	(%esi), %eax
	testl	$0xff000000, %eax
	jz	.Lo_clip		/* alpha byte is zero: leave d untouched */
	cmpl	$0xff000000, %eax
	jae	.Lo_store		/* alpha byte is 0xff: plain copy of %eax */
	movd	%eax, %mm1
	punpcklbw %mm7, %mm1		/* Fg -> four 16-bit lanes */

	/* mm2 = [255 - FgA] in every lane */
	movq	%mm1, %mm2
	punpckhwd %mm2, %mm2
	punpckhdq %mm2, %mm2
	pxor	%mm5, %mm2

	/* mm3 = Bg, loaded as exactly three bytes (see the note above) */
	movzwl	(%edi), %eax
	movzbl	2(%edi), %edx
	shll	$16, %edx
	orl	%edx, %eax
	movd	%eax, %mm3
	punpcklbw %mm7, %mm3

	/* mm3 = Fg + ((255 - FgA) * Bg) / 255 */
	pmullw	%mm2, %mm3
	paddw	%mm6, %mm3
	movq	%mm3, %mm4
	psrlw	$8, %mm4
	paddw	%mm4, %mm3
	psrlw	$8, %mm3
	paddw	%mm1, %mm3

	packuswb %mm3, %mm3
	movd	%mm3, %eax
.Lo_store:
	/* Store the three low bytes of %eax */
	movb	%al, 0(%edi)
	shrl	$8, %eax
	movb	%al, 1(%edi)
	shrl	$8, %eax
	movb	%al, 2(%edi)
.Lo_clip:
	addl	$3, %edi		/* next destination pixel */
	addl	$4, %esi		/* next source pixel */
	decl	%ebx
	jnz	.Lo_forx

	/* px += rs; spx += srs (kept in the argument slots) */
	movl	20(%ebp), %eax
	addl	%eax, 8(%ebp)
	movl	28(%ebp), %eax
	addl	%eax, 24(%ebp)
	decl	%ecx
	jnz	.Lo_fory
	jmp	.Lexit
.Lfe1:
	.size	nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,.Lfe1-nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
	.ident	"GCC: (GNU) 3.2"
|