summaryrefslogtreecommitdiff
path: root/patches/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S
blob: 37261e5728f55aa75f64d62b77c19134240b8ada (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
/* Source file name recorded in the object file.  NOTE(review): together
   with the .ident string at the end of the file this suggests the
   skeleton came from compiler output for nr-compose.c - confirm. */
	.file	"nr-compose.c"

# Ensure Inkscape is execshield protected
/* Emit an empty .note.GNU-stack section and switch straight back to the
   previously active section.  GNU toolchains read the presence of this
   section (without the executable flag) as "this object does not need an
   executable stack". */
	.section .note.GNU-stack
	.previous
	
/* Everything below is code.  NOTE(review): on x86 ELF targets GAS takes
   the .align operand as a byte count, so this asks for 2-byte alignment
   of the entry point - confirm this is the intended value. */
	.text
	.align 2
/* Export the routine and mark the symbol as a function for ELF tools. */
.globl nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
	.type	nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,@function

/*
 * This code is in public domain
 *
 * nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
 *
 * Syntax: GNU as, AT&T operand order.  Target: 32-bit x86 with MMX.
 * All arguments are read from the stack relative to %ebp.
 *
 * Composites a rectangle of 4-byte source pixels over a rectangle of
 * 3-byte destination pixels, in place.  Each source pixel is read as one
 * little-endian dword whose top byte (the fourth byte in memory) is its
 * alpha.  The source colour is added to the attenuated background without
 * being scaled by its own alpha, i.e. it is treated as already multiplied
 * by that alpha (presumably what the _P suffix stands for).
 *
 * Per pixel, with DIV255(x) = ((x + 128) + ((x + 128) >> 8)) >> 8 :
 *
 *	Fg  = source pixel			(alpha == 255)
 *	Fg  = DIV255 (source pixel * alpha)	(alpha != 255, all four bytes)
 *	dst = saturate (Fg + DIV255 ((255 - FgA) * dst))
 *
 * Source pixels whose own alpha byte is zero leave the destination pixel
 * untouched.
 *
 * Stack arguments:
 *
 * alpha 32(%ebp)	global opacity, only the low byte is used
 * srs	 28(%ebp)	added to spx after every row (source row stride)
 * spx	 24(%ebp)	first source pixel of the current row
 * rs	 20(%ebp)	added to px after every row (destination row stride)
 * h	 16(%ebp)	number of rows
 * w	 12(%ebp)	number of pixels per row
 * px	 8(%ebp)	first destination pixel of the current row
 *
 * The px and spx argument slots are advanced in place while iterating.
 * Nothing is done when w or h is zero or negative (both are compared as
 * signed 32-bit values).
 *
 * Integer registers:
 *
 * %eax, %edx	scratch
 * %ecx		rows left
 * %ebx		pixels left in the current row	(saved and restored)
 * %esi		source pointer			(saved and restored)
 * %edi		destination pointer		(saved and restored)
 *
 * MMX registers (all clobbered, emms is executed before returning):
 *
 * mm0 A	global alpha in all four words (alpha != 255 path only)
 * mm1 Fg	source pixel, one word per byte
 * mm2		scratch, then [255 - FgA] in all four words
 * mm3		background, then the composited result
 * mm4		scratch
 * mm5 255
 * mm6 128
 * mm7 0
 *
*/

nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P:
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi

/* Empty rectangle: leave now.  The loops below count down with decl/jnz,
   so a zero or negative count would wrap around instead of terminating. */
	cmpl	$0, 12(%ebp)
	jle	.Lexit
	cmpl	$0, 16(%ebp)
	jle	.Lexit

/* Load %mm7 with [0 0 0 0] */
	pxor	%mm7, %mm7

/* Load %mm6 with [128 128 128 128] (rounding term of DIV255) */
	movl	$0x80808080, %eax
	movd	%eax, %mm6
	punpcklbw %mm7, %mm6

/* Load %mm5 with [255 255 255 255] */
	movl	$0xffffffff, %eax
	movd	%eax, %mm5
	punpcklbw %mm7, %mm5

/* Full global opacity takes the cheaper loop that skips Fg * alpha */
	movzbl	32(%ebp), %eax
	cmpb	$0xff, %al
	jz	.Lopaque

/* Load %mm0 with [a a a a] */
	movd	%eax, %mm0
	punpcklwd %mm0, %mm0
	punpckldq %mm0, %mm0

/* for (y = h; y != 0; y--) */
	movl	16(%ebp), %ecx
.Lfory:

/* d = px */
/* s = spx */
	movl	8(%ebp), %edi
	movl	24(%ebp), %esi

/* for (x = w; x != 0; x--) */
	movl	12(%ebp), %ebx
.Lforx:

/* Fg -> %mm1, skipping source pixels whose alpha byte is zero */
/* The dword load is done without any alignment check (original fixme
   by Lauris: do we have to bother about alignment here?) */
	movl	(%esi), %eax
	testl	$0xff000000, %eax
	jz	.Lclip
	movd	%eax, %mm1
	punpcklbw %mm7, %mm1

/* DIV255 (Fg * a) -> mm1 */
	pmullw	%mm0, %mm1
	paddw	%mm6, %mm1
	movq	%mm1, %mm2
	psrlw	$8, %mm2
	paddw	%mm2, %mm1
	psrlw	$8, %mm1

/* [255 - FgA] -> mm2: broadcast the alpha word, then x ^ 255 = 255 - x */
	movq	%mm1, %mm2
	punpckhwd %mm2, %mm2
	punpckhdq %mm2, %mm2
	pxor	%mm5, %mm2

/* Bg -> mm3.  Exactly three bytes are read, so the last pixel of the
   buffer never touches the byte behind it; the fourth word stays zero
   and its result is never stored. */
	movzwl	(%edi), %eax
	movzbl	2(%edi), %edx
	shll	$16, %edx
	orl	%edx, %eax
	movd	%eax, %mm3
	punpcklbw %mm7, %mm3

/* Fg + DIV255 ((255 - FgA) * Bg) */
	pmullw	%mm2, %mm3
	paddw	%mm6, %mm3
	movq	%mm3, %mm4
	psrlw	$8, %mm4
	paddw	%mm4, %mm3
	psrlw	$8, %mm3
	paddw	%mm1, %mm3

/* Store pixel: saturate to bytes and write the low three only */
	packuswb %mm3, %mm3
	movd	%mm3, %eax
	movb	%al, 0(%edi)
	shrl	$8, %eax
	movb	%al, 1(%edi)
	shrl	$8, %eax
	movb	%al, 2(%edi)

.Lclip:
	addl	$3, %edi
	addl	$4, %esi

	decl	%ebx
	jnz	.Lforx

/* px += rs, spx += srs (done on the argument slots themselves) */
	movl	20(%ebp), %eax
	addl	%eax, 8(%ebp)
	movl	28(%ebp), %eax
	addl	%eax, 24(%ebp)

	decl	%ecx
	jnz	.Lfory

.Lexit:
/* Leave MMX state before returning, then undo the prologue */
	emms
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp
	ret

.Lopaque:
/* for (y = h; y != 0; y--) */
	movl	16(%ebp), %ecx
.Lo_fory:

/* d = px */
/* s = spx */
	movl	8(%ebp), %edi
	movl	24(%ebp), %esi

/* for (x = w; x != 0; x--) */
	movl	12(%ebp), %ebx
.Lo_forx:

/* Fg -> %mm1 */
/* The dword load is done without any alignment check (original fixme
   by Lauris: do we have to bother about alignment here?) */
	movl	(%esi), %eax
	testl	$0xff000000, %eax
	jz	.Lo_clip
/* Unsigned >= 0xff000000 means the alpha byte is 255: the source pixel
   replaces the destination, so store the colour bytes straight from %eax */
	cmpl	$0xff000000, %eax
	jnb	.Lo_store
	movd	%eax, %mm1
	punpcklbw %mm7, %mm1

/* [255 - FgA] -> mm2: broadcast the alpha word, then x ^ 255 = 255 - x */
	movq	%mm1, %mm2
	punpckhwd %mm2, %mm2
	punpckhdq %mm2, %mm2
	pxor	%mm5, %mm2

/* Bg -> mm3.  Exactly three bytes are read, so the last pixel of the
   buffer never touches the byte behind it; the fourth word stays zero
   and its result is never stored. */
	movzwl	(%edi), %eax
	movzbl	2(%edi), %edx
	shll	$16, %edx
	orl	%edx, %eax
	movd	%eax, %mm3
	punpcklbw %mm7, %mm3

/* Fg + DIV255 ((255 - FgA) * Bg) */
	pmullw	%mm2, %mm3
	paddw	%mm6, %mm3
	movq	%mm3, %mm4
	psrlw	$8, %mm4
	paddw	%mm4, %mm3
	psrlw	$8, %mm3
	paddw	%mm1, %mm3

/* Store pixel: saturate to bytes and write the low three only */
	packuswb %mm3, %mm3
	movd	%mm3, %eax
.Lo_store:
	movb	%al, 0(%edi)
	shrl	$8, %eax
	movb	%al, 1(%edi)
	shrl	$8, %eax
	movb	%al, 2(%edi)

.Lo_clip:
	addl	$3, %edi
	addl	$4, %esi

	decl	%ebx
	jnz	.Lo_forx

/* px += rs, spx += srs (done on the argument slots themselves) */
	movl	20(%ebp), %eax
	addl	%eax, 8(%ebp)
	movl	28(%ebp), %eax
	addl	%eax, 24(%ebp)

	decl	%ecx
	jnz	.Lo_fory

	jmp	.Lexit

/* .Lfe1 marks the first byte after the routine; .size records the
   function length in the symbol table as (end label - entry label). */
.Lfe1:
	.size	nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,.Lfe1-nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
/* Tool identification string.  NOTE(review): presumably carried over
   from the compiler output this file was derived from - confirm. */
	.ident	"GCC: (GNU) 3.2"