Drop the MMX assembly optimizations

The MMX routines have only ever been built on 32-bit IA (x86) platforms, and we could probably get better mileage out of the currently implemented pixops just by rearranging the C code and letting compilers do the optimizations for us. We should definitely consider either using pixman directly or replacing the slow pixops with SSE builtins instead.
author: Emmanuele Bassi <ebassi@gnome.org> 2018-04-28 11:36:19 +0100
committer: Emmanuele Bassi <ebassi@gnome.org> 2018-04-30 18:11:40 +0100
commit: 4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f (patch)
tree: 4213bfe3fca3055cd7691a57bdba5de083458dcd /gdk-pixbuf/pixops
parent: b4acb66c1e026fc3cd8a6d1ac961f68fb6fb91e2 (diff)
download: gdk-pixbuf-4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f.tar.gz
8 files changed, 3 insertions, 1014 deletions
diff --git a/gdk-pixbuf/pixops/DETAILS b/gdk-pixbuf/pixops/DETAILS
index acf16f57e..08597f5f7 100644
--- a/gdk-pixbuf/pixops/DETAILS
+++ b/gdk-pixbuf/pixops/DETAILS
@@ -280,76 +280,3 @@ Integer tricks for compositing
 
 
 
-MMX Code
-========
-
-Line functions are provided in MMX functionsfor a few special 
-cases:
-
- n_x = n_y = 2
-
-   src_channels = 3 dest_channels = 3    op = scale
-   src_channels = 4 with alpha dest_channels = 4 no alpha  op = composite
-   src_channels = 4 with alpha dest_channels = 4 no alpha  op = composite_color
-
-For the case n_x = n_y = 2 - primarily hit when scaling up with bilinear
-scaling, we can take advantage of the fact that multiple destination
-pixels will be composed from the same source pixels.
-
-That is a destination pixel is a linear combination of the source
-pixels around it:
-
-
-  S0                     S1
-
-
-
-
-
-       D  D' D'' ...
-
-
-
-
-  S2                     S3
-
-Each mmx register is 64 bits wide, so we can unpack a source pixel
-into the low 8 bits of 4 16 bit words, and store it into a mmx 
-register.
-
-For each destination pixel, we first make sure that we have pixels S0
-... S3 loaded into registers mm0 ...mm3. (This will often involve not
-doing anything or moving mm1 and mm3 into mm0 and mm1 then reloading
-mm1 and mm3 with new values).
-
-Then we load up the appropriate weights for the 4 corner pixels
-based on the offsets of the destination pixel within the source
-pixels.
-
-We have preexpanded the weights to 64 bits wide and truncated the
-range to 8 bits, so an original filter value of 
-
- 0x5321 would be expanded to
-
- 0x0053005300530053
-
-For source buffers without alpha, we simply do a multiply-add
-of the weights, giving us a 16 bit quantity for the result
-that we shift left by 8 and store in the destination buffer.
-
-When the source buffer has alpha, then things become more
-complicated - when we load up mm0 and mm3, we premultiply
-the alpha, so they contain:
-
- (a*ff >> 8) (r*a >> 8) (g*a >> 8) (b*a >> a)
-
-Then when we multiply by the weights, and add we end up
-with premultiplied r,g,b,a in the range of 0 .. 0xff * 0ff,
-call them A,R,G,B
-
-We then need to composite with the dest pixels - which 
-we do by:
-
- r_dest = (R + ((0xff * 0xff - A) >> 8) * r_dest) >> 8
-
-(0xff * 0xff) 
diff --git a/gdk-pixbuf/pixops/README b/gdk-pixbuf/pixops/README
index 354c3a197..382fca128 100644
--- a/gdk-pixbuf/pixops/README
+++ b/gdk-pixbuf/pixops/README
@@ -138,23 +138,6 @@ TODO
   switching around conditionals and inner loops in various
   places.
 
-* Right now, in several of the most common cases, there are
-  optimized mmx routines, but no optimized C routines.
-
-  For instance, there is a 
-
-    pixops_composite_line_22_4a4_mmx()
-
-  But no 
-  
-    pixops_composite_line_22_4a4()
-
-  Also, it may be desirable to include a few more special cases - in particular:
-
-    pixops_composite_line_22_4a3()
-
-  May be desirable.
-
 * Scaling down images by large scale factors is _slow_ since huge filter
   matrixes are computed. (e.g., to scale down by a factor of 100, we compute
   101x101 filter matrixes. At some point, it would be more efficent to
diff --git a/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S
deleted file mode 100644
index c062cad91..000000000
--- a/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-	.file	"composite_line_22_4a4_mmx.S"
-	.version	"01.01"
-gcc2_compiled.:
-.text
-	.align 16
-
-#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)	
-	
-/* Magic indicating no need for an executable stack */
-#if !defined __powerpc64__ && !defined __ia64__
-.section .note.GNU-stack;  .previous
-#endif
-	
-.globl _pixops_composite_line_22_4a4_mmx
-	.type	 _pixops_composite_line_22_4a4_mmx,@function
-_pixops_composite_line_22_4a4_mmx:
-	
-#else
-	
-.globl __pixops_composite_line_22_4a4_mmx
-__pixops_composite_line_22_4a4_mmx:
-	
-#endif
-/*
- * Arguments
- *		
- * weights:	 8(%ebp)
- * p:	        12(%ebp)	%esi
- * q1:	        16(%ebp)	
- * q2:	        20(%ebp)	
- * xstep:       24(%ebp)	
- * p_end:       28(%ebp)
- * xinit:       32(%ebp)
- *	
-*/
-/*
- * Function call entry
- */
-	pushl %ebp
-	movl %esp,%ebp
-	subl $28,%esp
-	pushl %edi
-	pushl %esi
-	pushl %ebx
-/* Locals:	
- * int x                      %ebx
- * int x_scaled             -24(%ebp)
- */
-
-/*
- * Setup
- */
-/* Initialize variables */	
-	movl 32(%ebp),%ebx
-	movl 32(%ebp),%edx
-	sarl $16,%edx
-	movl 12(%ebp),%esi
-
-	movl %edx,-24(%ebp)
-
-	cmpl 28(%ebp),%esi
-	jnb  .out
-
-/* Load initial values into %mm1, %mm3 */
-	shll $2, %edx
-
-	pxor %mm4, %mm4
-	
-	movl 16(%ebp),%edi
-	movl (%edi, %edx), %eax
-	movd (%edi, %edx), %mm5
-	punpcklbw %mm4, %mm5
-	shrl $24, %eax
-	movl $0x010101, %ecx
-	mull %ecx
-	orl  $0xff000000, %eax
-	movd %eax, %mm1
-	punpcklbw %mm4, %mm1
-	pmullw %mm5,%mm1
-
-	movl -24(%ebp),%edx
-	shll $2, %edx
-		
-	movl 20(%ebp),%edi
-	movl (%edi, %edx), %eax
-	movd (%edi, %edx), %mm5
-	punpcklbw %mm4, %mm5
-	shrl $24, %eax
-	movl $0x010101, %ecx
-	mull %ecx
-	orl  $0xff000000, %eax
-	movd %eax, %mm3
-	punpcklbw %mm4, %mm3
-	pmullw %mm5,%mm3
-
-	psrlw $8,%mm1
-	psrlw $8,%mm3
-
-	addl $65536,%ebx
-	movl %ebx,%edx
-	sarl $16,%edx
-
-	jmp .newx
-	.p2align 4,,7
-.loop:
-/* int x_index = (x & 0xf000) >> 12 */
-	movl %ebx,%eax
-	andl $0xf000,%eax
-	shrl $7,%eax
-
-	movq (%edi,%eax),%mm4
-	pmullw %mm0,%mm4
-	movq 8(%edi,%eax),%mm5
-	pmullw %mm1,%mm5
-	movq 16(%edi,%eax),%mm6
-	movq 24(%edi,%eax),%mm7
-	pmullw %mm2,%mm6
-	pmullw %mm3,%mm7
-	paddw %mm4, %mm5
-	paddw %mm6, %mm7
-	paddw %mm5, %mm7
-
-	movl $0xffff,%ecx
-	movd %ecx,%mm4
-	psllq $48,%mm4
-	movq %mm4,%mm6
-	psubw %mm7,%mm4
-	pand %mm6,%mm4
-	
-	movq %mm4,%mm5
-	psrlq $16,%mm4
-	por %mm4,%mm5
-	psrlq $32,%mm5
-	por %mm4,%mm5
-	
-	psrlw $8,%mm5
-
-	movd (%esi),%mm7
-	pxor %mm4,%mm4
-	punpcklbw %mm4, %mm7
-		
-	pmullw %mm7,%mm5
-
-/* x += x_step; */
-	addl 24(%ebp),%ebx
-/* x_scale = x >> 16; */
-	movl %ebx,%edx
-	sarl $16,%edx
-
-	paddw %mm5,%mm6
-
-	psrlw $8,%mm6
-	packuswb %mm6, %mm6 
-	movd %mm6,(%esi)
-
-	addl $4, %esi
-		
-	cmpl %esi,28(%ebp)
-	je   .out
-
-	cmpl %edx,-24(%ebp)
-	je   .loop
-
-.newx:
-	movl %edx,-24(%ebp)
-/*
- * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
- */
-	movq %mm1, %mm0
-	movq %mm3, %mm2
-
-	shll $2, %edx
-
-/* #	%mm4 will always be already clear here	 */
-/* #	pxor %mm4, %mm4 */
-
-	movl 16(%ebp),%edi
-	movl (%edi, %edx), %eax
-	movd (%edi, %edx), %mm5
-	punpcklbw %mm4, %mm5
-	shrl $24, %eax
-	movl $0x010101, %ecx
-	mull %ecx
-/* 
- *	mull destroyed %edx, need to reconstitute 
- */
-	movl -24(%ebp),%edx
-	shll $2, %edx
-
-	orl  $0xff000000, %eax
-	movd %eax, %mm1
-	punpcklbw %mm4, %mm1
-	pmullw %mm5,%mm1
-		
-	movl 20(%ebp),%edi
-	movl (%edi, %edx), %eax
-	movd (%edi, %edx), %mm5
-	punpcklbw %mm4, %mm5
-	shrl $24, %eax
-	movl $0x010101, %ecx
-	mull %ecx
-	orl  $0xff000000, %eax
-	movd %eax, %mm3
-	punpcklbw %mm4, %mm3
-	pmullw %mm5,%mm3
-	
-	psrlw $8,%mm1
-	psrlw $8,%mm3
-
-	movl 8(%ebp),%edi
-	
-	jmp .loop
-
-.out:
-	movl %esi,%eax
-	emms
-	leal -40(%ebp),%esp
-	popl %ebx
-	popl %esi
-	popl %edi
-	movl %ebp,%esp
-	popl %ebp
-	ret
diff --git a/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S
deleted file mode 100644
index f72a8bf5d..000000000
--- a/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-	.file	"composite_line_color_22_4a4_mmx.S"
-	.version	"01.01"
-gcc2_compiled.:
-.text
-	.align 16
-
-#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)	
-
-/* Magic indicating no need for an executable stack */
-#if !defined __powerpc64__ && !defined __ia64__
-.section .note.GNU-stack;  .previous
-#endif
-	
-.globl _pixops_composite_line_color_22_4a4_mmx
-	.type	 _pixops_composite_line_color_22_4a4_mmx,@function
-_pixops_composite_line_color_22_4a4_mmx:
-
-#else
-
-.globl __pixops_composite_line_color_22_4a4_mmx
-__pixops_composite_line_color_22_4a4_mmx:
-	
-#endif
-/*
- * Arguments
- *		
- * weights:	 8(%ebp)
- * p:	        12(%ebp)	%esi
- * q1:	        16(%ebp)	
- * q2:	        20(%ebp)	
- * xstep:       24(%ebp)	
- * p_end:       28(%ebp)
- * xinit:       32(%ebp)
- * dest_x:	36(%ebp)
- * check_shift:	40(%ebp)
- * colors:	44(%ebp)
- *	
-*/
-
-/*
- * Function call entry
- */
-	pushl %ebp
-	movl %esp,%ebp
-	subl $28,%esp
-	pushl %edi
-	pushl %esi
-	pushl %ebx
-/* Locals:	
- * int x                      %ebx
- * int x_scaled             -24(%ebp)
- */
-
-/*
- * Setup
- */
-/* Initialize variables */	
-	movl 32(%ebp),%ebx
-	movl 32(%ebp),%edx
-	sarl $16,%edx
-	movl 12(%ebp),%esi
-
-	movl %edx,-24(%ebp)
-
-	cmpl 28(%ebp),%esi
-	jnb  .out
-
-/* Load initial values into %mm1, %mm3 */
-	shll $2, %edx
-
-	pxor %mm4, %mm4
-
-	movl 16(%ebp),%edi
-	movl (%edi, %edx), %eax
-	movd (%edi, %edx), %mm5
-	punpcklbw %mm4, %mm5
-	shrl $24, %eax
-	movl $0x010101, %ecx
-	mull %ecx
-	orl  $0xff000000, %eax
-	movd %eax, %mm1
-	punpcklbw %mm4, %mm1
-	pmullw %mm5,%mm1
-
-/* 
- *	mull destroyed %edx, need to reconstitute 
- */
-	movl -24(%ebp),%edx
-	shll $2, %edx
-		
-	movl 20(%ebp),%edi
-	movl (%edi, %edx), %eax
-	movd (%edi, %edx), %mm5
-	punpcklbw %mm4, %mm5
-	shrl $24, %eax
-	movl $0x010101, %ecx
-	mull %ecx
-	orl  $0xff000000, %eax
-	movd %eax, %mm3
-	punpcklbw %mm4, %mm3
-	pmullw %mm5,%mm3
-
-	psrlw $8,%mm1
-	psrlw $8,%mm3
-
-	addl $65536,%ebx
-	movl %ebx,%edx
-	sarl $16,%edx
-
-	jmp .newx
-	.p2align 4,,7
-.loop:
-/* int x_index = (x & 0xf000) >> 12 */
-	movl %ebx,%eax
-	andl $0xf000,%eax
-	shrl $7,%eax
-
-	movq (%edi,%eax),%mm4
-	pmullw %mm0,%mm4
-	movq 8(%edi,%eax),%mm5
-	pmullw %mm1,%mm5
-	movq 16(%edi,%eax),%mm6
-	movq 24(%edi,%eax),%mm7
-	pmullw %mm2,%mm6
-	pmullw %mm3,%mm7
-	paddw %mm4, %mm5
-	paddw %mm6, %mm7
-	paddw %mm5, %mm7
-
-	movl $0xffff,%ecx
-	movd %ecx,%mm4
-	psllq $48,%mm4
-	movq %mm4,%mm6
-	psubw %mm7,%mm4
-	pand %mm6,%mm4
-	
-	movq %mm4,%mm5
-	psrlq $16,%mm4
-	por %mm4,%mm5
-	psrlq $32,%mm5
-	por %mm4,%mm5
-	
-	psrlw $8,%mm5
-
-	movl 36(%ebp),%eax
-	incl 36(%ebp)
-
-	movl 40(%ebp),%ecx
-	shrl %cl,%eax
-	andl $1,%eax
-
-	movl 44(%ebp),%ecx
-	movq (%ecx,%eax,8),%mm6
-
-	pmullw %mm6,%mm5
-
-/* x += x_step; */
-	addl 24(%ebp),%ebx
-/* x_scale = x >> 16; */
-	movl %ebx,%edx
-	sarl $16,%edx
-
-	paddw %mm5,%mm7
-
-	psrlw $8,%mm7
-	packuswb %mm7, %mm7 
-	movd %mm7,(%esi)
-
-	addl $4, %esi
-		
-	cmpl %esi,28(%ebp)
-	je   .out
-
-	cmpl %edx,-24(%ebp)
-	je   .loop
-
-.newx:
-	movl %edx,-24(%ebp)
-/*
- * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
- */
-	movq %mm1, %mm0
-	movq %mm3, %mm2
-
-	shll $2, %edx
-
-	pxor %mm4, %mm4
-
-	movl 16(%ebp),%edi
-	movl (%edi, %edx), %eax
-	movd (%edi, %edx), %mm5
-	punpcklbw %mm4, %mm5
-	shrl $24, %eax
-	movl $0x010101, %ecx
-	mull %ecx
-/* 
- *	mull destroyed %edx, need to reconstitute 
- */
-	movl -24(%ebp),%edx
-	shll $2, %edx
-
-	orl  $0xff000000, %eax
-	movd %eax, %mm1
-	punpcklbw %mm4, %mm1
-	pmullw %mm5,%mm1
-		
-	movl 20(%ebp),%edi
-	movl (%edi, %edx), %eax
-	movd (%edi, %edx), %mm5
-	punpcklbw %mm4, %mm5
-	shrl $24, %eax
-	movl $0x010101, %ecx
-	mull %ecx
-	orl  $0xff000000, %eax
-	movd %eax, %mm3
-	punpcklbw %mm4, %mm3
-	pmullw %mm5,%mm3
-	
-	psrlw $8,%mm1
-	psrlw $8,%mm3
-
-	movl 8(%ebp),%edi
-	
-	jmp .loop
-
-.out:
-	movl %esi,%eax
-	emms
-	leal -40(%ebp),%esp
-	popl %ebx
-	popl %esi
-	popl %edi
-	movl %ebp,%esp
-	popl %ebp
-	ret
diff --git a/gdk-pixbuf/pixops/have_mmx.S b/gdk-pixbuf/pixops/have_mmx.S
deleted file mode 100644
index ce0bb0088..000000000
--- a/gdk-pixbuf/pixops/have_mmx.S
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-	.file	"have_mmx.S"
-	.version	"01.01"
-gcc2_compiled.:
-.text
-	.align 16
-
-#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)
-
-/* Magic indicating no need for an executable stack */
-#if !defined __powerpc64__ && !defined __ia64__
-.section .note.GNU-stack;  .previous
-#endif
-	
-.globl _pixops_have_mmx
-	.type	 _pixops_have_mmx,@function
-_pixops_have_mmx:
-
-#else
-
-.globl __pixops_have_mmx
-__pixops_have_mmx:
-
-#endif
-	
-	push	%ebx
-
-/* # Check if bit 21 in flags word is writeable */
-
-	pushfl	
-	popl	%eax
-	movl	%eax,%ebx
-	xorl	$0x00200000, %eax
-	pushl   %eax
-	popfl
-	pushfl
-	popl	%eax
-
-	cmpl	%eax, %ebx
-
-	je .notfound
-
-/* # OK, we have CPUID */
-
-	movl	$1, %eax
-	cpuid
-	
-	test	$0x00800000, %edx
-	jz	.notfound
-
-	movl	$1, %eax
-	jmp	.out
-
-.notfound:
-	movl  	$0, %eax
-.out:	
-	popl	%ebx
-	ret
-
diff --git a/gdk-pixbuf/pixops/pixops-internal.h b/gdk-pixbuf/pixops/pixops-internal.h
deleted file mode 100644
index 6497c2424..000000000
--- a/gdk-pixbuf/pixops/pixops-internal.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#ifdef USE_MMX
-guchar *_pixops_scale_line_22_33_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init);
-guchar *_pixops_composite_line_22_4a4_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init);
-guchar *_pixops_composite_line_color_22_4a4_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init, int dest_x, int check_shift, int *colors);
-int _pixops_have_mmx (void);
-#endif
-
diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c
index f6535f1f3..f1b75f820 100644
--- a/gdk-pixbuf/pixops/pixops.c
+++ b/gdk-pixbuf/pixops/pixops.c
@@ -23,7 +23,6 @@
 
 #include "../fallback-c89.c"
 #include "pixops.h"
-#include "pixops-internal.h"
 
 #define SUBSAMPLE_BITS 4
 #define SUBSAMPLE (1 << SUBSAMPLE_BITS)
@@ -837,36 +836,6 @@ composite_line_22_4a4 (int *weights, int n_x, int n_y,
   return dest;
 }
 
-#ifdef USE_MMX
-static guchar *
-composite_line_22_4a4_mmx_stub (int *weights, int n_x, int n_y, guchar *dest,
-				int dest_x, guchar *dest_end,
-				int dest_channels, int dest_has_alpha,
-				guchar **src, int src_channels,
-				gboolean src_has_alpha, int x_init,
-				int x_step, int src_width, int check_size,
-				guint32 color1, guint32 color2)
-{
-  guint32 mmx_weights[16][8];
-  int j;
-
-  for (j=0; j<16; j++)
-    {
-      mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
-      mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
-    }
-
-  return _pixops_composite_line_22_4a4_mmx (mmx_weights, dest, src[0], src[1],
-					    x_step, dest_end, x_init);
-}
-#endif /* USE_MMX */
-
 static void
 composite_pixel_color (guchar *dest, int dest_x, int dest_channels,
 		       int dest_has_alpha, int src_has_alpha, int check_size,
@@ -980,44 +949,6 @@ composite_line_color (int *weights, int n_x, int n_y, guchar *dest,
   return dest;
 }
 
-#ifdef USE_MMX
-static guchar *
-composite_line_color_22_4a4_mmx_stub (int *weights, int n_x, int n_y,
-				      guchar *dest, int dest_x,
-				      guchar *dest_end, int dest_channels,
-				      int dest_has_alpha, guchar **src,
-				      int src_channels, gboolean src_has_alpha,
-				      int x_init, int x_step, int src_width,
-				      int check_size, guint32 color1,
-				      guint32 color2)
-{
-  guint32 mmx_weights[16][8];
-  int check_shift = get_check_shift (check_size);
-  int colors[4];
-  int j;
-
-  for (j=0; j<16; j++)
-    {
-      mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
-      mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
-    }
-
-  colors[0] = (color1 & 0xff00) << 8 | (color1 & 0xff);
-  colors[1] = (color1 & 0xff0000) >> 16;
-  colors[2] = (color2 & 0xff00) << 8 | (color2 & 0xff);
-  colors[3] = (color2 & 0xff0000) >> 16;
-
-  return _pixops_composite_line_color_22_4a4_mmx (mmx_weights, dest, src[0],
-    src[1], x_step, dest_end, x_init, dest_x, check_shift, colors);
-}
-#endif /* USE_MMX */
-
 static void
 scale_pixel (guchar *dest, int dest_x, int dest_channels, int dest_has_alpha,
 	     int src_has_alpha, int check_size, guint32 color1, guint32 color2,
@@ -1142,34 +1073,6 @@ scale_line (int *weights, int n_x, int n_y, guchar *dest, int dest_x,
   return dest;
 }
 
-#ifdef USE_MMX 
-static guchar *
-scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y, guchar *dest,
-			   int dest_x, guchar *dest_end, int dest_channels,
-			   int dest_has_alpha, guchar **src, int src_channels,
-			   gboolean src_has_alpha, int x_init, int x_step,
-			   int src_width, int check_size, guint32 color1,
-			   guint32 color2)
-{
-  guint32 mmx_weights[16][8];
-  int j;
-
-  for (j=0; j<16; j++)
-    {
-      mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8);
-      mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
-      mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
-      mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
-      mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
-    }
-
-  return _pixops_scale_line_22_33_mmx (mmx_weights, dest, src[0], src[1],
-				       x_step, dest_end, x_init);
-}
-#endif /* USE_MMX */
 
 static guchar *
 scale_line_22_33 (int *weights, int n_x, int n_y, guchar *dest, int dest_x,
@@ -1910,10 +1813,6 @@ _pixops_composite_color_real (guchar          *dest_buf,
   PixopsLineFunc line_func;
   guchar *tmp_buf = NULL;
   
-#ifdef USE_MMX
-  gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
   g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
   g_return_if_fail (!(src_channels == 3 && src_has_alpha));
 
@@ -1941,14 +1840,7 @@ _pixops_composite_color_real (guchar          *dest_buf,
   if (!make_weights (&filter, interp_type, scale_x, scale_y))
     return;
 
-#ifdef USE_MMX
-  if (filter.x.n == 2 && filter.y.n == 2 &&
-      dest_channels == 4 && src_channels == 4 &&
-      src_has_alpha && !dest_has_alpha && found_mmx)
-    line_func = composite_line_color_22_4a4_mmx_stub;
-  else
-#endif
-    line_func = composite_line_color;
+  line_func = composite_line_color;
   
   pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
 		  dest_rowstride, dest_channels, dest_has_alpha,
@@ -2071,10 +1963,6 @@ _pixops_composite_real (guchar          *dest_buf,
   PixopsLineFunc line_func;
   guchar *tmp_buf = NULL;
   
-#ifdef USE_MMX
-  gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
   g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
   g_return_if_fail (!(src_channels == 3 && src_has_alpha));
 
@@ -2107,14 +1995,7 @@ _pixops_composite_real (guchar          *dest_buf,
 
   if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 4 &&
       src_channels == 4 && src_has_alpha && !dest_has_alpha)
-    {
-#ifdef USE_MMX
-      if (found_mmx)
-	line_func = composite_line_22_4a4_mmx_stub;
-      else
-#endif	
-	line_func = composite_line_22_4a4;
-    }
+    line_func = composite_line_22_4a4;
   else
     line_func = composite_line;
   
@@ -2491,10 +2372,6 @@ _pixops_scale_real (guchar        *dest_buf,
   PixopsLineFunc line_func;
   guchar *tmp_buf = NULL;	/* Temporary image for two-step scaling */
 
-#ifdef USE_MMX
-  gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
   g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
   g_return_if_fail (!(src_channels == 3 && src_has_alpha));
   g_return_if_fail (!(src_has_alpha && !dest_has_alpha));
@@ -2522,14 +2399,7 @@ _pixops_scale_real (guchar        *dest_buf,
     return;
 
   if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 3 && src_channels == 3)
-    {
-#ifdef USE_MMX
-      if (found_mmx)
-	line_func = scale_line_22_33_mmx_stub;
-      else
-#endif
-	line_func = scale_line_22_33;
-    }
+    line_func = scale_line_22_33;
   else
     line_func = scale_line;
   
diff --git a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S
deleted file mode 100644
index 6080844c1..000000000
--- a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-	.file	"scale_line_22_33_mmx.S"
-	.version	"01.01"
-gcc2_compiled.:
-.text
-	.align 16
-
-#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)
-	
-/* Magic indicating no need for an executable stack */
-#if !defined __powerpc64__ && !defined __ia64__
-.section .note.GNU-stack;  .previous
-#endif
-	
-.globl _pixops_scale_line_22_33_mmx
-	.type	 _pixops_scale_line_22_33_mmx,@function
-_pixops_scale_line_22_33_mmx:
-	
-#else
-	
-.globl __pixops_scale_line_22_33_mmx
-__pixops_scale_line_22_33_mmx:
-	
-#endif
-/*
- * Arguments
- *		
- * weights:	 8(%ebp)
- * p:	        12(%ebp)	%esi
- * q1:	        16(%ebp)	
- * q2:	        20(%ebp)	
- * xstep:       24(%ebp)	
- * p_end:       28(%ebp)
- * xinit:       32(%ebp)
- *	
-*/
-
-/*
- * Function call entry
- */
-	pushl %ebp
-	movl %esp,%ebp
-	subl $28,%esp
-	pushl %edi
-	pushl %esi
-	pushl %ebx
-/* Locals:	
- * int x                      %ebx
- * int x_scaled             -24(%ebp)
- */
-
-/*
- * Setup
- */
-/* Initialize variables */	
-	movl 32(%ebp),%ebx
-	movl 32(%ebp),%edx
-	sarl $16,%edx
-	movl 12(%ebp),%esi
-
-	cmpl 28(%ebp),%esi
-	jnb  .out
-
-/* For the body of this loop, %mm01, %mm1, %mm2, %mm3 hold the 4 adjoining
- * points we are interpolating between, as:
- *
- *  000000BB00GG00RR
- */	
-	
-/* Load initial values into %mm1, %mm3 */
-	leal (%edx,%edx,2),%edx  # Multiply by 3
-
-	movl 16(%ebp),%edi
-	pxor %mm4, %mm4
-	movzbl 2(%edi,%edx),%ecx
-	shll $16,%ecx
-	movzwl (%edi,%edx),%eax
-	orl %eax,%ecx
-	movd %ecx, %mm1
-	punpcklbw %mm4, %mm1
-
-	movl 20(%ebp),%edi
-	movzbl 2(%edi,%edx),%ecx
-	shll $16,%ecx
-	movzwl (%edi,%edx),%eax
-	orl %eax,%ecx
-	movd %ecx, %mm3
-	punpcklbw %mm4, %mm3
-
-	addl $65536,%ebx
-	movl %ebx,%edx
-	sarl $16,%edx
-
-	jmp .newx
-	.p2align 4,,7
-.loop:
-/* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y
- *                                             16             4                  0xf            2     2
- */
-	movl %ebx,%eax
-	andl $0xf000,%eax
-	shrl $7,%eax
-
-/* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and
- * accumulate.
- */
-	movq (%edi,%eax),%mm4
-	pmullw %mm0,%mm4
-	movq 8(%edi,%eax),%mm5
-	pmullw %mm1,%mm5
-	movq 16(%edi,%eax),%mm6
-	movq 24(%edi,%eax),%mm7
-	pmullw %mm2,%mm6
-	pmullw %mm3,%mm7
-	paddw %mm4, %mm5
-	paddw %mm6, %mm7
-	paddw %mm5, %mm7
-
-/* %mm7	holds the accumulated sum. Compute (C + 0x80) / 256
- */
-	pxor %mm4, %mm4
-	movl $8421504, %eax  # 0x00808080
-	movd %eax, %mm6  
-	punpcklbw %mm4, %mm6
-	paddw %mm6, %mm7
-	psrlw $8, %mm7
-
-/* Pack into %eax and store result
- */	
-	packuswb %mm7, %mm7
-	movd %mm7, %eax
-	
-	movb %al, (%esi)
-	shrl $8, %eax
-	movw %ax, 1(%esi)
-	addl $3, %esi
-		
-	cmpl %esi,28(%ebp)
-	je   .out
-
-/* x += x_step; */
-	addl 24(%ebp),%ebx
-/* x_scaled = x >> 16; */
-	movl %ebx,%edx
-	sarl $16,%edx
-
-	cmpl %edx,-24(%ebp)
-	je   .loop
-
-.newx:
-	movl %edx,-24(%ebp)
-/*
- * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
- */
-	movq %mm1, %mm0
-	movq %mm3, %mm2
-	
-	leal (%edx,%edx,2),%edx  # Multiply by 3
-
-	movl 16(%ebp),%edi
-	movzbl 2(%edi,%edx),%ecx
-	shll $16,%ecx
-	movzwl (%edi,%edx),%eax
-	orl %eax,%ecx
-	movd %ecx, %mm1
-	punpcklbw %mm4, %mm1
-
-	movl 20(%ebp),%edi
-	movzbl 2(%edi,%edx),%ecx
-	shll $16,%ecx
-	movzwl (%edi,%edx),%eax
-	orl %eax,%ecx
-	movd %ecx, %mm3
-	punpcklbw %mm4, %mm3
-	
-	movl 8(%ebp),%edi
-	
-	jmp .loop
-
-.out:
-	movl %esi,%eax
-	emms
-	leal -40(%ebp),%esp
-	popl %ebx
-	popl %esi
-	popl %edi
-	movl %ebp,%esp
-	popl %ebp
-	ret
author	Emmanuele Bassi <ebassi@gnome.org>	2018-04-28 11:36:19 +0100
committer	Emmanuele Bassi <ebassi@gnome.org>	2018-04-30 18:11:40 +0100
commit	4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f (patch)
tree	4213bfe3fca3055cd7691a57bdba5de083458dcd /gdk-pixbuf/pixops
parent	b4acb66c1e026fc3cd8a6d1ac961f68fb6fb91e2 (diff)
download	gdk-pixbuf-4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f.tar.gz