summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEmmanuele Bassi <ebassi@gnome.org>2018-04-28 11:36:19 +0100
committerEmmanuele Bassi <ebassi@gnome.org>2018-04-30 18:11:40 +0100
commit4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f (patch)
tree4213bfe3fca3055cd7691a57bdba5de083458dcd
parentb4acb66c1e026fc3cd8a6d1ac961f68fb6fb91e2 (diff)
downloadgdk-pixbuf-4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f.tar.gz
Drop the MMX assembly optimizations
We haven't built them on anything that isn't a 32bit IA platform, and we could probably get better mileage out of the currently implemented pixops just by rearranging the C code and letting compilers do the optimizations for us. We should definitely consider either using pixman directly, or replacing slow pixops with SSE builtins, instead.
-rw-r--r--gdk-pixbuf/pixops/DETAILS73
-rw-r--r--gdk-pixbuf/pixops/README17
-rw-r--r--gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S239
-rw-r--r--gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S251
-rw-r--r--gdk-pixbuf/pixops/have_mmx.S74
-rw-r--r--gdk-pixbuf/pixops/pixops-internal.h23
-rw-r--r--gdk-pixbuf/pixops/pixops.c136
-rw-r--r--gdk-pixbuf/pixops/scale_line_22_33_mmx.S204
8 files changed, 3 insertions, 1014 deletions
diff --git a/gdk-pixbuf/pixops/DETAILS b/gdk-pixbuf/pixops/DETAILS
index acf16f57e..08597f5f7 100644
--- a/gdk-pixbuf/pixops/DETAILS
+++ b/gdk-pixbuf/pixops/DETAILS
@@ -280,76 +280,3 @@ Integer tricks for compositing
-MMX Code
-========
-
-MMX versions of the line functions are provided for a few special
-cases:
-
- n_x = n_y = 2
-
- src_channels = 3 dest_channels = 3 op = scale
- src_channels = 4 with alpha dest_channels = 4 no alpha op = composite
- src_channels = 4 with alpha dest_channels = 4 no alpha op = composite_color
-
-For the case n_x = n_y = 2 - primarily hit when scaling up with bilinear
-scaling, we can take advantage of the fact that multiple destination
-pixels will be composed from the same source pixels.
-
-That is, a destination pixel is a linear combination of the source
-pixels around it:
-
-
- S0 S1
-
-
-
-
-
- D D' D'' ...
-
-
-
-
- S2 S3
-
-Each mmx register is 64 bits wide, so we can unpack a source pixel
-into the low 8 bits of 4 16 bit words, and store it into a mmx
-register.
-
-For each destination pixel, we first make sure that we have pixels S0
-... S3 loaded into registers mm0 ...mm3. (This will often involve not
-doing anything or moving mm1 and mm3 into mm0 and mm1 then reloading
-mm1 and mm3 with new values).
-
-Then we load up the appropriate weights for the 4 corner pixels
-based on the offsets of the destination pixel within the source
-pixels.
-
-We have preexpanded the weights to 64 bits wide and truncated the
-range to 8 bits, so an original filter value of
-
- 0x5321 would be expanded to
-
- 0x0053005300530053
-
-For source buffers without alpha, we simply do a multiply-add
-of the weights, giving us a 16 bit quantity for the result
-that we shift right by 8 and store in the destination buffer.
-
-When the source buffer has alpha, then things become more
-complicated - when we load up mm0 and mm3, we premultiply
-the alpha, so they contain:
-
- (a*ff >> 8) (r*a >> 8) (g*a >> 8) (b*a >> 8)
-
-Then when we multiply by the weights, and add we end up
-with premultiplied r,g,b,a in the range of 0 .. 0xff * 0xff,
-call them A,R,G,B
-
-We then need to composite with the dest pixels - which
-we do by:
-
- r_dest = (R + ((0xff * 0xff - A) >> 8) * r_dest) >> 8
-
-(0xff * 0xff)
diff --git a/gdk-pixbuf/pixops/README b/gdk-pixbuf/pixops/README
index 354c3a197..382fca128 100644
--- a/gdk-pixbuf/pixops/README
+++ b/gdk-pixbuf/pixops/README
@@ -138,23 +138,6 @@ TODO
switching around conditionals and inner loops in various
places.
-* Right now, in several of the most common cases, there are
- optimized mmx routines, but no optimized C routines.
-
- For instance, there is a
-
- pixops_composite_line_22_4a4_mmx()
-
- But no
-
- pixops_composite_line_22_4a4()
-
- Also, it may be desirable to include a few more special cases - in particular:
-
- pixops_composite_line_22_4a3()
-
- May be desirable.
-
* Scaling down images by large scale factors is _slow_ since huge filter
matrixes are computed. (e.g., to scale down by a factor of 100, we compute
101x101 filter matrixes. At some point, it would be more efficient to
diff --git a/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S
deleted file mode 100644
index c062cad91..000000000
--- a/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
- .file "composite_line_22_4a4_mmx.S"
- .version "01.01"
-gcc2_compiled.:
-.text
- .align 16
-
-#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)
-
-/* Magic indicating no need for an executable stack */
-#if !defined __powerpc64__ && !defined __ia64__
-.section .note.GNU-stack; .previous
-#endif
-
-.globl _pixops_composite_line_22_4a4_mmx
- .type _pixops_composite_line_22_4a4_mmx,@function
-_pixops_composite_line_22_4a4_mmx:
-
-#else
-
-.globl __pixops_composite_line_22_4a4_mmx
-__pixops_composite_line_22_4a4_mmx:
-
-#endif
-/*
- * Arguments (32-bit x86, AT&T syntax; all read from the caller's stack). Returns the final p in %eax.
- *
- * weights: 8(%ebp) - table indexed by bits 12-15 of x, 32 bytes (four 64-bit weights) per row
- * p: 12(%ebp) %esi - destination pointer, advanced by 4 bytes per output pixel
- * q1: 16(%ebp) - first source line, 4 bytes per pixel
- * q2: 20(%ebp) - second source line, 4 bytes per pixel
- * xstep: 24(%ebp) - added to x after every output pixel (x >> 16 is the source pixel index)
- * p_end: 28(%ebp) - the loop exits when p equals this address
- * xinit: 32(%ebp) - initial value of x
- *
-*/
-/*
- * Function call entry
- */
- pushl %ebp
- movl %esp,%ebp
- subl $28,%esp /* 28 bytes of locals; only -24(%ebp) is referenced below */
- pushl %edi
- pushl %esi
- pushl %ebx
-/* Locals:
- * int x %ebx
- * int x_scaled -24(%ebp) (x >> 16 at the time %mm0..%mm3 were last loaded)
- */
-
-/*
- * Setup
- */
-/* Initialize variables */
- movl 32(%ebp),%ebx /* x = xinit */
- movl 32(%ebp),%edx
- sarl $16,%edx /* %edx = xinit >> 16 */
- movl 12(%ebp),%esi /* %esi = p */
-
- movl %edx,-24(%ebp) /* x_scaled = xinit >> 16 */
-
- cmpl 28(%ebp),%esi
- jnb .out /* nothing to do when p >= p_end (unsigned compare) */
-
-/* Load initial values into %mm1, %mm3 */
- shll $2, %edx /* pixel index -> byte offset (4 bytes per pixel) */
-
- pxor %mm4, %mm4 /* %mm4 = 0, used to widen bytes to 16-bit words */
-
- movl 16(%ebp),%edi /* %edi = q1 */
- movl (%edi, %edx), %eax
- movd (%edi, %edx), %mm5
- punpcklbw %mm4, %mm5 /* %mm5 = the pixel's 4 bytes as four 16-bit words */
- shrl $24, %eax /* %eax = byte 3 of the pixel (its alpha, a) */
- movl $0x010101, %ecx
- mull %ecx /* %eax = 0x00aaaaaa; mull also overwrites %edx */
- orl $0xff000000, %eax /* %eax = 0xffaaaaaa */
- movd %eax, %mm1
- punpcklbw %mm4, %mm1 /* words, low to high: a, a, a, 0xff */
- pmullw %mm5,%mm1 /* colour bytes * a, alpha byte * 0xff */
-
- movl -24(%ebp),%edx /* mull overwrote %edx: reload x_scaled */
- shll $2, %edx
-
- movl 20(%ebp),%edi /* %edi = q2; same premultiply as above, into %mm3 */
- movl (%edi, %edx), %eax
- movd (%edi, %edx), %mm5
- punpcklbw %mm4, %mm5
- shrl $24, %eax
- movl $0x010101, %ecx
- mull %ecx
- orl $0xff000000, %eax
- movd %eax, %mm3
- punpcklbw %mm4, %mm3
- pmullw %mm5,%mm3
-
- psrlw $8,%mm1 /* keep the high byte of each 16-bit product */
- psrlw $8,%mm3
-
- addl $65536,%ebx /* x += 1 << 16, i.e. one source pixel */
- movl %ebx,%edx
- sarl $16,%edx /* %edx = x >> 16 */
-
- jmp .newx /* .newx shifts %mm1/%mm3 into %mm0/%mm2 and loads the next pair */
- .p2align 4,,7
-.loop:
-/* int x_index = (x & 0xf000) >> 12 */
- movl %ebx,%eax
- andl $0xf000,%eax
- shrl $7,%eax /* (x_index << 12) >> 7 = x_index * 32, the byte offset of the weight row */
-
- movq (%edi,%eax),%mm4 /* %edi holds weights here (reloaded at the end of .newx) */
- pmullw %mm0,%mm4
- movq 8(%edi,%eax),%mm5
- pmullw %mm1,%mm5
- movq 16(%edi,%eax),%mm6
- movq 24(%edi,%eax),%mm7
- pmullw %mm2,%mm6
- pmullw %mm3,%mm7
- paddw %mm4, %mm5
- paddw %mm6, %mm7
- paddw %mm5, %mm7 /* %mm7 = sum of the four weighted source pixels */
-
- movl $0xffff,%ecx
- movd %ecx,%mm4
- psllq $48,%mm4 /* %mm4 = 0xffff in the top word only */
- movq %mm4,%mm6 /* %mm6 = copy of that mask */
- psubw %mm7,%mm4
- pand %mm6,%mm4 /* %mm4 = 0xffff - (top word of the sum), other words cleared */
-
- movq %mm4,%mm5
- psrlq $16,%mm4
- por %mm4,%mm5
- psrlq $32,%mm5
- por %mm4,%mm5 /* %mm5 = that value copied into the three low words, top word 0 */
-
- psrlw $8,%mm5
-
- movd (%esi),%mm7 /* load the destination pixel; this overwrites the weighted sum in %mm7 */
- pxor %mm4,%mm4
- punpcklbw %mm4, %mm7
-
- pmullw %mm7,%mm5 /* destination channels * inverse alpha */
-
-/* x += x_step; */
- addl 24(%ebp),%ebx
-/* x_scale = x >> 16; */
- movl %ebx,%edx
- sarl $16,%edx
-
- paddw %mm5,%mm6 /* NOTE(review): %mm6 is still the 0xffff<<48 mask and the weighted sum in %mm7 was overwritten above, so the source contribution looks dropped here; the composite_color variant adds into %mm7 instead - confirm */
-
- psrlw $8,%mm6
- packuswb %mm6, %mm6 /* words -> bytes with unsigned saturation */
- movd %mm6,(%esi) /* store the 4-byte result at p */
-
- addl $4, %esi
-
- cmpl %esi,28(%ebp)
- je .out /* finished when p == p_end */
-
- cmpl %edx,-24(%ebp)
- je .loop /* x >> 16 unchanged: %mm0..%mm3 are still the right pixels */
-
-.newx:
- movl %edx,-24(%ebp) /* x_scaled = x >> 16 */
-/*
- * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
- */
- movq %mm1, %mm0
- movq %mm3, %mm2
-
- shll $2, %edx /* pixel index -> byte offset */
-
-/* # %mm4 will always be already clear here */
-/* # pxor %mm4, %mm4 */
-
- movl 16(%ebp),%edi /* %edi = q1 */
- movl (%edi, %edx), %eax
- movd (%edi, %edx), %mm5
- punpcklbw %mm4, %mm5
- shrl $24, %eax
- movl $0x010101, %ecx
- mull %ecx
-/*
- * mull destroyed %edx, need to reconstitute
- */
- movl -24(%ebp),%edx
- shll $2, %edx
-
- orl $0xff000000, %eax
- movd %eax, %mm1
- punpcklbw %mm4, %mm1
- pmullw %mm5,%mm1
-
- movl 20(%ebp),%edi /* %edi = q2 */
- movl (%edi, %edx), %eax
- movd (%edi, %edx), %mm5
- punpcklbw %mm4, %mm5
- shrl $24, %eax
- movl $0x010101, %ecx
- mull %ecx
- orl $0xff000000, %eax
- movd %eax, %mm3
- punpcklbw %mm4, %mm3
- pmullw %mm5,%mm3
-
- psrlw $8,%mm1
- psrlw $8,%mm3
-
- movl 8(%ebp),%edi /* %edi = weights, as .loop expects */
-
- jmp .loop
-
-.out:
- movl %esi,%eax /* return value: the final p */
- emms /* empty the MMX state before returning */
- leal -40(%ebp),%esp /* 28 bytes of locals + 3 pushed registers = 40: %esp -> saved %ebx */
- popl %ebx
- popl %esi
- popl %edi
- movl %ebp,%esp
- popl %ebp
- ret
diff --git a/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S
deleted file mode 100644
index f72a8bf5d..000000000
--- a/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
- .file "composite_line_color_22_4a4_mmx.S"
- .version "01.01"
-gcc2_compiled.:
-.text
- .align 16
-
-#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)
-
-/* Magic indicating no need for an executable stack */
-#if !defined __powerpc64__ && !defined __ia64__
-.section .note.GNU-stack; .previous
-#endif
-
-.globl _pixops_composite_line_color_22_4a4_mmx
- .type _pixops_composite_line_color_22_4a4_mmx,@function
-_pixops_composite_line_color_22_4a4_mmx:
-
-#else
-
-.globl __pixops_composite_line_color_22_4a4_mmx
-__pixops_composite_line_color_22_4a4_mmx:
-
-#endif
-/*
- * Arguments (32-bit x86, AT&T syntax; all read from the caller's stack). Returns the final p in %eax.
- *
- * weights: 8(%ebp) - table indexed by bits 12-15 of x, 32 bytes (four 64-bit weights) per row
- * p: 12(%ebp) %esi - destination pointer, advanced by 4 bytes per output pixel
- * q1: 16(%ebp) - first source line, 4 bytes per pixel
- * q2: 20(%ebp) - second source line, 4 bytes per pixel
- * xstep: 24(%ebp) - added to x after every output pixel (x >> 16 is the source pixel index)
- * p_end: 28(%ebp) - the loop exits when p equals this address
- * xinit: 32(%ebp) - initial value of x
- * dest_x: 36(%ebp) - incremented in place once per output pixel
- * check_shift: 40(%ebp) - (dest_x >> check_shift) & 1 picks the colour entry
- * colors: 44(%ebp) - table of two 8-byte entries
- *
-*/
-
-/*
- * Function call entry
- */
- pushl %ebp
- movl %esp,%ebp
- subl $28,%esp /* 28 bytes of locals; only -24(%ebp) is referenced below */
- pushl %edi
- pushl %esi
- pushl %ebx
-/* Locals:
- * int x %ebx
- * int x_scaled -24(%ebp) (x >> 16 at the time %mm0..%mm3 were last loaded)
- */
-
-/*
- * Setup
- */
-/* Initialize variables */
- movl 32(%ebp),%ebx /* x = xinit */
- movl 32(%ebp),%edx
- sarl $16,%edx /* %edx = xinit >> 16 */
- movl 12(%ebp),%esi /* %esi = p */
-
- movl %edx,-24(%ebp) /* x_scaled = xinit >> 16 */
-
- cmpl 28(%ebp),%esi
- jnb .out /* nothing to do when p >= p_end (unsigned compare) */
-
-/* Load initial values into %mm1, %mm3 */
- shll $2, %edx /* pixel index -> byte offset (4 bytes per pixel) */
-
- pxor %mm4, %mm4 /* %mm4 = 0, used to widen bytes to 16-bit words */
-
- movl 16(%ebp),%edi /* %edi = q1 */
- movl (%edi, %edx), %eax
- movd (%edi, %edx), %mm5
- punpcklbw %mm4, %mm5 /* %mm5 = the pixel's 4 bytes as four 16-bit words */
- shrl $24, %eax /* %eax = byte 3 of the pixel (its alpha, a) */
- movl $0x010101, %ecx
- mull %ecx /* %eax = 0x00aaaaaa; mull also overwrites %edx */
- orl $0xff000000, %eax /* %eax = 0xffaaaaaa */
- movd %eax, %mm1
- punpcklbw %mm4, %mm1 /* words, low to high: a, a, a, 0xff */
- pmullw %mm5,%mm1 /* colour bytes * a, alpha byte * 0xff */
-
-/*
- * mull destroyed %edx, need to reconstitute
- */
- movl -24(%ebp),%edx
- shll $2, %edx
-
- movl 20(%ebp),%edi /* %edi = q2; same premultiply as above, into %mm3 */
- movl (%edi, %edx), %eax
- movd (%edi, %edx), %mm5
- punpcklbw %mm4, %mm5
- shrl $24, %eax
- movl $0x010101, %ecx
- mull %ecx
- orl $0xff000000, %eax
- movd %eax, %mm3
- punpcklbw %mm4, %mm3
- pmullw %mm5,%mm3
-
- psrlw $8,%mm1 /* keep the high byte of each 16-bit product */
- psrlw $8,%mm3
-
- addl $65536,%ebx /* x += 1 << 16, i.e. one source pixel */
- movl %ebx,%edx
- sarl $16,%edx /* %edx = x >> 16 */
-
- jmp .newx /* .newx shifts %mm1/%mm3 into %mm0/%mm2 and loads the next pair */
- .p2align 4,,7
-.loop:
-/* int x_index = (x & 0xf000) >> 12 */
- movl %ebx,%eax
- andl $0xf000,%eax
- shrl $7,%eax /* (x_index << 12) >> 7 = x_index * 32, the byte offset of the weight row */
-
- movq (%edi,%eax),%mm4 /* %edi holds weights here (reloaded at the end of .newx) */
- pmullw %mm0,%mm4
- movq 8(%edi,%eax),%mm5
- pmullw %mm1,%mm5
- movq 16(%edi,%eax),%mm6
- movq 24(%edi,%eax),%mm7
- pmullw %mm2,%mm6
- pmullw %mm3,%mm7
- paddw %mm4, %mm5
- paddw %mm6, %mm7
- paddw %mm5, %mm7 /* %mm7 = sum of the four weighted source pixels */
-
- movl $0xffff,%ecx
- movd %ecx,%mm4
- psllq $48,%mm4 /* %mm4 = 0xffff in the top word only */
- movq %mm4,%mm6 /* %mm6 = copy of that mask */
- psubw %mm7,%mm4
- pand %mm6,%mm4 /* %mm4 = 0xffff - (top word of the sum), other words cleared */
-
- movq %mm4,%mm5
- psrlq $16,%mm4
- por %mm4,%mm5
- psrlq $32,%mm5
- por %mm4,%mm5 /* %mm5 = that value copied into the three low words, top word 0 */
-
- psrlw $8,%mm5
-
- movl 36(%ebp),%eax /* %eax = dest_x */
- incl 36(%ebp) /* dest_x++ (updates the argument slot in place) */
-
- movl 40(%ebp),%ecx
- shrl %cl,%eax
- andl $1,%eax /* %eax = (dest_x >> check_shift) & 1 */
-
- movl 44(%ebp),%ecx
- movq (%ecx,%eax,8),%mm6 /* %mm6 = colors entry 0 or 1 (8 bytes each) */
-
- pmullw %mm6,%mm5 /* selected colour * inverse alpha */
-
-/* x += x_step; */
- addl 24(%ebp),%ebx
-/* x_scale = x >> 16; */
- movl %ebx,%edx
- sarl $16,%edx
-
- paddw %mm5,%mm7 /* add the colour term to the weighted source sum */
-
- psrlw $8,%mm7
- packuswb %mm7, %mm7 /* words -> bytes with unsigned saturation */
- movd %mm7,(%esi) /* store the 4-byte result at p */
-
- addl $4, %esi
-
- cmpl %esi,28(%ebp)
- je .out /* finished when p == p_end */
-
- cmpl %edx,-24(%ebp)
- je .loop /* x >> 16 unchanged: %mm0..%mm3 are still the right pixels */
-
-.newx:
- movl %edx,-24(%ebp) /* x_scaled = x >> 16 */
-/*
- * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
- */
- movq %mm1, %mm0
- movq %mm3, %mm2
-
- shll $2, %edx /* pixel index -> byte offset */
-
- pxor %mm4, %mm4 /* %mm4 = 0 for the byte -> word unpacks below */
-
- movl 16(%ebp),%edi /* %edi = q1 */
- movl (%edi, %edx), %eax
- movd (%edi, %edx), %mm5
- punpcklbw %mm4, %mm5
- shrl $24, %eax
- movl $0x010101, %ecx
- mull %ecx
-/*
- * mull destroyed %edx, need to reconstitute
- */
- movl -24(%ebp),%edx
- shll $2, %edx
-
- orl $0xff000000, %eax
- movd %eax, %mm1
- punpcklbw %mm4, %mm1
- pmullw %mm5,%mm1
-
- movl 20(%ebp),%edi /* %edi = q2 */
- movl (%edi, %edx), %eax
- movd (%edi, %edx), %mm5
- punpcklbw %mm4, %mm5
- shrl $24, %eax
- movl $0x010101, %ecx
- mull %ecx
- orl $0xff000000, %eax
- movd %eax, %mm3
- punpcklbw %mm4, %mm3
- pmullw %mm5,%mm3
-
- psrlw $8,%mm1
- psrlw $8,%mm3
-
- movl 8(%ebp),%edi /* %edi = weights, as .loop expects */
-
- jmp .loop
-
-.out:
- movl %esi,%eax /* return value: the final p */
- emms /* empty the MMX state before returning */
- leal -40(%ebp),%esp /* 28 bytes of locals + 3 pushed registers = 40: %esp -> saved %ebx */
- popl %ebx
- popl %esi
- popl %edi
- movl %ebp,%esp
- popl %ebp
- ret
diff --git a/gdk-pixbuf/pixops/have_mmx.S b/gdk-pixbuf/pixops/have_mmx.S
deleted file mode 100644
index ce0bb0088..000000000
--- a/gdk-pixbuf/pixops/have_mmx.S
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
- .file "have_mmx.S"
- .version "01.01"
-gcc2_compiled.:
-.text
- .align 16
-
-#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)
-
-/* Magic indicating no need for an executable stack */
-#if !defined __powerpc64__ && !defined __ia64__
-.section .note.GNU-stack; .previous
-#endif
-
-.globl _pixops_have_mmx
- .type _pixops_have_mmx,@function
-_pixops_have_mmx:
-
-#else
-
-.globl __pixops_have_mmx
-__pixops_have_mmx:
-
-#endif
-
- push %ebx /* %ebx is used as scratch below and written by cpuid; restored at .out */
-
-/* # Check if bit 21 in flags word is writeable */
-
- pushfl
- popl %eax /* %eax = current EFLAGS */
- movl %eax,%ebx /* keep the original value for the comparison below */
- xorl $0x00200000, %eax /* flip bit 21 */
- pushl %eax
- popfl /* try to write the modified value back */
- pushfl
- popl %eax /* re-read EFLAGS */
-
- cmpl %eax, %ebx
-
- je .notfound /* bit 21 did not change: CPUID is not available */
-
-/* # OK, we have CPUID */
-
- movl $1, %eax /* CPUID leaf 1 */
- cpuid
-
- test $0x00800000, %edx /* bit 23 of %edx (the MMX feature flag) */
- jz .notfound
-
- movl $1, %eax /* return 1 */
- jmp .out
-
-.notfound:
- movl $0, %eax /* return 0 */
-.out:
- popl %ebx
- ret
-
diff --git a/gdk-pixbuf/pixops/pixops-internal.h b/gdk-pixbuf/pixops/pixops-internal.h
deleted file mode 100644
index 6497c2424..000000000
--- a/gdk-pixbuf/pixops/pixops-internal.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#ifdef USE_MMX /* prototypes for the routines in the *_mmx.S files and have_mmx.S */
-guchar *_pixops_scale_line_22_33_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init);
-guchar *_pixops_composite_line_22_4a4_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init);
-guchar *_pixops_composite_line_color_22_4a4_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init, int dest_x, int check_shift, int *colors);
-int _pixops_have_mmx (void); /* 1 when the CPUID check finds the MMX bit, otherwise 0 */
-#endif
-
diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c
index f6535f1f3..f1b75f820 100644
--- a/gdk-pixbuf/pixops/pixops.c
+++ b/gdk-pixbuf/pixops/pixops.c
@@ -23,7 +23,6 @@
#include "../fallback-c89.c"
#include "pixops.h"
-#include "pixops-internal.h"
#define SUBSAMPLE_BITS 4
#define SUBSAMPLE (1 << SUBSAMPLE_BITS)
@@ -837,36 +836,6 @@ composite_line_22_4a4 (int *weights, int n_x, int n_y,
return dest;
}
-#ifdef USE_MMX
-static guchar * /* returns whatever the assembly routine returns */
-composite_line_22_4a4_mmx_stub (int *weights, int n_x, int n_y, guchar *dest,
- int dest_x, guchar *dest_end,
- int dest_channels, int dest_has_alpha,
- guchar **src, int src_channels,
- gboolean src_has_alpha, int x_init,
- int x_step, int src_width, int check_size,
- guint32 color1, guint32 color2) /* only weights, dest, dest_end, src, x_init and x_step are used */
-{
- guint32 mmx_weights[16][8]; /* 16 rows; each row holds 4 weights, each spread over two guint32 (64 bits) */
- int j;
-
- for (j=0; j<16; j++) /* expand weights[4*j .. 4*j+3] into row j */
- {
- mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8); /* (weight >> 8) copied into both 16-bit halves */
- mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); /* same value again: the upper 32 bits of the 64-bit weight */
- mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
- mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
- }
-
- return _pixops_composite_line_22_4a4_mmx (mmx_weights, dest, src[0], src[1], /* src[0], src[1]: the two source lines */
- x_step, dest_end, x_init);
-}
-#endif /* USE_MMX */
-
static void
composite_pixel_color (guchar *dest, int dest_x, int dest_channels,
int dest_has_alpha, int src_has_alpha, int check_size,
@@ -980,44 +949,6 @@ composite_line_color (int *weights, int n_x, int n_y, guchar *dest,
return dest;
}
-#ifdef USE_MMX
-static guchar * /* returns whatever the assembly routine returns */
-composite_line_color_22_4a4_mmx_stub (int *weights, int n_x, int n_y,
- guchar *dest, int dest_x,
- guchar *dest_end, int dest_channels,
- int dest_has_alpha, guchar **src,
- int src_channels, gboolean src_has_alpha,
- int x_init, int x_step, int src_width,
- int check_size, guint32 color1,
- guint32 color2) /* n_x, n_y, the channel/alpha arguments and src_width are not used */
-{
- guint32 mmx_weights[16][8]; /* 16 rows; each row holds 4 weights, each spread over two guint32 (64 bits) */
- int check_shift = get_check_shift (check_size);
- int colors[4]; /* two 8-byte entries: colors[0..1] from color1, colors[2..3] from color2 */
- int j;
-
- for (j=0; j<16; j++) /* expand weights[4*j .. 4*j+3] into row j */
- {
- mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8); /* (weight >> 8) copied into both 16-bit halves */
- mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); /* same value again: the upper 32 bits of the 64-bit weight */
- mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
- mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
- }
-
- colors[0] = (color1 & 0xff00) << 8 | (color1 & 0xff); /* bits 8-15 moved to bits 16-23, bits 0-7 kept: one byte per 16-bit half */
- colors[1] = (color1 & 0xff0000) >> 16; /* bits 16-23 moved to the low half; upper half 0 */
- colors[2] = (color2 & 0xff00) << 8 | (color2 & 0xff); /* same layout for color2 */
- colors[3] = (color2 & 0xff0000) >> 16;
-
- return _pixops_composite_line_color_22_4a4_mmx (mmx_weights, dest, src[0],
- src[1], x_step, dest_end, x_init, dest_x, check_shift, colors);
-}
-#endif /* USE_MMX */
-
static void
scale_pixel (guchar *dest, int dest_x, int dest_channels, int dest_has_alpha,
int src_has_alpha, int check_size, guint32 color1, guint32 color2,
@@ -1142,34 +1073,6 @@ scale_line (int *weights, int n_x, int n_y, guchar *dest, int dest_x,
return dest;
}
-#ifdef USE_MMX
-static guchar * /* returns whatever the assembly routine returns */
-scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y, guchar *dest,
- int dest_x, guchar *dest_end, int dest_channels,
- int dest_has_alpha, guchar **src, int src_channels,
- gboolean src_has_alpha, int x_init, int x_step,
- int src_width, int check_size, guint32 color1,
- guint32 color2) /* only weights, dest, dest_end, src, x_init and x_step are used */
-{
- guint32 mmx_weights[16][8]; /* 16 rows; each row holds 4 weights, each spread over two guint32 (64 bits) */
- int j;
-
- for (j=0; j<16; j++) /* expand weights[4*j .. 4*j+3] into row j */
- {
- mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8); /* (weight >> 8) copied into both 16-bit halves */
- mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); /* same value again: the upper 32 bits of the 64-bit weight */
- mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8);
- mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8);
- mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8);
- mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8);
- }
-
- return _pixops_scale_line_22_33_mmx (mmx_weights, dest, src[0], src[1], /* src[0], src[1]: the two source lines */
- x_step, dest_end, x_init);
-}
-#endif /* USE_MMX */
static guchar *
scale_line_22_33 (int *weights, int n_x, int n_y, guchar *dest, int dest_x,
@@ -1910,10 +1813,6 @@ _pixops_composite_color_real (guchar *dest_buf,
PixopsLineFunc line_func;
guchar *tmp_buf = NULL;
-#ifdef USE_MMX
- gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
g_return_if_fail (!(src_channels == 3 && src_has_alpha));
@@ -1941,14 +1840,7 @@ _pixops_composite_color_real (guchar *dest_buf,
if (!make_weights (&filter, interp_type, scale_x, scale_y))
return;
-#ifdef USE_MMX
- if (filter.x.n == 2 && filter.y.n == 2 &&
- dest_channels == 4 && src_channels == 4 &&
- src_has_alpha && !dest_has_alpha && found_mmx)
- line_func = composite_line_color_22_4a4_mmx_stub;
- else
-#endif
- line_func = composite_line_color;
+ line_func = composite_line_color;
pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1,
dest_rowstride, dest_channels, dest_has_alpha,
@@ -2071,10 +1963,6 @@ _pixops_composite_real (guchar *dest_buf,
PixopsLineFunc line_func;
guchar *tmp_buf = NULL;
-#ifdef USE_MMX
- gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
g_return_if_fail (!(src_channels == 3 && src_has_alpha));
@@ -2107,14 +1995,7 @@ _pixops_composite_real (guchar *dest_buf,
if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 4 &&
src_channels == 4 && src_has_alpha && !dest_has_alpha)
- {
-#ifdef USE_MMX
- if (found_mmx)
- line_func = composite_line_22_4a4_mmx_stub;
- else
-#endif
- line_func = composite_line_22_4a4;
- }
+ line_func = composite_line_22_4a4;
else
line_func = composite_line;
@@ -2491,10 +2372,6 @@ _pixops_scale_real (guchar *dest_buf,
PixopsLineFunc line_func;
guchar *tmp_buf = NULL; /* Temporary image for two-step scaling */
-#ifdef USE_MMX
- gboolean found_mmx = _pixops_have_mmx ();
-#endif
-
g_return_if_fail (!(dest_channels == 3 && dest_has_alpha));
g_return_if_fail (!(src_channels == 3 && src_has_alpha));
g_return_if_fail (!(src_has_alpha && !dest_has_alpha));
@@ -2522,14 +2399,7 @@ _pixops_scale_real (guchar *dest_buf,
return;
if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 3 && src_channels == 3)
- {
-#ifdef USE_MMX
- if (found_mmx)
- line_func = scale_line_22_33_mmx_stub;
- else
-#endif
- line_func = scale_line_22_33;
- }
+ line_func = scale_line_22_33;
else
line_func = scale_line;
diff --git a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S
deleted file mode 100644
index 6080844c1..000000000
--- a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright (C) 2000 Red Hat, Inc
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
- .file "scale_line_22_33_mmx.S"
- .version "01.01"
-gcc2_compiled.:
-.text
- .align 16
-
-#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)
-
-/* Magic indicating no need for an executable stack */
-#if !defined __powerpc64__ && !defined __ia64__
-.section .note.GNU-stack; .previous
-#endif
-
-.globl _pixops_scale_line_22_33_mmx
- .type _pixops_scale_line_22_33_mmx,@function
-_pixops_scale_line_22_33_mmx:
-
-#else
-
-.globl __pixops_scale_line_22_33_mmx
-__pixops_scale_line_22_33_mmx:
-
-#endif
-/*
- * Arguments (32-bit x86, AT&T syntax; all read from the caller's stack). Returns the final p in %eax.
- *
- * weights: 8(%ebp) - table indexed by bits 12-15 of x, 32 bytes (four 64-bit weights) per row
- * p: 12(%ebp) %esi - destination pointer, advanced by 3 bytes per output pixel
- * q1: 16(%ebp) - first source line, 3 bytes per pixel
- * q2: 20(%ebp) - second source line, 3 bytes per pixel
- * xstep: 24(%ebp) - added to x after every output pixel (x >> 16 is the source pixel index)
- * p_end: 28(%ebp) - the loop exits when p equals this address
- * xinit: 32(%ebp) - initial value of x
- *
-*/
-
-/*
- * Function call entry
- */
- pushl %ebp
- movl %esp,%ebp
- subl $28,%esp /* 28 bytes of locals; only -24(%ebp) is referenced below */
- pushl %edi
- pushl %esi
- pushl %ebx
-/* Locals:
- * int x %ebx
- * int x_scaled -24(%ebp) (first written at .newx)
- */
-
-/*
- * Setup
- */
-/* Initialize variables */
- movl 32(%ebp),%ebx /* x = xinit */
- movl 32(%ebp),%edx
- sarl $16,%edx /* %edx = xinit >> 16 */
- movl 12(%ebp),%esi /* %esi = p */
-
- cmpl 28(%ebp),%esi
- jnb .out /* nothing to do when p >= p_end (unsigned compare) */
-
-/* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining
- * points we are interpolating between, as:
- *
- * 000000BB00GG00RR
- */
-
-/* Load initial values into %mm1, %mm3 */
- leal (%edx,%edx,2),%edx # Multiply by 3
-
- movl 16(%ebp),%edi /* %edi = q1 */
- pxor %mm4, %mm4 /* %mm4 = 0, used to widen bytes to 16-bit words */
- movzbl 2(%edi,%edx),%ecx /* third byte of the pixel */
- shll $16,%ecx
- movzwl (%edi,%edx),%eax /* first two bytes of the pixel */
- orl %eax,%ecx /* %ecx = the 3 pixel bytes in bits 0-23, top byte 0 */
- movd %ecx, %mm1
- punpcklbw %mm4, %mm1 /* one 16-bit word per byte */
-
- movl 20(%ebp),%edi /* %edi = q2; same 3-byte load, into %mm3 */
- movzbl 2(%edi,%edx),%ecx
- shll $16,%ecx
- movzwl (%edi,%edx),%eax
- orl %eax,%ecx
- movd %ecx, %mm3
- punpcklbw %mm4, %mm3
-
- addl $65536,%ebx /* x += 1 << 16, i.e. one source pixel */
- movl %ebx,%edx
- sarl $16,%edx /* %edx = x >> 16 */
-
- jmp .newx /* .newx shifts %mm1/%mm3 into %mm0/%mm2 and loads the next pair */
- .p2align 4,,7
-.loop:
-/* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y
- * 16 4 0xf 2 2
- */
- movl %ebx,%eax
- andl $0xf000,%eax
- shrl $7,%eax /* (index << 12) >> 7 = index * 32, the byte offset of the weight row */
-
-/* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and
- * accumulate.
- */
- movq (%edi,%eax),%mm4
- pmullw %mm0,%mm4
- movq 8(%edi,%eax),%mm5
- pmullw %mm1,%mm5
- movq 16(%edi,%eax),%mm6
- movq 24(%edi,%eax),%mm7
- pmullw %mm2,%mm6
- pmullw %mm3,%mm7
- paddw %mm4, %mm5
- paddw %mm6, %mm7
- paddw %mm5, %mm7
-
-/* %mm7 holds the accumulated sum. Compute (C + 0x80) / 256
- */
- pxor %mm4, %mm4
- movl $8421504, %eax # 0x00808080
- movd %eax, %mm6
- punpcklbw %mm4, %mm6 /* 0x0080 in each of the three low words, 0 in the top word */
- paddw %mm6, %mm7
- psrlw $8, %mm7
-
-/* Pack into %eax and store result
- */
- packuswb %mm7, %mm7 /* words -> bytes with unsigned saturation */
- movd %mm7, %eax
-
- movb %al, (%esi) /* first output byte */
- shrl $8, %eax
- movw %ax, 1(%esi) /* second and third output bytes */
- addl $3, %esi
-
- cmpl %esi,28(%ebp)
- je .out /* finished when p == p_end */
-
-/* x += x_step; */
- addl 24(%ebp),%ebx
-/* x_scaled = x >> 16; */
- movl %ebx,%edx
- sarl $16,%edx
-
- cmpl %edx,-24(%ebp)
- je .loop /* x >> 16 unchanged: %mm0..%mm3 are still the right pixels */
-
-.newx:
- movl %edx,-24(%ebp) /* x_scaled = x >> 16 */
-/*
- * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
- */
- movq %mm1, %mm0
- movq %mm3, %mm2
-
- leal (%edx,%edx,2),%edx # Multiply by 3
-
- movl 16(%ebp),%edi /* %edi = q1 */
- movzbl 2(%edi,%edx),%ecx
- shll $16,%ecx
- movzwl (%edi,%edx),%eax
- orl %eax,%ecx
- movd %ecx, %mm1
- punpcklbw %mm4, %mm1 /* %mm4 is 0 on both paths into .newx (pxor before each) */
-
- movl 20(%ebp),%edi /* %edi = q2 */
- movzbl 2(%edi,%edx),%ecx
- shll $16,%ecx
- movzwl (%edi,%edx),%eax
- orl %eax,%ecx
- movd %ecx, %mm3
- punpcklbw %mm4, %mm3
-
- movl 8(%ebp),%edi /* %edi = weights, as .loop expects */
-
- jmp .loop
-
-.out:
- movl %esi,%eax /* return value: the final p */
- emms /* empty the MMX state before returning */
- leal -40(%ebp),%esp /* 28 bytes of locals + 3 pushed registers = 40: %esp -> saved %ebx */
- popl %ebx
- popl %esi
- popl %edi
- movl %ebp,%esp
- popl %ebp
- ret