diff options
author | Emmanuele Bassi <ebassi@gnome.org> | 2018-04-28 11:36:19 +0100 |
---|---|---|
committer | Emmanuele Bassi <ebassi@gnome.org> | 2018-04-30 18:11:40 +0100 |
commit | 4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f (patch) | |
tree | 4213bfe3fca3055cd7691a57bdba5de083458dcd /gdk-pixbuf/pixops | |
parent | b4acb66c1e026fc3cd8a6d1ac961f68fb6fb91e2 (diff) | |
download | gdk-pixbuf-4b3d5be2c50d270326fad68f3a7a1c1b3a9dfd3f.tar.gz |
Drop the MMX assembly optimizations
They have only ever been built on 32-bit IA (x86) platforms, and
we could probably get better mileage out of the currently implemented
pixops just by rearranging the C code and letting compilers do the
optimizations for us. We should definitely consider either using pixman
directly or replacing slow pixops with SSE builtins instead.
Diffstat (limited to 'gdk-pixbuf/pixops')
-rw-r--r-- | gdk-pixbuf/pixops/DETAILS | 73 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/README | 17 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S | 239 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S | 251 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/have_mmx.S | 74 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/pixops-internal.h | 23 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/pixops.c | 136 | ||||
-rw-r--r-- | gdk-pixbuf/pixops/scale_line_22_33_mmx.S | 204 |
8 files changed, 3 insertions, 1014 deletions
diff --git a/gdk-pixbuf/pixops/DETAILS b/gdk-pixbuf/pixops/DETAILS index acf16f57e..08597f5f7 100644 --- a/gdk-pixbuf/pixops/DETAILS +++ b/gdk-pixbuf/pixops/DETAILS @@ -280,76 +280,3 @@ Integer tricks for compositing -MMX Code -======== - -Line functions are provided in MMX functionsfor a few special -cases: - - n_x = n_y = 2 - - src_channels = 3 dest_channels = 3 op = scale - src_channels = 4 with alpha dest_channels = 4 no alpha op = composite - src_channels = 4 with alpha dest_channels = 4 no alpha op = composite_color - -For the case n_x = n_y = 2 - primarily hit when scaling up with bilinear -scaling, we can take advantage of the fact that multiple destination -pixels will be composed from the same source pixels. - -That is a destination pixel is a linear combination of the source -pixels around it: - - - S0 S1 - - - - - - D D' D'' ... - - - - - S2 S3 - -Each mmx register is 64 bits wide, so we can unpack a source pixel -into the low 8 bits of 4 16 bit words, and store it into a mmx -register. - -For each destination pixel, we first make sure that we have pixels S0 -... S3 loaded into registers mm0 ...mm3. (This will often involve not -doing anything or moving mm1 and mm3 into mm0 and mm1 then reloading -mm1 and mm3 with new values). - -Then we load up the appropriate weights for the 4 corner pixels -based on the offsets of the destination pixel within the source -pixels. - -We have preexpanded the weights to 64 bits wide and truncated the -range to 8 bits, so an original filter value of - - 0x5321 would be expanded to - - 0x0053005300530053 - -For source buffers without alpha, we simply do a multiply-add -of the weights, giving us a 16 bit quantity for the result -that we shift left by 8 and store in the destination buffer. 
- -When the source buffer has alpha, then things become more -complicated - when we load up mm0 and mm3, we premultiply -the alpha, so they contain: - - (a*ff >> 8) (r*a >> 8) (g*a >> 8) (b*a >> a) - -Then when we multiply by the weights, and add we end up -with premultiplied r,g,b,a in the range of 0 .. 0xff * 0ff, -call them A,R,G,B - -We then need to composite with the dest pixels - which -we do by: - - r_dest = (R + ((0xff * 0xff - A) >> 8) * r_dest) >> 8 - -(0xff * 0xff) diff --git a/gdk-pixbuf/pixops/README b/gdk-pixbuf/pixops/README index 354c3a197..382fca128 100644 --- a/gdk-pixbuf/pixops/README +++ b/gdk-pixbuf/pixops/README @@ -138,23 +138,6 @@ TODO switching around conditionals and inner loops in various places. -* Right now, in several of the most common cases, there are - optimized mmx routines, but no optimized C routines. - - For instance, there is a - - pixops_composite_line_22_4a4_mmx() - - But no - - pixops_composite_line_22_4a4() - - Also, it may be desirable to include a few more special cases - in particular: - - pixops_composite_line_22_4a3() - - May be desirable. - * Scaling down images by large scale factors is _slow_ since huge filter matrixes are computed. (e.g., to scale down by a factor of 100, we compute 101x101 filter matrixes. At some point, it would be more efficent to diff --git a/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S deleted file mode 100644 index c062cad91..000000000 --- a/gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (C) 2000 Red Hat, Inc - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - .file "composite_line_22_4a4_mmx.S" - .version "01.01" -gcc2_compiled.: -.text - .align 16 - -#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX) - -/* Magic indicating no need for an executable stack */ -#if !defined __powerpc64__ && !defined __ia64__ -.section .note.GNU-stack; .previous -#endif - -.globl _pixops_composite_line_22_4a4_mmx - .type _pixops_composite_line_22_4a4_mmx,@function -_pixops_composite_line_22_4a4_mmx: - -#else - -.globl __pixops_composite_line_22_4a4_mmx -__pixops_composite_line_22_4a4_mmx: - -#endif -/* - * Arguments - * - * weights: 8(%ebp) - * p: 12(%ebp) %esi - * q1: 16(%ebp) - * q2: 20(%ebp) - * xstep: 24(%ebp) - * p_end: 28(%ebp) - * xinit: 32(%ebp) - * -*/ -/* - * Function call entry - */ - pushl %ebp - movl %esp,%ebp - subl $28,%esp - pushl %edi - pushl %esi - pushl %ebx -/* Locals: - * int x %ebx - * int x_scaled -24(%ebp) - */ - -/* - * Setup - */ -/* Initialize variables */ - movl 32(%ebp),%ebx - movl 32(%ebp),%edx - sarl $16,%edx - movl 12(%ebp),%esi - - movl %edx,-24(%ebp) - - cmpl 28(%ebp),%esi - jnb .out - -/* Load initial values into %mm1, %mm3 */ - shll $2, %edx - - pxor %mm4, %mm4 - - movl 16(%ebp),%edi - movl (%edi, %edx), %eax - movd (%edi, %edx), %mm5 - punpcklbw %mm4, %mm5 - shrl $24, %eax - movl $0x010101, %ecx - mull %ecx - orl $0xff000000, %eax - movd %eax, %mm1 - punpcklbw %mm4, %mm1 - pmullw %mm5,%mm1 - - movl -24(%ebp),%edx - shll $2, %edx - - movl 20(%ebp),%edi - movl (%edi, %edx), %eax - movd (%edi, %edx), %mm5 - punpcklbw %mm4, %mm5 - shrl $24, %eax - movl $0x010101, %ecx - 
mull %ecx - orl $0xff000000, %eax - movd %eax, %mm3 - punpcklbw %mm4, %mm3 - pmullw %mm5,%mm3 - - psrlw $8,%mm1 - psrlw $8,%mm3 - - addl $65536,%ebx - movl %ebx,%edx - sarl $16,%edx - - jmp .newx - .p2align 4,,7 -.loop: -/* int x_index = (x & 0xf000) >> 12 */ - movl %ebx,%eax - andl $0xf000,%eax - shrl $7,%eax - - movq (%edi,%eax),%mm4 - pmullw %mm0,%mm4 - movq 8(%edi,%eax),%mm5 - pmullw %mm1,%mm5 - movq 16(%edi,%eax),%mm6 - movq 24(%edi,%eax),%mm7 - pmullw %mm2,%mm6 - pmullw %mm3,%mm7 - paddw %mm4, %mm5 - paddw %mm6, %mm7 - paddw %mm5, %mm7 - - movl $0xffff,%ecx - movd %ecx,%mm4 - psllq $48,%mm4 - movq %mm4,%mm6 - psubw %mm7,%mm4 - pand %mm6,%mm4 - - movq %mm4,%mm5 - psrlq $16,%mm4 - por %mm4,%mm5 - psrlq $32,%mm5 - por %mm4,%mm5 - - psrlw $8,%mm5 - - movd (%esi),%mm7 - pxor %mm4,%mm4 - punpcklbw %mm4, %mm7 - - pmullw %mm7,%mm5 - -/* x += x_step; */ - addl 24(%ebp),%ebx -/* x_scale = x >> 16; */ - movl %ebx,%edx - sarl $16,%edx - - paddw %mm5,%mm6 - - psrlw $8,%mm6 - packuswb %mm6, %mm6 - movd %mm6,(%esi) - - addl $4, %esi - - cmpl %esi,28(%ebp) - je .out - - cmpl %edx,-24(%ebp) - je .loop - -.newx: - movl %edx,-24(%ebp) -/* - * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2 - */ - movq %mm1, %mm0 - movq %mm3, %mm2 - - shll $2, %edx - -/* # %mm4 will always be already clear here */ -/* # pxor %mm4, %mm4 */ - - movl 16(%ebp),%edi - movl (%edi, %edx), %eax - movd (%edi, %edx), %mm5 - punpcklbw %mm4, %mm5 - shrl $24, %eax - movl $0x010101, %ecx - mull %ecx -/* - * mull destroyed %edx, need to reconstitute - */ - movl -24(%ebp),%edx - shll $2, %edx - - orl $0xff000000, %eax - movd %eax, %mm1 - punpcklbw %mm4, %mm1 - pmullw %mm5,%mm1 - - movl 20(%ebp),%edi - movl (%edi, %edx), %eax - movd (%edi, %edx), %mm5 - punpcklbw %mm4, %mm5 - shrl $24, %eax - movl $0x010101, %ecx - mull %ecx - orl $0xff000000, %eax - movd %eax, %mm3 - punpcklbw %mm4, %mm3 - pmullw %mm5,%mm3 - - psrlw $8,%mm1 - psrlw $8,%mm3 - - movl 8(%ebp),%edi - - jmp .loop - -.out: - 
movl %esi,%eax - emms - leal -40(%ebp),%esp - popl %ebx - popl %esi - popl %edi - movl %ebp,%esp - popl %ebp - ret diff --git a/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S b/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S deleted file mode 100644 index f72a8bf5d..000000000 --- a/gdk-pixbuf/pixops/composite_line_color_22_4a4_mmx.S +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright (C) 2000 Red Hat, Inc - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
- */ - .file "composite_line_color_22_4a4_mmx.S" - .version "01.01" -gcc2_compiled.: -.text - .align 16 - -#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX) - -/* Magic indicating no need for an executable stack */ -#if !defined __powerpc64__ && !defined __ia64__ -.section .note.GNU-stack; .previous -#endif - -.globl _pixops_composite_line_color_22_4a4_mmx - .type _pixops_composite_line_color_22_4a4_mmx,@function -_pixops_composite_line_color_22_4a4_mmx: - -#else - -.globl __pixops_composite_line_color_22_4a4_mmx -__pixops_composite_line_color_22_4a4_mmx: - -#endif -/* - * Arguments - * - * weights: 8(%ebp) - * p: 12(%ebp) %esi - * q1: 16(%ebp) - * q2: 20(%ebp) - * xstep: 24(%ebp) - * p_end: 28(%ebp) - * xinit: 32(%ebp) - * dest_x: 36(%ebp) - * check_shift: 40(%ebp) - * colors: 44(%ebp) - * -*/ - -/* - * Function call entry - */ - pushl %ebp - movl %esp,%ebp - subl $28,%esp - pushl %edi - pushl %esi - pushl %ebx -/* Locals: - * int x %ebx - * int x_scaled -24(%ebp) - */ - -/* - * Setup - */ -/* Initialize variables */ - movl 32(%ebp),%ebx - movl 32(%ebp),%edx - sarl $16,%edx - movl 12(%ebp),%esi - - movl %edx,-24(%ebp) - - cmpl 28(%ebp),%esi - jnb .out - -/* Load initial values into %mm1, %mm3 */ - shll $2, %edx - - pxor %mm4, %mm4 - - movl 16(%ebp),%edi - movl (%edi, %edx), %eax - movd (%edi, %edx), %mm5 - punpcklbw %mm4, %mm5 - shrl $24, %eax - movl $0x010101, %ecx - mull %ecx - orl $0xff000000, %eax - movd %eax, %mm1 - punpcklbw %mm4, %mm1 - pmullw %mm5,%mm1 - -/* - * mull destroyed %edx, need to reconstitute - */ - movl -24(%ebp),%edx - shll $2, %edx - - movl 20(%ebp),%edi - movl (%edi, %edx), %eax - movd (%edi, %edx), %mm5 - punpcklbw %mm4, %mm5 - shrl $24, %eax - movl $0x010101, %ecx - mull %ecx - orl $0xff000000, %eax - movd %eax, %mm3 - punpcklbw %mm4, %mm3 - pmullw %mm5,%mm3 - - psrlw $8,%mm1 - psrlw $8,%mm3 - - addl $65536,%ebx - movl %ebx,%edx - sarl $16,%edx - - jmp .newx - .p2align 4,,7 -.loop: -/* int x_index = (x & 0xf000) >> 
12 */ - movl %ebx,%eax - andl $0xf000,%eax - shrl $7,%eax - - movq (%edi,%eax),%mm4 - pmullw %mm0,%mm4 - movq 8(%edi,%eax),%mm5 - pmullw %mm1,%mm5 - movq 16(%edi,%eax),%mm6 - movq 24(%edi,%eax),%mm7 - pmullw %mm2,%mm6 - pmullw %mm3,%mm7 - paddw %mm4, %mm5 - paddw %mm6, %mm7 - paddw %mm5, %mm7 - - movl $0xffff,%ecx - movd %ecx,%mm4 - psllq $48,%mm4 - movq %mm4,%mm6 - psubw %mm7,%mm4 - pand %mm6,%mm4 - - movq %mm4,%mm5 - psrlq $16,%mm4 - por %mm4,%mm5 - psrlq $32,%mm5 - por %mm4,%mm5 - - psrlw $8,%mm5 - - movl 36(%ebp),%eax - incl 36(%ebp) - - movl 40(%ebp),%ecx - shrl %cl,%eax - andl $1,%eax - - movl 44(%ebp),%ecx - movq (%ecx,%eax,8),%mm6 - - pmullw %mm6,%mm5 - -/* x += x_step; */ - addl 24(%ebp),%ebx -/* x_scale = x >> 16; */ - movl %ebx,%edx - sarl $16,%edx - - paddw %mm5,%mm7 - - psrlw $8,%mm7 - packuswb %mm7, %mm7 - movd %mm7,(%esi) - - addl $4, %esi - - cmpl %esi,28(%ebp) - je .out - - cmpl %edx,-24(%ebp) - je .loop - -.newx: - movl %edx,-24(%ebp) -/* - * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2 - */ - movq %mm1, %mm0 - movq %mm3, %mm2 - - shll $2, %edx - - pxor %mm4, %mm4 - - movl 16(%ebp),%edi - movl (%edi, %edx), %eax - movd (%edi, %edx), %mm5 - punpcklbw %mm4, %mm5 - shrl $24, %eax - movl $0x010101, %ecx - mull %ecx -/* - * mull destroyed %edx, need to reconstitute - */ - movl -24(%ebp),%edx - shll $2, %edx - - orl $0xff000000, %eax - movd %eax, %mm1 - punpcklbw %mm4, %mm1 - pmullw %mm5,%mm1 - - movl 20(%ebp),%edi - movl (%edi, %edx), %eax - movd (%edi, %edx), %mm5 - punpcklbw %mm4, %mm5 - shrl $24, %eax - movl $0x010101, %ecx - mull %ecx - orl $0xff000000, %eax - movd %eax, %mm3 - punpcklbw %mm4, %mm3 - pmullw %mm5,%mm3 - - psrlw $8,%mm1 - psrlw $8,%mm3 - - movl 8(%ebp),%edi - - jmp .loop - -.out: - movl %esi,%eax - emms - leal -40(%ebp),%esp - popl %ebx - popl %esi - popl %edi - movl %ebp,%esp - popl %ebp - ret diff --git a/gdk-pixbuf/pixops/have_mmx.S b/gdk-pixbuf/pixops/have_mmx.S deleted file mode 100644 index 
ce0bb0088..000000000 --- a/gdk-pixbuf/pixops/have_mmx.S +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2000 Red Hat, Inc - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - .file "have_mmx.S" - .version "01.01" -gcc2_compiled.: -.text - .align 16 - -#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX) - -/* Magic indicating no need for an executable stack */ -#if !defined __powerpc64__ && !defined __ia64__ -.section .note.GNU-stack; .previous -#endif - -.globl _pixops_have_mmx - .type _pixops_have_mmx,@function -_pixops_have_mmx: - -#else - -.globl __pixops_have_mmx -__pixops_have_mmx: - -#endif - - push %ebx - -/* # Check if bit 21 in flags word is writeable */ - - pushfl - popl %eax - movl %eax,%ebx - xorl $0x00200000, %eax - pushl %eax - popfl - pushfl - popl %eax - - cmpl %eax, %ebx - - je .notfound - -/* # OK, we have CPUID */ - - movl $1, %eax - cpuid - - test $0x00800000, %edx - jz .notfound - - movl $1, %eax - jmp .out - -.notfound: - movl $0, %eax -.out: - popl %ebx - ret - diff --git a/gdk-pixbuf/pixops/pixops-internal.h b/gdk-pixbuf/pixops/pixops-internal.h deleted file mode 100644 index 6497c2424..000000000 --- a/gdk-pixbuf/pixops/pixops-internal.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (C) 2000 Red Hat, Inc - * - * This library is free software; you can redistribute it and/or - * modify it 
under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ -#ifdef USE_MMX -guchar *_pixops_scale_line_22_33_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init); -guchar *_pixops_composite_line_22_4a4_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init); -guchar *_pixops_composite_line_color_22_4a4_mmx (guint32 weights[16][8], guchar *p, guchar *q1, guchar *q2, int x_step, guchar *p_stop, int x_init, int dest_x, int check_shift, int *colors); -int _pixops_have_mmx (void); -#endif - diff --git a/gdk-pixbuf/pixops/pixops.c b/gdk-pixbuf/pixops/pixops.c index f6535f1f3..f1b75f820 100644 --- a/gdk-pixbuf/pixops/pixops.c +++ b/gdk-pixbuf/pixops/pixops.c @@ -23,7 +23,6 @@ #include "../fallback-c89.c" #include "pixops.h" -#include "pixops-internal.h" #define SUBSAMPLE_BITS 4 #define SUBSAMPLE (1 << SUBSAMPLE_BITS) @@ -837,36 +836,6 @@ composite_line_22_4a4 (int *weights, int n_x, int n_y, return dest; } -#ifdef USE_MMX -static guchar * -composite_line_22_4a4_mmx_stub (int *weights, int n_x, int n_y, guchar *dest, - int dest_x, guchar *dest_end, - int dest_channels, int dest_has_alpha, - guchar **src, int src_channels, - gboolean src_has_alpha, int x_init, - int x_step, int src_width, int check_size, - guint32 color1, guint32 color2) -{ - guint32 mmx_weights[16][8]; - int j; - - for (j=0; j<16; j++) - { - mmx_weights[j][0] = 0x00010001 * 
(weights[4*j] >> 8); - mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); - mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8); - mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8); - mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8); - mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8); - mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8); - mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8); - } - - return _pixops_composite_line_22_4a4_mmx (mmx_weights, dest, src[0], src[1], - x_step, dest_end, x_init); -} -#endif /* USE_MMX */ - static void composite_pixel_color (guchar *dest, int dest_x, int dest_channels, int dest_has_alpha, int src_has_alpha, int check_size, @@ -980,44 +949,6 @@ composite_line_color (int *weights, int n_x, int n_y, guchar *dest, return dest; } -#ifdef USE_MMX -static guchar * -composite_line_color_22_4a4_mmx_stub (int *weights, int n_x, int n_y, - guchar *dest, int dest_x, - guchar *dest_end, int dest_channels, - int dest_has_alpha, guchar **src, - int src_channels, gboolean src_has_alpha, - int x_init, int x_step, int src_width, - int check_size, guint32 color1, - guint32 color2) -{ - guint32 mmx_weights[16][8]; - int check_shift = get_check_shift (check_size); - int colors[4]; - int j; - - for (j=0; j<16; j++) - { - mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8); - mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); - mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8); - mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8); - mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8); - mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8); - mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8); - mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8); - } - - colors[0] = (color1 & 0xff00) << 8 | (color1 & 0xff); - colors[1] = (color1 & 0xff0000) >> 16; - colors[2] = (color2 & 0xff00) << 8 | (color2 & 0xff); - colors[3] = (color2 & 0xff0000) >> 16; - - return 
_pixops_composite_line_color_22_4a4_mmx (mmx_weights, dest, src[0], - src[1], x_step, dest_end, x_init, dest_x, check_shift, colors); -} -#endif /* USE_MMX */ - static void scale_pixel (guchar *dest, int dest_x, int dest_channels, int dest_has_alpha, int src_has_alpha, int check_size, guint32 color1, guint32 color2, @@ -1142,34 +1073,6 @@ scale_line (int *weights, int n_x, int n_y, guchar *dest, int dest_x, return dest; } -#ifdef USE_MMX -static guchar * -scale_line_22_33_mmx_stub (int *weights, int n_x, int n_y, guchar *dest, - int dest_x, guchar *dest_end, int dest_channels, - int dest_has_alpha, guchar **src, int src_channels, - gboolean src_has_alpha, int x_init, int x_step, - int src_width, int check_size, guint32 color1, - guint32 color2) -{ - guint32 mmx_weights[16][8]; - int j; - - for (j=0; j<16; j++) - { - mmx_weights[j][0] = 0x00010001 * (weights[4*j] >> 8); - mmx_weights[j][1] = 0x00010001 * (weights[4*j] >> 8); - mmx_weights[j][2] = 0x00010001 * (weights[4*j + 1] >> 8); - mmx_weights[j][3] = 0x00010001 * (weights[4*j + 1] >> 8); - mmx_weights[j][4] = 0x00010001 * (weights[4*j + 2] >> 8); - mmx_weights[j][5] = 0x00010001 * (weights[4*j + 2] >> 8); - mmx_weights[j][6] = 0x00010001 * (weights[4*j + 3] >> 8); - mmx_weights[j][7] = 0x00010001 * (weights[4*j + 3] >> 8); - } - - return _pixops_scale_line_22_33_mmx (mmx_weights, dest, src[0], src[1], - x_step, dest_end, x_init); -} -#endif /* USE_MMX */ static guchar * scale_line_22_33 (int *weights, int n_x, int n_y, guchar *dest, int dest_x, @@ -1910,10 +1813,6 @@ _pixops_composite_color_real (guchar *dest_buf, PixopsLineFunc line_func; guchar *tmp_buf = NULL; -#ifdef USE_MMX - gboolean found_mmx = _pixops_have_mmx (); -#endif - g_return_if_fail (!(dest_channels == 3 && dest_has_alpha)); g_return_if_fail (!(src_channels == 3 && src_has_alpha)); @@ -1941,14 +1840,7 @@ _pixops_composite_color_real (guchar *dest_buf, if (!make_weights (&filter, interp_type, scale_x, scale_y)) return; -#ifdef USE_MMX - if 
(filter.x.n == 2 && filter.y.n == 2 && - dest_channels == 4 && src_channels == 4 && - src_has_alpha && !dest_has_alpha && found_mmx) - line_func = composite_line_color_22_4a4_mmx_stub; - else -#endif - line_func = composite_line_color; + line_func = composite_line_color; pixops_process (dest_buf, render_x0, render_y0, render_x1, render_y1, dest_rowstride, dest_channels, dest_has_alpha, @@ -2071,10 +1963,6 @@ _pixops_composite_real (guchar *dest_buf, PixopsLineFunc line_func; guchar *tmp_buf = NULL; -#ifdef USE_MMX - gboolean found_mmx = _pixops_have_mmx (); -#endif - g_return_if_fail (!(dest_channels == 3 && dest_has_alpha)); g_return_if_fail (!(src_channels == 3 && src_has_alpha)); @@ -2107,14 +1995,7 @@ _pixops_composite_real (guchar *dest_buf, if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 4 && src_channels == 4 && src_has_alpha && !dest_has_alpha) - { -#ifdef USE_MMX - if (found_mmx) - line_func = composite_line_22_4a4_mmx_stub; - else -#endif - line_func = composite_line_22_4a4; - } + line_func = composite_line_22_4a4; else line_func = composite_line; @@ -2491,10 +2372,6 @@ _pixops_scale_real (guchar *dest_buf, PixopsLineFunc line_func; guchar *tmp_buf = NULL; /* Temporary image for two-step scaling */ -#ifdef USE_MMX - gboolean found_mmx = _pixops_have_mmx (); -#endif - g_return_if_fail (!(dest_channels == 3 && dest_has_alpha)); g_return_if_fail (!(src_channels == 3 && src_has_alpha)); g_return_if_fail (!(src_has_alpha && !dest_has_alpha)); @@ -2522,14 +2399,7 @@ _pixops_scale_real (guchar *dest_buf, return; if (filter.x.n == 2 && filter.y.n == 2 && dest_channels == 3 && src_channels == 3) - { -#ifdef USE_MMX - if (found_mmx) - line_func = scale_line_22_33_mmx_stub; - else -#endif - line_func = scale_line_22_33; - } + line_func = scale_line_22_33; else line_func = scale_line; diff --git a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S b/gdk-pixbuf/pixops/scale_line_22_33_mmx.S deleted file mode 100644 index 6080844c1..000000000 --- 
a/gdk-pixbuf/pixops/scale_line_22_33_mmx.S +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (C) 2000 Red Hat, Inc - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - .file "scale_line_22_33_mmx.S" - .version "01.01" -gcc2_compiled.: -.text - .align 16 - -#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX) - -/* Magic indicating no need for an executable stack */ -#if !defined __powerpc64__ && !defined __ia64__ -.section .note.GNU-stack; .previous -#endif - -.globl _pixops_scale_line_22_33_mmx - .type _pixops_scale_line_22_33_mmx,@function -_pixops_scale_line_22_33_mmx: - -#else - -.globl __pixops_scale_line_22_33_mmx -__pixops_scale_line_22_33_mmx: - -#endif -/* - * Arguments - * - * weights: 8(%ebp) - * p: 12(%ebp) %esi - * q1: 16(%ebp) - * q2: 20(%ebp) - * xstep: 24(%ebp) - * p_end: 28(%ebp) - * xinit: 32(%ebp) - * -*/ - -/* - * Function call entry - */ - pushl %ebp - movl %esp,%ebp - subl $28,%esp - pushl %edi - pushl %esi - pushl %ebx -/* Locals: - * int x %ebx - * int x_scaled -24(%ebp) - */ - -/* - * Setup - */ -/* Initialize variables */ - movl 32(%ebp),%ebx - movl 32(%ebp),%edx - sarl $16,%edx - movl 12(%ebp),%esi - - cmpl 28(%ebp),%esi - jnb .out - -/* For the body of this loop, %mm01, %mm1, %mm2, %mm3 hold the 4 adjoining - * points we are interpolating between, as: - * - * 000000BB00GG00RR - */ - -/* Load 
initial values into %mm1, %mm3 */ - leal (%edx,%edx,2),%edx # Multiply by 3 - - movl 16(%ebp),%edi - pxor %mm4, %mm4 - movzbl 2(%edi,%edx),%ecx - shll $16,%ecx - movzwl (%edi,%edx),%eax - orl %eax,%ecx - movd %ecx, %mm1 - punpcklbw %mm4, %mm1 - - movl 20(%ebp),%edi - movzbl 2(%edi,%edx),%ecx - shll $16,%ecx - movzwl (%edi,%edx),%eax - orl %eax,%ecx - movd %ecx, %mm3 - punpcklbw %mm4, %mm3 - - addl $65536,%ebx - movl %ebx,%edx - sarl $16,%edx - - jmp .newx - .p2align 4,,7 -.loop: -/* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y - * 16 4 0xf 2 2 - */ - movl %ebx,%eax - andl $0xf000,%eax - shrl $7,%eax - -/* At this point, %edi holds weights. Load the 4 weights into %mm4,%mm5,%mm6,%mm7, multiply and - * accumulate. - */ - movq (%edi,%eax),%mm4 - pmullw %mm0,%mm4 - movq 8(%edi,%eax),%mm5 - pmullw %mm1,%mm5 - movq 16(%edi,%eax),%mm6 - movq 24(%edi,%eax),%mm7 - pmullw %mm2,%mm6 - pmullw %mm3,%mm7 - paddw %mm4, %mm5 - paddw %mm6, %mm7 - paddw %mm5, %mm7 - -/* %mm7 holds the accumulated sum. 
Compute (C + 0x80) / 256 - */ - pxor %mm4, %mm4 - movl $8421504, %eax # 0x00808080 - movd %eax, %mm6 - punpcklbw %mm4, %mm6 - paddw %mm6, %mm7 - psrlw $8, %mm7 - -/* Pack into %eax and store result - */ - packuswb %mm7, %mm7 - movd %mm7, %eax - - movb %al, (%esi) - shrl $8, %eax - movw %ax, 1(%esi) - addl $3, %esi - - cmpl %esi,28(%ebp) - je .out - -/* x += x_step; */ - addl 24(%ebp),%ebx -/* x_scaled = x >> 16; */ - movl %ebx,%edx - sarl $16,%edx - - cmpl %edx,-24(%ebp) - je .loop - -.newx: - movl %edx,-24(%ebp) -/* - * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2 - */ - movq %mm1, %mm0 - movq %mm3, %mm2 - - leal (%edx,%edx,2),%edx # Multiply by 3 - - movl 16(%ebp),%edi - movzbl 2(%edi,%edx),%ecx - shll $16,%ecx - movzwl (%edi,%edx),%eax - orl %eax,%ecx - movd %ecx, %mm1 - punpcklbw %mm4, %mm1 - - movl 20(%ebp),%edi - movzbl 2(%edi,%edx),%ecx - shll $16,%ecx - movzwl (%edi,%edx),%eax - orl %eax,%ecx - movd %ecx, %mm3 - punpcklbw %mm4, %mm3 - - movl 8(%ebp),%edi - - jmp .loop - -.out: - movl %esi,%eax - emms - leal -40(%ebp),%esp - popl %ebx - popl %esi - popl %edi - movl %ebp,%esp - popl %ebp - ret |