diff options
author | David Schleef <ds@schleef.org> | 2004-12-06 07:20:34 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2004-12-06 07:20:34 +0000 |
commit | cbe265592ffcf7b841ca7a3e3e4b196c5561d371 (patch) | |
tree | 5cbe41f25dfbbead6ba364412078c4f354b7c0eb /liboil/simdpack | |
parent | d94b113b3a8fcce9ec6c2170e39614c22b3bacf6 (diff) | |
download | liboil-cbe265592ffcf7b841ca7a3e3e4b196c5561d371.tar.gz |
* configure.ac: add uberopt
* examples/Makefile.am: same
* examples/oil-inspect.c: (impl_compare), (oil_print_class): sort
implementations by profile_ave before printing.
* examples/uberopt/Makefile.am: add uberopt
* examples/uberopt/example.c: (abs_u16_s16_i386asm_UBER_INDEX):
* examples/uberopt/uberopt.c: (main):
* liboil/copy/Makefile.am: add trans8x8_i386.c
* liboil/copy/copy.c: these are not reference functions
* liboil/copy/trans8x8.c: fix reference function (oops!)
* liboil/copy/trans8x8_i386.c: new file; MMX implementation
* liboil/simdpack/abs_i386.c: (abs_u16_s16_i386asm_uber4),
(abs_u16_s16_i386asm3): new impl created by uberopt
* liboil/simdpack/abs_misc.c: (abs_u16_s16_unroll2),
(abs_u16_s16_unroll4): add unroll2
Diffstat (limited to 'liboil/simdpack')
-rw-r--r-- | liboil/simdpack/abs_i386.c | 76 | ||||
-rw-r--r-- | liboil/simdpack/abs_misc.c | 22 |
2 files changed, 62 insertions, 36 deletions
diff --git a/liboil/simdpack/abs_i386.c b/liboil/simdpack/abs_i386.c index 086892a..5a9e229 100644 --- a/liboil/simdpack/abs_i386.c +++ b/liboil/simdpack/abs_i386.c @@ -46,6 +46,29 @@ abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm, abs_u16_s16); +/* The previous function after running through uberopt */ +static void +abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src, + int sstr, int n) +{ + __asm__ __volatile__ ("\n" + " .p2align 4,,15 \n" + "1: \n" + " movswl (%0), %%eax \n" /* UBER 0: */ + " addl $2, %0 \n" /* UBER 1: 0 */ + " movl %%eax, %%edx \n" /* UBER 2: 0 */ + " decl %2 \n" /* UBER 7: */ + " negl %%edx \n" /* UBER 3: 2 */ + " cmpl $-1, %%eax ; cmovle %%edx, %%eax \n" /* UBER 4: 3 */ + " movw %%ax, (%1) \n" /* UBER 5: 4 */ + " addl $2, %1 \n" /* UBER 6: 5 */ + " testl %2, %2 \n" + " jg 1b \n" + :"+r" (src), "+r" (dest), "+r" (n) + ::"eax", "edx"); +} +OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm_uber4, abs_u16_s16); + static void abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { @@ -70,48 +93,31 @@ abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm2, abs_u16_s16); -#if 0 -/* This doesn't work in PIC mode */ -/* Weave two threads */ static void abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { - while (n & 1) { - *dest = ABS (*src); - OIL_INCREMENT (dest, dstr); - OIL_INCREMENT (src, sstr); - n--; - } - n /= 2; __asm__ __volatile__ ("\n" - " pushl %%ebp \n" - " movl %%eax, %%ebp \n" " .p2align 4,,15 \n" - "1: movswl (%%edi), %%eax \n" - " addl $2, %%edi \n" - " movswl (%%edi), %%ecx \n" - " addl $2, %%edi \n" - " movl %%eax, %%ebx \n" - " movl %%ecx, %%edx \n" - " negl %%ebx \n" - " negl %%edx \n" - " cmpl $-1, %%eax \n" - " cmovle %%ebx, %%eax \n" - " cmpl $-1, %%ecx \n" - " cmovle %%edx, %%ecx \n" - " movw %%ax, (%%ebp) \n" - " addl $2, 
%%ebp \n" - " movw %%cx, (%%ebp) \n" - " addl $2, %%ebp \n" - " decl %2 \n" - " testl %2, %2 \n" - " jg 1b \n" - " popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n) - ::"ebx", "ecx", "edx", "ebp"); + "1: movswl (%1), %%eax \n" + " mov %3, %%edx \n" + " add %%edx, %1 \n" + " mov %%eax, %%edx \n" + " sar $0xf, %%ax \n" + " and %%edx, %%eax \n" + " add %%eax, %%eax \n" + " sub %%eax, %%edx \n" + " mov %%dx, (%0) \n" + " mov %4, %%edx \n" + " add %%edx, %0 \n" + " decl %2 \n" + " jne 1b \n" + : "+r" (src), "+r" (dest), "+m" (n) + : "m" (dstr), "m" (sstr) + : "eax", "edx"); } - OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16); -#endif + + static void abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) diff --git a/liboil/simdpack/abs_misc.c b/liboil/simdpack/abs_misc.c index 3ef4c25..1ba61cf 100644 --- a/liboil/simdpack/abs_misc.c +++ b/liboil/simdpack/abs_misc.c @@ -26,6 +26,27 @@ #define ABS(x) ((x)>0 ? (x) : -(x)) static void +abs_u16_s16_unroll2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) +{ + while (n & 1) { + *dest = ABS (*src); + OIL_INCREMENT (dest, dstr); + OIL_INCREMENT (src, sstr); + n--; + } + while (n > 0) { + *dest = ABS (*src); + OIL_INCREMENT (dest, dstr); + OIL_INCREMENT (src, sstr); + *dest = ABS (*src); + OIL_INCREMENT (dest, dstr); + OIL_INCREMENT (src, sstr); + n -= 2; + } +} +OIL_DEFINE_IMPL (abs_u16_s16_unroll2, abs_u16_s16); + +static void abs_u16_s16_unroll4 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { while (n & 3) { @@ -50,7 +71,6 @@ abs_u16_s16_unroll4 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) n -= 4; } } - OIL_DEFINE_IMPL (abs_u16_s16_unroll4, abs_u16_s16); static void |