summaryrefslogtreecommitdiff
path: root/liboil/simdpack
diff options
context:
space:
mode:
author David Schleef <ds@schleef.org> 2004-12-06 07:20:34 +0000
committer David Schleef <ds@schleef.org> 2004-12-06 07:20:34 +0000
commit cbe265592ffcf7b841ca7a3e3e4b196c5561d371 (patch)
tree 5cbe41f25dfbbead6ba364412078c4f354b7c0eb /liboil/simdpack
parent d94b113b3a8fcce9ec6c2170e39614c22b3bacf6 (diff)
download liboil-cbe265592ffcf7b841ca7a3e3e4b196c5561d371.tar.gz
* configure.ac: add uberopt
* examples/Makefile.am: same
* examples/oil-inspect.c: (impl_compare), (oil_print_class): sort implementations by profile_ave before printing.
* examples/uberopt/Makefile.am: add uberopt
* examples/uberopt/example.c: (abs_u16_s16_i386asm_UBER_INDEX):
* examples/uberopt/uberopt.c: (main):
* liboil/copy/Makefile.am: add trans8x8_i386.c
* liboil/copy/copy.c: not reference functions
* liboil/copy/trans8x8.c: fix reference function (oops!)
* liboil/copy/trans8x8_i386.c: new. mmx implementation
* liboil/simdpack/abs_i386.c: (abs_u16_s16_i386asm_uber4), (abs_u16_s16_i386asm3): new impl created by uberopt
* liboil/simdpack/abs_misc.c: (abs_u16_s16_unroll2), (abs_u16_s16_unroll4): add unroll2
Diffstat (limited to 'liboil/simdpack')
-rw-r--r-- liboil/simdpack/abs_i386.c 76
-rw-r--r-- liboil/simdpack/abs_misc.c 22
2 files changed, 62 insertions, 36 deletions
diff --git a/liboil/simdpack/abs_i386.c b/liboil/simdpack/abs_i386.c
index 086892a..5a9e229 100644
--- a/liboil/simdpack/abs_i386.c
+++ b/liboil/simdpack/abs_i386.c
@@ -46,6 +46,29 @@ abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm, abs_u16_s16);
+/* The previous function after running through uberopt */
+static void
+abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src,
+ int sstr, int n)
+{
+ __asm__ __volatile__ ("\n"
+ " .p2align 4,,15 \n"
+ "1: \n"
+ " movswl (%0), %%eax \n" /* UBER 0: */
+ " addl $2, %0 \n" /* UBER 1: 0 */
+ " movl %%eax, %%edx \n" /* UBER 2: 0 */
+ " decl %2 \n" /* UBER 7: */
+ " negl %%edx \n" /* UBER 3: 2 */
+ " cmpl $-1, %%eax ; cmovle %%edx, %%eax \n" /* UBER 4: 3 */
+ " movw %%ax, (%1) \n" /* UBER 5: 4 */
+ " addl $2, %1 \n" /* UBER 6: 5 */
+ " testl %2, %2 \n"
+ " jg 1b \n"
+ :"+r" (src), "+r" (dest), "+r" (n)
+ ::"eax", "edx");
+}
+OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm_uber4, abs_u16_s16);
+
static void
abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
@@ -70,48 +93,31 @@ abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm2, abs_u16_s16);
-#if 0
-/* This doesn't work in PIC mode */
-/* Weave two threads */
static void
abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
- while (n & 1) {
- *dest = ABS (*src);
- OIL_INCREMENT (dest, dstr);
- OIL_INCREMENT (src, sstr);
- n--;
- }
- n /= 2;
__asm__ __volatile__ ("\n"
- " pushl %%ebp \n"
- " movl %%eax, %%ebp \n"
" .p2align 4,,15 \n"
- "1: movswl (%%edi), %%eax \n"
- " addl $2, %%edi \n"
- " movswl (%%edi), %%ecx \n"
- " addl $2, %%edi \n"
- " movl %%eax, %%ebx \n"
- " movl %%ecx, %%edx \n"
- " negl %%ebx \n"
- " negl %%edx \n"
- " cmpl $-1, %%eax \n"
- " cmovle %%ebx, %%eax \n"
- " cmpl $-1, %%ecx \n"
- " cmovle %%edx, %%ecx \n"
- " movw %%ax, (%%ebp) \n"
- " addl $2, %%ebp \n"
- " movw %%cx, (%%ebp) \n"
- " addl $2, %%ebp \n"
- " decl %2 \n"
- " testl %2, %2 \n"
- " jg 1b \n"
- " popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n)
- ::"ebx", "ecx", "edx", "ebp");
+ "1: movswl (%1), %%eax \n"
+ " mov %3, %%edx \n"
+ " add %%edx, %1 \n"
+ " mov %%eax, %%edx \n"
+ " sar $0xf, %%ax \n"
+ " and %%edx, %%eax \n"
+ " add %%eax, %%eax \n"
+ " sub %%eax, %%edx \n"
+ " mov %%dx, (%0) \n"
+ " mov %4, %%edx \n"
+ " add %%edx, %0 \n"
+ " decl %2 \n"
+ " jne 1b \n"
+ : "+r" (src), "+r" (dest), "+m" (n)
+ : "m" (dstr), "m" (sstr)
+ : "eax", "edx");
}
-
OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16);
-#endif
+
+
static void
abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
diff --git a/liboil/simdpack/abs_misc.c b/liboil/simdpack/abs_misc.c
index 3ef4c25..1ba61cf 100644
--- a/liboil/simdpack/abs_misc.c
+++ b/liboil/simdpack/abs_misc.c
@@ -26,6 +26,27 @@
#define ABS(x) ((x)>0 ? (x) : -(x))
static void
+abs_u16_s16_unroll2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
+{
+ while (n & 1) {
+ *dest = ABS (*src);
+ OIL_INCREMENT (dest, dstr);
+ OIL_INCREMENT (src, sstr);
+ n--;
+ }
+ while (n > 0) {
+ *dest = ABS (*src);
+ OIL_INCREMENT (dest, dstr);
+ OIL_INCREMENT (src, sstr);
+ *dest = ABS (*src);
+ OIL_INCREMENT (dest, dstr);
+ OIL_INCREMENT (src, sstr);
+ n -= 2;
+ }
+}
+OIL_DEFINE_IMPL (abs_u16_s16_unroll2, abs_u16_s16);
+
+static void
abs_u16_s16_unroll4 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
while (n & 3) {
@@ -50,7 +71,6 @@ abs_u16_s16_unroll4 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
n -= 4;
}
}
-
OIL_DEFINE_IMPL (abs_u16_s16_unroll4, abs_u16_s16);
static void