summaryrefslogtreecommitdiff
path: root/liboil/simdpack
diff options
context:
space:
mode:
authorDavid Schleef <ds@schleef.org>2005-01-02 06:31:02 +0000
committerDavid Schleef <ds@schleef.org>2005-01-02 06:31:02 +0000
commit199e85f573a9239f1693862d3fd03fa612987f62 (patch)
treec0c28930b881af1f3a4bbf57b14db12dadef289d /liboil/simdpack
parenta428fee448709f5864103e08743e2e8f07c0c87f (diff)
downloadliboil-199e85f573a9239f1693862d3fd03fa612987f62.tar.gz
* liboil/colorspace/argb_paint_i386.c:
* liboil/colorspace/ayuv2argb_i386.c: * liboil/liboilfunction.c: (oil_class_optimize): disable functions that fail test * liboil/liboiltest.c: (oil_test_new), (check_zero), (oil_test_check_impl), (init_parameter): Fix double-free bug, plus other problems with testing from applications. * liboil/dct/idct8x8_i386.c: pshufw apparently is not MMX * liboil/simdpack/abs_i386.c: (abs_u16_s16_i386asm3), (abs_u16_s16_mmx), (abs_u16_s16_mmxx): disable code that doesn't
Diffstat (limited to 'liboil/simdpack')
-rw-r--r--liboil/simdpack/abs_i386.c34
-rw-r--r--liboil/simdpack/clip_s32.c11
-rw-r--r--liboil/simdpack/diffsquaresum_f64.c5
-rw-r--r--liboil/simdpack/scalaradd.c3
-rw-r--r--liboil/simdpack/scalarmult.c3
5 files changed, 35 insertions, 21 deletions
diff --git a/liboil/simdpack/abs_i386.c b/liboil/simdpack/abs_i386.c
index 7ecdb3c..626065e 100644
--- a/liboil/simdpack/abs_i386.c
+++ b/liboil/simdpack/abs_i386.c
@@ -34,6 +34,7 @@
#define ABS(x) ((x)>0 ? (x) : -(x))
+#if 0
static void
abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
@@ -53,8 +54,10 @@ abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
::"eax", "edx");
}
-OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm, abs_u16_s16);
+OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
+#endif
+#if 0
/* The previous function after running through uberopt */
static void
abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src,
@@ -76,8 +79,10 @@ abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src,
:"+r" (src), "+r" (dest), "+r" (n)
::"eax", "edx");
}
-OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm_uber4, abs_u16_s16);
+OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm_uber4, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
+#endif
+#if 0
static void
abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
@@ -99,28 +104,26 @@ abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
" popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n)
::"ecx", "edx");
}
-
-OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm2, abs_u16_s16);
+OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm2, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
+#endif
static void
abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
__asm__ __volatile__ ("\n"
" .p2align 4,,15 \n"
- "1: movswl (%1), %%eax \n"
- " mov %3, %%edx \n"
- " add %%edx, %1 \n"
+ "1: movsxw (%1), %%eax \n"
+ " add %3, %1 \n"
" mov %%eax, %%edx \n"
" sar $0xf, %%ax \n"
" and %%edx, %%eax \n"
" add %%eax, %%eax \n"
" sub %%eax, %%edx \n"
" mov %%dx, (%0) \n"
- " mov %4, %%edx \n"
- " add %%edx, %0 \n"
+ " add %4, %0 \n"
" decl %2 \n"
" jne 1b \n"
- : "+r" (src), "+r" (dest), "+m" (n)
+ : "+r" (dest), "+r" (src), "+m" (n)
: "m" (dstr), "m" (sstr)
: "eax", "edx");
}
@@ -131,10 +134,11 @@ OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16);
static void
abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
- const short p[] = { -32768, -32768, -32768, -32768,
- 32767, 32767, 32767, 32767
+ const int16_t p[][4] = {
+ { -32768, -32768, -32768, -32768 },
+ { 32767, 32767, 32767, 32767 }
};
- short tmp[4];
+ int16_t tmp[4];
while (n & 3) {
*dest = ABS (*src);
@@ -158,6 +162,7 @@ abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
OIL_INCREMENT (src, sstr);
__asm__ __volatile__ ("\n"
" movq (%%eax), %%mm1 \n"
+ " movq %%mm1, %%mm0 \n"
" paddsw %%mm2, %%mm0 \n"
" paddsw %%mm3, %%mm1 \n"
" psubsw %%mm2, %%mm0 \n"
@@ -180,6 +185,7 @@ abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx, abs_u16_s16, OIL_IMPL_FLAG_MMX);
+#if 0
static void
abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
@@ -222,8 +228,8 @@ abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
:"c" (p));
asm volatile ("emms");
}
-
OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmxx, abs_u16_s16, OIL_IMPL_FLAG_MMX);
+#endif
static void
abs_u16_s16_mmx2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
diff --git a/liboil/simdpack/clip_s32.c b/liboil/simdpack/clip_s32.c
index 001ae6f..464111c 100644
--- a/liboil/simdpack/clip_s32.c
+++ b/liboil/simdpack/clip_s32.c
@@ -35,19 +35,20 @@
/* This is a suprisingly fast implementation of clipping
* in straight C. It would be difficult to do it faster in asm
* without specialized opcodes. However, this trick clips
- * the range min^(1<<31) to max^(1<<31) incorrectly. So
- * it's limited to 31 bits. */
+ * the range min^(1<<31) to max^(1<<31) incorrectly with int32_t.
+ * Thus the use of int64_t. */
static void
clip_s32_fast (int32_t *dest, int dstr, int32_t *src, int sstr, int n,
int32_t *low, int32_t *hi)
{
int i;
- int32_t x;
+ int64_t x;
for(i=0;i<n;i++){
- x = src[i];
- dest[i] = x - (((x-*low)>>31)&(x-*low)) + (((*hi-x)>>31)&(*hi-x));
+ x = OIL_GET(src,i*sstr,int32_t);
+ OIL_GET(dest,i*dstr,int32_t) = x - (((x-*low)>>31)&(x-*low))
+ + (((*hi-x)>>31)&(*hi-x));
}
}
diff --git a/liboil/simdpack/diffsquaresum_f64.c b/liboil/simdpack/diffsquaresum_f64.c
index f178902..633f90a 100644
--- a/liboil/simdpack/diffsquaresum_f64.c
+++ b/liboil/simdpack/diffsquaresum_f64.c
@@ -34,7 +34,7 @@
#include <math.h>
OIL_DEFINE_CLASS (diffsquaresum_f64,
- "double *dest, double *src1, int sstr1, double *src2, int sstr2, int n");
+ "double *d_1, double *src1, int sstr1, double *src2, int sstr2, int n");
static void
diffsquaresum_f64_ref(double *dest, double *src1, int sstr1, double *src2,
@@ -108,7 +108,8 @@ diffsquaresum_f64_i10_unroll2(double *dest, double *src1, int sstr1, double *src
}
OIL_DEFINE_IMPL (diffsquaresum_f64_i10_unroll2, diffsquaresum_f64);
-static void diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1,
+static void
+diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1,
double *src2, int sstr2, int n)
{
double sum0 = 0;
diff --git a/liboil/simdpack/scalaradd.c b/liboil/simdpack/scalaradd.c
index 4216a05..f5f8857 100644
--- a/liboil/simdpack/scalaradd.c
+++ b/liboil/simdpack/scalaradd.c
@@ -76,6 +76,9 @@ static void scalaradd_ ## type ## _unroll2( \
*dest = *src + *val; \
OIL_INCREMENT(dest,dstr); \
OIL_INCREMENT(src,sstr); \
+ *dest = *src + *val; \
+ OIL_INCREMENT(dest,dstr); \
+ OIL_INCREMENT(src,sstr); \
n--; \
} \
} \
diff --git a/liboil/simdpack/scalarmult.c b/liboil/simdpack/scalarmult.c
index 958b3c3..f4d8bb3 100644
--- a/liboil/simdpack/scalarmult.c
+++ b/liboil/simdpack/scalarmult.c
@@ -75,6 +75,9 @@ static void scalarmult_ ## type ## _unroll2( \
*dest = *src * *val; \
OIL_INCREMENT(dest,dstr); \
OIL_INCREMENT(src,sstr); \
+ *dest = *src * *val; \
+ OIL_INCREMENT(dest,dstr); \
+ OIL_INCREMENT(src,sstr); \
n--; \
} \
} \