diff options
author | David Schleef <ds@schleef.org> | 2005-01-02 06:31:02 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-01-02 06:31:02 +0000 |
commit | 199e85f573a9239f1693862d3fd03fa612987f62 (patch) | |
tree | c0c28930b881af1f3a4bbf57b14db12dadef289d /liboil/simdpack | |
parent | a428fee448709f5864103e08743e2e8f07c0c87f (diff) | |
download | liboil-199e85f573a9239f1693862d3fd03fa612987f62.tar.gz |
* liboil/colorspace/argb_paint_i386.c:
* liboil/colorspace/ayuv2argb_i386.c:
* liboil/liboilfunction.c: (oil_class_optimize): disable functions
that fail test
* liboil/liboiltest.c: (oil_test_new), (check_zero),
(oil_test_check_impl), (init_parameter): Fix double-free bug, plus
other problems with testing from applications.
* liboil/dct/idct8x8_i386.c: pshufw apparently is not MMX
* liboil/simdpack/abs_i386.c: (abs_u16_s16_i386asm3),
(abs_u16_s16_mmx), (abs_u16_s16_mmxx): disable code that doesn't
Diffstat (limited to 'liboil/simdpack')
-rw-r--r-- | liboil/simdpack/abs_i386.c | 34 | ||||
-rw-r--r-- | liboil/simdpack/clip_s32.c | 11 | ||||
-rw-r--r-- | liboil/simdpack/diffsquaresum_f64.c | 5 | ||||
-rw-r--r-- | liboil/simdpack/scalaradd.c | 3 | ||||
-rw-r--r-- | liboil/simdpack/scalarmult.c | 3 |
5 files changed, 35 insertions, 21 deletions
diff --git a/liboil/simdpack/abs_i386.c b/liboil/simdpack/abs_i386.c index 7ecdb3c..626065e 100644 --- a/liboil/simdpack/abs_i386.c +++ b/liboil/simdpack/abs_i386.c @@ -34,6 +34,7 @@ #define ABS(x) ((x)>0 ? (x) : -(x)) +#if 0 static void abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { @@ -53,8 +54,10 @@ abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) ::"eax", "edx"); } -OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm, abs_u16_s16); +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm, abs_u16_s16, OIL_IMPL_FLAG_CMOV); +#endif +#if 0 /* The previous function after running through uberopt */ static void abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src, @@ -76,8 +79,10 @@ abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src, :"+r" (src), "+r" (dest), "+r" (n) ::"eax", "edx"); } -OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm_uber4, abs_u16_s16); +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm_uber4, abs_u16_s16, OIL_IMPL_FLAG_CMOV); +#endif +#if 0 static void abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { @@ -99,28 +104,26 @@ abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) " popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n) ::"ecx", "edx"); } - -OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm2, abs_u16_s16); +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm2, abs_u16_s16, OIL_IMPL_FLAG_CMOV); +#endif static void abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { __asm__ __volatile__ ("\n" " .p2align 4,,15 \n" - "1: movswl (%1), %%eax \n" - " mov %3, %%edx \n" - " add %%edx, %1 \n" + "1: movsxw (%1), %%eax \n" + " add %3, %1 \n" " mov %%eax, %%edx \n" " sar $0xf, %%ax \n" " and %%edx, %%eax \n" " add %%eax, %%eax \n" " sub %%eax, %%edx \n" " mov %%dx, (%0) \n" - " mov %4, %%edx \n" - " add %%edx, %0 \n" + " add %4, %0 \n" " decl %2 \n" " jne 1b \n" - : "+r" (src), "+r" (dest), "+m" (n) + : "+r" (dest), "+r" (src), 
"+m" (n) : "m" (dstr), "m" (sstr) : "eax", "edx"); } @@ -131,10 +134,11 @@ OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16); static void abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { - const short p[] = { -32768, -32768, -32768, -32768, - 32767, 32767, 32767, 32767 + const int16_t p[][4] = { + { -32768, -32768, -32768, -32768 }, + { 32767, 32767, 32767, 32767 } }; - short tmp[4]; + int16_t tmp[4]; while (n & 3) { *dest = ABS (*src); @@ -158,6 +162,7 @@ abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) OIL_INCREMENT (src, sstr); __asm__ __volatile__ ("\n" " movq (%%eax), %%mm1 \n" + " movq %%mm1, %%mm0 \n" " paddsw %%mm2, %%mm0 \n" " paddsw %%mm3, %%mm1 \n" " psubsw %%mm2, %%mm0 \n" @@ -180,6 +185,7 @@ abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx, abs_u16_s16, OIL_IMPL_FLAG_MMX); +#if 0 static void abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { @@ -222,8 +228,8 @@ abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) :"c" (p)); asm volatile ("emms"); } - OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmxx, abs_u16_s16, OIL_IMPL_FLAG_MMX); +#endif static void abs_u16_s16_mmx2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) diff --git a/liboil/simdpack/clip_s32.c b/liboil/simdpack/clip_s32.c index 001ae6f..464111c 100644 --- a/liboil/simdpack/clip_s32.c +++ b/liboil/simdpack/clip_s32.c @@ -35,19 +35,20 @@ /* This is a suprisingly fast implementation of clipping * in straight C. It would be difficult to do it faster in asm * without specialized opcodes. However, this trick clips - * the range min^(1<<31) to max^(1<<31) incorrectly. So - * it's limited to 31 bits. */ + * the range min^(1<<31) to max^(1<<31) incorrectly with int32_t. + * Thus the use of int64_t. 
*/ static void clip_s32_fast (int32_t *dest, int dstr, int32_t *src, int sstr, int n, int32_t *low, int32_t *hi) { int i; - int32_t x; + int64_t x; for(i=0;i<n;i++){ - x = src[i]; - dest[i] = x - (((x-*low)>>31)&(x-*low)) + (((*hi-x)>>31)&(*hi-x)); + x = OIL_GET(src,i*sstr,int32_t); + OIL_GET(dest,i*dstr,int32_t) = x - (((x-*low)>>31)&(x-*low)) + + (((*hi-x)>>31)&(*hi-x)); } } diff --git a/liboil/simdpack/diffsquaresum_f64.c b/liboil/simdpack/diffsquaresum_f64.c index f178902..633f90a 100644 --- a/liboil/simdpack/diffsquaresum_f64.c +++ b/liboil/simdpack/diffsquaresum_f64.c @@ -34,7 +34,7 @@ #include <math.h> OIL_DEFINE_CLASS (diffsquaresum_f64, - "double *dest, double *src1, int sstr1, double *src2, int sstr2, int n"); + "double *d_1, double *src1, int sstr1, double *src2, int sstr2, int n"); static void diffsquaresum_f64_ref(double *dest, double *src1, int sstr1, double *src2, @@ -108,7 +108,8 @@ diffsquaresum_f64_i10_unroll2(double *dest, double *src1, int sstr1, double *src } OIL_DEFINE_IMPL (diffsquaresum_f64_i10_unroll2, diffsquaresum_f64); -static void diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1, +static void +diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1, double *src2, int sstr2, int n) { double sum0 = 0; diff --git a/liboil/simdpack/scalaradd.c b/liboil/simdpack/scalaradd.c index 4216a05..f5f8857 100644 --- a/liboil/simdpack/scalaradd.c +++ b/liboil/simdpack/scalaradd.c @@ -76,6 +76,9 @@ static void scalaradd_ ## type ## _unroll2( \ *dest = *src + *val; \ OIL_INCREMENT(dest,dstr); \ OIL_INCREMENT(src,sstr); \ + *dest = *src + *val; \ + OIL_INCREMENT(dest,dstr); \ + OIL_INCREMENT(src,sstr); \ n--; \ } \ } \ diff --git a/liboil/simdpack/scalarmult.c b/liboil/simdpack/scalarmult.c index 958b3c3..f4d8bb3 100644 --- a/liboil/simdpack/scalarmult.c +++ b/liboil/simdpack/scalarmult.c @@ -75,6 +75,9 @@ static void scalarmult_ ## type ## _unroll2( \ *dest = *src * *val; \ OIL_INCREMENT(dest,dstr); \ 
OIL_INCREMENT(src,sstr); \ + *dest = *src * *val; \ + OIL_INCREMENT(dest,dstr); \ + OIL_INCREMENT(src,sstr); \ n--; \ } \ } \ |