diff options
author | David Schleef <ds@schleef.org> | 2005-01-02 06:31:02 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-01-02 06:31:02 +0000 |
commit | 199e85f573a9239f1693862d3fd03fa612987f62 (patch) | |
tree | c0c28930b881af1f3a4bbf57b14db12dadef289d | |
parent | a428fee448709f5864103e08743e2e8f07c0c87f (diff) | |
download | liboil-199e85f573a9239f1693862d3fd03fa612987f62.tar.gz |
* liboil/colorspace/argb_paint_i386.c:
* liboil/colorspace/ayuv2argb_i386.c:
* liboil/liboilfunction.c: (oil_class_optimize): disable functions
that fail test
* liboil/liboiltest.c: (oil_test_new), (check_zero),
(oil_test_check_impl), (init_parameter): Fix double-free bug, plus
other problems with testing from applications.
* liboil/dct/idct8x8_i386.c: pshufw apparently is not MMX
* liboil/simdpack/abs_i386.c: (abs_u16_s16_i386asm3),
(abs_u16_s16_mmx), (abs_u16_s16_mmxx): disable code that doesn't handle strides correctly.
-rw-r--r-- | ChangeLog | 20 | ||||
-rw-r--r-- | examples/work/Makefile.am | 2 | ||||
-rw-r--r-- | examples/work/work.c | 44 | ||||
-rw-r--r-- | liboil/colorspace/argb_paint_i386.c | 2 | ||||
-rw-r--r-- | liboil/colorspace/ayuv2argb_i386.c | 6 | ||||
-rw-r--r-- | liboil/dct/idct8x8_i386.c | 2 | ||||
-rw-r--r-- | liboil/liboilfunction.c | 3 | ||||
-rw-r--r-- | liboil/liboiltest.c | 47 | ||||
-rw-r--r-- | liboil/simdpack/abs_i386.c | 34 | ||||
-rw-r--r-- | liboil/simdpack/clip_s32.c | 11 | ||||
-rw-r--r-- | liboil/simdpack/diffsquaresum_f64.c | 5 | ||||
-rw-r--r-- | liboil/simdpack/scalaradd.c | 3 | ||||
-rw-r--r-- | liboil/simdpack/scalarmult.c | 3 | ||||
-rw-r--r-- | testsuite/Makefile.am | 4 |
14 files changed, 125 insertions, 61 deletions
@@ -1,3 +1,23 @@ +2005-01-01 David Schleef <ds@schleef.org> + + * liboil/colorspace/argb_paint_i386.c: + * liboil/colorspace/ayuv2argb_i386.c: + * liboil/liboilfunction.c: (oil_class_optimize): disable functions + that fail test + * liboil/liboiltest.c: (oil_test_new), (check_zero), + (oil_test_check_impl), (init_parameter): Fix double-free bug, plus + other problems with testing from applications. + * liboil/dct/idct8x8_i386.c: pshufw apparently is not MMX + * liboil/simdpack/abs_i386.c: (abs_u16_s16_i386asm3), + (abs_u16_s16_mmx), (abs_u16_s16_mmxx): disable code that doesn't + handle strides correctly. + * liboil/simdpack/clip_s32.c: (clip_s32_fast): fix code to pass test + * liboil/simdpack/diffsquaresum_f64.c: + (diffsquaresum_f64_i10_unroll4): fix prototype + * liboil/simdpack/scalaradd.c: fix implementation + * liboil/simdpack/scalarmult.c: fix implementation + * testsuite/Makefile.am: add stride test + 2004-12-30 David Schleef <ds@schleef.org> * liboil/dct/idct8x8_i386.c: (idct8x8_s16_mmx), (fdct8x8s_s16_mmx): diff --git a/examples/work/Makefile.am b/examples/work/Makefile.am index 9aa9766..f141e6a 100644 --- a/examples/work/Makefile.am +++ b/examples/work/Makefile.am @@ -1,7 +1,7 @@ noinst_PROGRAMS = work -work_SOURCES = work.c argb_paint.c +work_SOURCES = work.c AM_LDFLAGS = $(LIBOIL_LIBS) $(GLIB_LIBS) diff --git a/examples/work/work.c b/examples/work/work.c index e4addd4..8a05c18 100644 --- a/examples/work/work.c +++ b/examples/work/work.c @@ -33,6 +33,7 @@ #include <liboil/liboil.h> #include <liboil/liboilfunction.h> #include <liboil/liboilrandom.h> +#include <liboil/liboilcpu.h> #include <glib.h> #include <string.h> #include <math.h> @@ -41,27 +42,19 @@ void register_impls(void); void test(void) { - uint8_t dest[100*4]; - uint8_t color[4]; - uint8_t alpha[100]; + int16_t dest[100]; + int16_t src[100]; int i; - for(i=0;i<16;i++){ - dest[i*4+0] = 0; - dest[i*4+1] = 0; - dest[i*4+2] = 0; - dest[i*4+3] = 255; - alpha[i]=i*16; + for(i=0;i<100;i++){ + src[i] 
= oil_rand_s16(); + dest[i] = 0; } - color[0] = 255; - color[1] = 128; - color[2] = 10; - color[3] = 128; - oil_argb_paint_u8 (dest, color, alpha, 16); + oil_abs_u16_s16 (dest, 4, src, 4, 50); - for(i=0;i<4*16;i+=4){ - g_print("%d %d %d %d\n",dest[i+0],dest[i+1],dest[i+2],dest[i+3]); + for(i=0;i<100;i++){ + g_print("%d %d\n",dest[i],src[i]); } } @@ -70,20 +63,25 @@ int main (int argc, char *argv[]) { OilFunctionClass *klass; OilFunctionImpl *impl; + unsigned long cpu_flags; oil_init (); - register_impls(); + cpu_flags = oil_cpu_get_flags (); - klass = oil_class_get ("argb_paint_u8"); + //register_impls(); + + klass = oil_class_get ("abs_u16_s16"); oil_class_optimize (klass); for (impl = klass->first_impl; impl; impl = impl->next) { - klass->chosen_impl = impl; - klass->func = impl->func; - g_print("impl %s %g %g\n", impl->name, impl->profile_ave, - impl->profile_std); - test(); + if (((impl->flags & OIL_CPU_FLAG_MASK) & ~cpu_flags) == 0) { + klass->chosen_impl = impl; + klass->func = impl->func; + g_print("impl %s %g %g\n", impl->name, impl->profile_ave, + impl->profile_std); + test(); + } } return 0; diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c index b342a7a..f6785a4 100644 --- a/liboil/colorspace/argb_paint_i386.c +++ b/liboil/colorspace/argb_paint_i386.c @@ -83,5 +83,5 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) : "+r" (dest), "+%%eax" (color), "+r" (alpha), "+r" (n) : "r" (&constants)); } -OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX); +OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); diff --git a/liboil/colorspace/ayuv2argb_i386.c b/liboil/colorspace/ayuv2argb_i386.c index 2cbdf06..fbd16ae 100644 --- a/liboil/colorspace/ayuv2argb_i386.c +++ b/liboil/colorspace/ayuv2argb_i386.c @@ -87,7 +87,7 @@ ayuv2argb_u8_mmx (uint8_t *argb, uint8_t *ayuv, int n) : "r" (&constants)); } -OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, 
ayuv2argb_u8, OIL_IMPL_FLAG_MMX); +OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); static short constants2[][4] = { @@ -132,7 +132,7 @@ ayuv2argb_u8_mmx2 (uint8_t *argb, uint8_t *ayuv, int n) } -OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX); +OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); static void ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n) @@ -170,5 +170,5 @@ ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n) : "r" (&constants2)); } -OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX); +OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); diff --git a/liboil/dct/idct8x8_i386.c b/liboil/dct/idct8x8_i386.c index 713683b..ca16d9d 100644 --- a/liboil/dct/idct8x8_i386.c +++ b/liboil/dct/idct8x8_i386.c @@ -404,6 +404,6 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr) : "eax", "ebx", "ecx", "edx"); } -OIL_DEFINE_IMPL_FULL (fdct8x8s_s16_mmx, fdct8x8s_s16, OIL_IMPL_FLAG_MMX); +OIL_DEFINE_IMPL_FULL (fdct8x8s_s16_mmx, fdct8x8s_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_SSE); diff --git a/liboil/liboilfunction.c b/liboil/liboilfunction.c index ef0d6fd..b05093c 100644 --- a/liboil/liboilfunction.c +++ b/liboil/liboilfunction.c @@ -186,7 +186,8 @@ oil_class_optimize (OilFunctionClass * klass) } } } else { - OIL_ERROR("uncaught error"); + OIL_WARNING("disabling implementation %s", impl->name); + impl->flags |= OIL_IMPL_FLAG_DISABLED; } } klass->chosen_impl = min_impl; diff --git a/liboil/liboiltest.c b/liboil/liboiltest.c index ad885f6..2c987b0 100644 --- a/liboil/liboiltest.c +++ b/liboil/liboiltest.c @@ -69,6 +69,11 @@ oil_test_new (OilFunctionClass *klass) memcpy (&test->params[proto->params[i].parameter_type], &proto->params[i], sizeof(OilParameter)); } + for (i=0;i<OIL_ARG_LAST;i++){ + test->params[i].src_data = NULL; + test->params[i].ref_data = NULL; + 
test->params[i].test_data = NULL; + } test->iterations = 10; test->n = 100; @@ -209,19 +214,30 @@ oil_test_check_ref (OilTest *test) test->tested_ref = 1; } +static int +check_zero (uint8_t *data, int n) +{ + int i; + for(i=0;i<n;i++) { + if (data[i] != 0) return 0; + } + return 1; +} + int oil_test_check_impl (OilTest *test, OilFunctionImpl *impl) { double x; int i; int n; + int fail = 0; if (test->proto->n_params > 10) { OIL_ERROR ("function has too many parameters"); return 0; } - if (!test->tested_ref) { + if (!test->inited || !test->tested_ref) { oil_test_check_ref(test); } @@ -238,18 +254,30 @@ oil_test_check_impl (OilTest *test, OilFunctionImpl *impl) x += check_array (p->ref_data, p->test_data, p->type, p->pre_n, p->stride, p->post_n); n += p->pre_n * p->post_n; + if (!check_zero (p->test_data, OIL_TEST_HEADER)) { + fail = 1; + OIL_ERROR("function %s wrote before area for parameter %s", + test->impl->name, p->parameter_name); + } + if (!check_zero ((uint8_t *)p->test_data + p->size - OIL_TEST_FOOTER, + OIL_TEST_FOOTER)) { + fail = 1; + OIL_ERROR("function %s wrote after area for parameter %s", + test->impl->name, p->parameter_name); + } } } } OIL_DEBUG("sum of absolute differences %g for %d values", x, n); - if (x > n) { - OIL_ERROR ("function %s in class %s failed check (%g > %d)", - test->impl->name, test->klass->name, x, n); - } - test->sum_abs_diff = x; test->n_points = n; + if (x > n || fail) { + OIL_ERROR ("function %s in class %s failed check (%g > %d) outside=%d", + test->impl->name, test->klass->name, x, n, fail); + return 0; + } + return 1; } @@ -333,17 +361,20 @@ init_parameter (OilTest *test, OilParameter *p, OilParameter *ps) if (p->direction == 'i' || p->direction == 's') { if (p->src_data) free (p->src_data); + + OIL_DEBUG("allocating %d bytes for src_data for %s", p->size, p->parameter_name); p->src_data = malloc (p->size); memset (p->src_data, 0, p->size); - fill_array (p->src_data, p->type, p->pre_n, p->stride, p->post_n); + fill_array 
(p->src_data + OIL_TEST_HEADER, p->type, p->pre_n, p->stride, p->post_n); } if (p->direction == 'i' || p->direction == 'd') { if (p->ref_data) free (p->ref_data); p->ref_data = malloc (p->size); memset (p->ref_data, 0, p->size); + OIL_DEBUG("allocating %d bytes for ref_data and test_data for %s", p->size, p->parameter_name); - if (p->test_data) free (p->ref_data); + if (p->test_data) free (p->test_data); p->test_data = malloc (p->size); memset (p->test_data, 0, p->size); } diff --git a/liboil/simdpack/abs_i386.c b/liboil/simdpack/abs_i386.c index 7ecdb3c..626065e 100644 --- a/liboil/simdpack/abs_i386.c +++ b/liboil/simdpack/abs_i386.c @@ -34,6 +34,7 @@ #define ABS(x) ((x)>0 ? (x) : -(x)) +#if 0 static void abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { @@ -53,8 +54,10 @@ abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) ::"eax", "edx"); } -OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm, abs_u16_s16); +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm, abs_u16_s16, OIL_IMPL_FLAG_CMOV); +#endif +#if 0 /* The previous function after running through uberopt */ static void abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src, @@ -76,8 +79,10 @@ abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src, :"+r" (src), "+r" (dest), "+r" (n) ::"eax", "edx"); } -OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm_uber4, abs_u16_s16); +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm_uber4, abs_u16_s16, OIL_IMPL_FLAG_CMOV); +#endif +#if 0 static void abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { @@ -99,28 +104,26 @@ abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) " popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n) ::"ecx", "edx"); } - -OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm2, abs_u16_s16); +OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm2, abs_u16_s16, OIL_IMPL_FLAG_CMOV); +#endif static void abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, 
int sstr, int n) { __asm__ __volatile__ ("\n" " .p2align 4,,15 \n" - "1: movswl (%1), %%eax \n" - " mov %3, %%edx \n" - " add %%edx, %1 \n" + "1: movsxw (%1), %%eax \n" + " add %3, %1 \n" " mov %%eax, %%edx \n" " sar $0xf, %%ax \n" " and %%edx, %%eax \n" " add %%eax, %%eax \n" " sub %%eax, %%edx \n" " mov %%dx, (%0) \n" - " mov %4, %%edx \n" - " add %%edx, %0 \n" + " add %4, %0 \n" " decl %2 \n" " jne 1b \n" - : "+r" (src), "+r" (dest), "+m" (n) + : "+r" (dest), "+r" (src), "+m" (n) : "m" (dstr), "m" (sstr) : "eax", "edx"); } @@ -131,10 +134,11 @@ OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16); static void abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { - const short p[] = { -32768, -32768, -32768, -32768, - 32767, 32767, 32767, 32767 + const int16_t p[][4] = { + { -32768, -32768, -32768, -32768 }, + { 32767, 32767, 32767, 32767 } }; - short tmp[4]; + int16_t tmp[4]; while (n & 3) { *dest = ABS (*src); @@ -158,6 +162,7 @@ abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) OIL_INCREMENT (src, sstr); __asm__ __volatile__ ("\n" " movq (%%eax), %%mm1 \n" + " movq %%mm1, %%mm0 \n" " paddsw %%mm2, %%mm0 \n" " paddsw %%mm3, %%mm1 \n" " psubsw %%mm2, %%mm0 \n" @@ -180,6 +185,7 @@ abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx, abs_u16_s16, OIL_IMPL_FLAG_MMX); +#if 0 static void abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) { @@ -222,8 +228,8 @@ abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) :"c" (p)); asm volatile ("emms"); } - OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmxx, abs_u16_s16, OIL_IMPL_FLAG_MMX); +#endif static void abs_u16_s16_mmx2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n) diff --git a/liboil/simdpack/clip_s32.c b/liboil/simdpack/clip_s32.c index 001ae6f..464111c 100644 --- a/liboil/simdpack/clip_s32.c +++ b/liboil/simdpack/clip_s32.c @@ -35,19 +35,20 @@ /* This 
is a suprisingly fast implementation of clipping * in straight C. It would be difficult to do it faster in asm * without specialized opcodes. However, this trick clips - * the range min^(1<<31) to max^(1<<31) incorrectly. So - * it's limited to 31 bits. */ + * the range min^(1<<31) to max^(1<<31) incorrectly with int32_t. + * Thus the use of int64_t. */ static void clip_s32_fast (int32_t *dest, int dstr, int32_t *src, int sstr, int n, int32_t *low, int32_t *hi) { int i; - int32_t x; + int64_t x; for(i=0;i<n;i++){ - x = src[i]; - dest[i] = x - (((x-*low)>>31)&(x-*low)) + (((*hi-x)>>31)&(*hi-x)); + x = OIL_GET(src,i*sstr,int32_t); + OIL_GET(dest,i*dstr,int32_t) = x - (((x-*low)>>31)&(x-*low)) + + (((*hi-x)>>31)&(*hi-x)); } } diff --git a/liboil/simdpack/diffsquaresum_f64.c b/liboil/simdpack/diffsquaresum_f64.c index f178902..633f90a 100644 --- a/liboil/simdpack/diffsquaresum_f64.c +++ b/liboil/simdpack/diffsquaresum_f64.c @@ -34,7 +34,7 @@ #include <math.h> OIL_DEFINE_CLASS (diffsquaresum_f64, - "double *dest, double *src1, int sstr1, double *src2, int sstr2, int n"); + "double *d_1, double *src1, int sstr1, double *src2, int sstr2, int n"); static void diffsquaresum_f64_ref(double *dest, double *src1, int sstr1, double *src2, @@ -108,7 +108,8 @@ diffsquaresum_f64_i10_unroll2(double *dest, double *src1, int sstr1, double *src } OIL_DEFINE_IMPL (diffsquaresum_f64_i10_unroll2, diffsquaresum_f64); -static void diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1, +static void +diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1, double *src2, int sstr2, int n) { double sum0 = 0; diff --git a/liboil/simdpack/scalaradd.c b/liboil/simdpack/scalaradd.c index 4216a05..f5f8857 100644 --- a/liboil/simdpack/scalaradd.c +++ b/liboil/simdpack/scalaradd.c @@ -76,6 +76,9 @@ static void scalaradd_ ## type ## _unroll2( \ *dest = *src + *val; \ OIL_INCREMENT(dest,dstr); \ OIL_INCREMENT(src,sstr); \ + *dest = *src + *val; \ + OIL_INCREMENT(dest,dstr); \ + 
OIL_INCREMENT(src,sstr); \ n--; \ } \ } \ diff --git a/liboil/simdpack/scalarmult.c b/liboil/simdpack/scalarmult.c index 958b3c3..f4d8bb3 100644 --- a/liboil/simdpack/scalarmult.c +++ b/liboil/simdpack/scalarmult.c @@ -75,6 +75,9 @@ static void scalarmult_ ## type ## _unroll2( \ *dest = *src * *val; \ OIL_INCREMENT(dest,dstr); \ OIL_INCREMENT(src,sstr); \ + *dest = *src * *val; \ + OIL_INCREMENT(dest,dstr); \ + OIL_INCREMENT(src,sstr); \ n--; \ } \ } \ diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am index 9d78ff4..8388c95 100644 --- a/testsuite/Makefile.am +++ b/testsuite/Makefile.am @@ -1,7 +1,7 @@ -check_PROGRAMS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy +check_PROGRAMS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy stride -TESTS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy +TESTS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy stride AM_LDFLAGS = $(LIBOIL_LIBS) $(GLIB_LIBS) AM_CFLAGS = $(LIBOIL_CFLAGS) $(GLIB_CFLAGS) |