summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Schleef <ds@schleef.org>2005-01-02 06:31:02 +0000
committerDavid Schleef <ds@schleef.org>2005-01-02 06:31:02 +0000
commit199e85f573a9239f1693862d3fd03fa612987f62 (patch)
treec0c28930b881af1f3a4bbf57b14db12dadef289d
parenta428fee448709f5864103e08743e2e8f07c0c87f (diff)
downloadliboil-199e85f573a9239f1693862d3fd03fa612987f62.tar.gz
* liboil/colorspace/argb_paint_i386.c:
* liboil/colorspace/ayuv2argb_i386.c: * liboil/liboilfunction.c: (oil_class_optimize): disable functions that fail test * liboil/liboiltest.c: (oil_test_new), (check_zero), (oil_test_check_impl), (init_parameter): Fix double-free bug, plus other problems with testing from applications. * liboil/dct/idct8x8_i386.c: pshufw apparently is not MMX * liboil/simdpack/abs_i386.c: (abs_u16_s16_i386asm3), (abs_u16_s16_mmx), (abs_u16_s16_mmxx): disable code that doesn't handle strides correctly.
-rw-r--r--ChangeLog20
-rw-r--r--examples/work/Makefile.am2
-rw-r--r--examples/work/work.c44
-rw-r--r--liboil/colorspace/argb_paint_i386.c2
-rw-r--r--liboil/colorspace/ayuv2argb_i386.c6
-rw-r--r--liboil/dct/idct8x8_i386.c2
-rw-r--r--liboil/liboilfunction.c3
-rw-r--r--liboil/liboiltest.c47
-rw-r--r--liboil/simdpack/abs_i386.c34
-rw-r--r--liboil/simdpack/clip_s32.c11
-rw-r--r--liboil/simdpack/diffsquaresum_f64.c5
-rw-r--r--liboil/simdpack/scalaradd.c3
-rw-r--r--liboil/simdpack/scalarmult.c3
-rw-r--r--testsuite/Makefile.am4
14 files changed, 125 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index f7df848..d863883 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2005-01-01 David Schleef <ds@schleef.org>
+
+ * liboil/colorspace/argb_paint_i386.c:
+ * liboil/colorspace/ayuv2argb_i386.c:
+ * liboil/liboilfunction.c: (oil_class_optimize): disable functions
+ that fail test
+ * liboil/liboiltest.c: (oil_test_new), (check_zero),
+ (oil_test_check_impl), (init_parameter): Fix double-free bug, plus
+ other problems with testing from applications.
+ * liboil/dct/idct8x8_i386.c: pshufw apparently is not MMX
+ * liboil/simdpack/abs_i386.c: (abs_u16_s16_i386asm3),
+ (abs_u16_s16_mmx), (abs_u16_s16_mmxx): disable code that doesn't
+ handle strides correctly.
+ * liboil/simdpack/clip_s32.c: (clip_s32_fast): fix code to pass test
+ * liboil/simdpack/diffsquaresum_f64.c:
+ (diffsquaresum_f64_i10_unroll4): fix prototype
+ * liboil/simdpack/scalaradd.c: fix implementation
+ * liboil/simdpack/scalarmult.c: fix implementation
+ * testsuite/Makefile.am: add stride test
+
2004-12-30 David Schleef <ds@schleef.org>
* liboil/dct/idct8x8_i386.c: (idct8x8_s16_mmx), (fdct8x8s_s16_mmx):
diff --git a/examples/work/Makefile.am b/examples/work/Makefile.am
index 9aa9766..f141e6a 100644
--- a/examples/work/Makefile.am
+++ b/examples/work/Makefile.am
@@ -1,7 +1,7 @@
noinst_PROGRAMS = work
-work_SOURCES = work.c argb_paint.c
+work_SOURCES = work.c
AM_LDFLAGS = $(LIBOIL_LIBS) $(GLIB_LIBS)
diff --git a/examples/work/work.c b/examples/work/work.c
index e4addd4..8a05c18 100644
--- a/examples/work/work.c
+++ b/examples/work/work.c
@@ -33,6 +33,7 @@
#include <liboil/liboil.h>
#include <liboil/liboilfunction.h>
#include <liboil/liboilrandom.h>
+#include <liboil/liboilcpu.h>
#include <glib.h>
#include <string.h>
#include <math.h>
@@ -41,27 +42,19 @@ void register_impls(void);
void test(void)
{
- uint8_t dest[100*4];
- uint8_t color[4];
- uint8_t alpha[100];
+ int16_t dest[100];
+ int16_t src[100];
int i;
- for(i=0;i<16;i++){
- dest[i*4+0] = 0;
- dest[i*4+1] = 0;
- dest[i*4+2] = 0;
- dest[i*4+3] = 255;
- alpha[i]=i*16;
+ for(i=0;i<100;i++){
+ src[i] = oil_rand_s16();
+ dest[i] = 0;
}
- color[0] = 255;
- color[1] = 128;
- color[2] = 10;
- color[3] = 128;
- oil_argb_paint_u8 (dest, color, alpha, 16);
+ oil_abs_u16_s16 (dest, 4, src, 4, 50);
- for(i=0;i<4*16;i+=4){
- g_print("%d %d %d %d\n",dest[i+0],dest[i+1],dest[i+2],dest[i+3]);
+ for(i=0;i<100;i++){
+ g_print("%d %d\n",dest[i],src[i]);
}
}
@@ -70,20 +63,25 @@ int main (int argc, char *argv[])
{
OilFunctionClass *klass;
OilFunctionImpl *impl;
+ unsigned long cpu_flags;
oil_init ();
- register_impls();
+ cpu_flags = oil_cpu_get_flags ();
- klass = oil_class_get ("argb_paint_u8");
+ //register_impls();
+
+ klass = oil_class_get ("abs_u16_s16");
oil_class_optimize (klass);
for (impl = klass->first_impl; impl; impl = impl->next) {
- klass->chosen_impl = impl;
- klass->func = impl->func;
- g_print("impl %s %g %g\n", impl->name, impl->profile_ave,
- impl->profile_std);
- test();
+ if (((impl->flags & OIL_CPU_FLAG_MASK) & ~cpu_flags) == 0) {
+ klass->chosen_impl = impl;
+ klass->func = impl->func;
+ g_print("impl %s %g %g\n", impl->name, impl->profile_ave,
+ impl->profile_std);
+ test();
+ }
}
return 0;
diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c
index b342a7a..f6785a4 100644
--- a/liboil/colorspace/argb_paint_i386.c
+++ b/liboil/colorspace/argb_paint_i386.c
@@ -83,5 +83,5 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
: "+r" (dest), "+%%eax" (color), "+r" (alpha), "+r" (n)
: "r" (&constants));
}
-OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX);
+OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
diff --git a/liboil/colorspace/ayuv2argb_i386.c b/liboil/colorspace/ayuv2argb_i386.c
index 2cbdf06..fbd16ae 100644
--- a/liboil/colorspace/ayuv2argb_i386.c
+++ b/liboil/colorspace/ayuv2argb_i386.c
@@ -87,7 +87,7 @@ ayuv2argb_u8_mmx (uint8_t *argb, uint8_t *ayuv, int n)
: "r" (&constants));
}
-OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, ayuv2argb_u8, OIL_IMPL_FLAG_MMX);
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
static short constants2[][4] = {
@@ -132,7 +132,7 @@ ayuv2argb_u8_mmx2 (uint8_t *argb, uint8_t *ayuv, int n)
}
-OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX);
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
static void
ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n)
@@ -170,5 +170,5 @@ ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n)
: "r" (&constants2));
}
-OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX);
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
diff --git a/liboil/dct/idct8x8_i386.c b/liboil/dct/idct8x8_i386.c
index 713683b..ca16d9d 100644
--- a/liboil/dct/idct8x8_i386.c
+++ b/liboil/dct/idct8x8_i386.c
@@ -404,6 +404,6 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
: "eax", "ebx", "ecx", "edx");
}
-OIL_DEFINE_IMPL_FULL (fdct8x8s_s16_mmx, fdct8x8s_s16, OIL_IMPL_FLAG_MMX);
+OIL_DEFINE_IMPL_FULL (fdct8x8s_s16_mmx, fdct8x8s_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_SSE);
diff --git a/liboil/liboilfunction.c b/liboil/liboilfunction.c
index ef0d6fd..b05093c 100644
--- a/liboil/liboilfunction.c
+++ b/liboil/liboilfunction.c
@@ -186,7 +186,8 @@ oil_class_optimize (OilFunctionClass * klass)
}
}
} else {
- OIL_ERROR("uncaught error");
+ OIL_WARNING("disabling implementation %s", impl->name);
+ impl->flags |= OIL_IMPL_FLAG_DISABLED;
}
}
klass->chosen_impl = min_impl;
diff --git a/liboil/liboiltest.c b/liboil/liboiltest.c
index ad885f6..2c987b0 100644
--- a/liboil/liboiltest.c
+++ b/liboil/liboiltest.c
@@ -69,6 +69,11 @@ oil_test_new (OilFunctionClass *klass)
memcpy (&test->params[proto->params[i].parameter_type], &proto->params[i],
sizeof(OilParameter));
}
+ for (i=0;i<OIL_ARG_LAST;i++){
+ test->params[i].src_data = NULL;
+ test->params[i].ref_data = NULL;
+ test->params[i].test_data = NULL;
+ }
test->iterations = 10;
test->n = 100;
@@ -209,19 +214,30 @@ oil_test_check_ref (OilTest *test)
test->tested_ref = 1;
}
+static int
+check_zero (uint8_t *data, int n)
+{
+ int i;
+ for(i=0;i<n;i++) {
+ if (data[i] != 0) return 0;
+ }
+ return 1;
+}
+
int
oil_test_check_impl (OilTest *test, OilFunctionImpl *impl)
{
double x;
int i;
int n;
+ int fail = 0;
if (test->proto->n_params > 10) {
OIL_ERROR ("function has too many parameters");
return 0;
}
- if (!test->tested_ref) {
+ if (!test->inited || !test->tested_ref) {
oil_test_check_ref(test);
}
@@ -238,18 +254,30 @@ oil_test_check_impl (OilTest *test, OilFunctionImpl *impl)
x += check_array (p->ref_data, p->test_data, p->type, p->pre_n,
p->stride, p->post_n);
n += p->pre_n * p->post_n;
+ if (!check_zero (p->test_data, OIL_TEST_HEADER)) {
+ fail = 1;
+ OIL_ERROR("function %s wrote before area for parameter %s",
+ test->impl->name, p->parameter_name);
+ }
+ if (!check_zero ((uint8_t *)p->test_data + p->size - OIL_TEST_FOOTER,
+ OIL_TEST_FOOTER)) {
+ fail = 1;
+ OIL_ERROR("function %s wrote after area for parameter %s",
+ test->impl->name, p->parameter_name);
+ }
}
}
}
OIL_DEBUG("sum of absolute differences %g for %d values", x, n);
- if (x > n) {
- OIL_ERROR ("function %s in class %s failed check (%g > %d)",
- test->impl->name, test->klass->name, x, n);
- }
-
test->sum_abs_diff = x;
test->n_points = n;
+ if (x > n || fail) {
+ OIL_ERROR ("function %s in class %s failed check (%g > %d) outside=%d",
+ test->impl->name, test->klass->name, x, n, fail);
+ return 0;
+ }
+
return 1;
}
@@ -333,17 +361,20 @@ init_parameter (OilTest *test, OilParameter *p, OilParameter *ps)
if (p->direction == 'i' || p->direction == 's') {
if (p->src_data) free (p->src_data);
+
+ OIL_DEBUG("allocating %d bytes for src_data for %s", p->size, p->parameter_name);
p->src_data = malloc (p->size);
memset (p->src_data, 0, p->size);
- fill_array (p->src_data, p->type, p->pre_n, p->stride, p->post_n);
+ fill_array (p->src_data + OIL_TEST_HEADER, p->type, p->pre_n, p->stride, p->post_n);
}
if (p->direction == 'i' || p->direction == 'd') {
if (p->ref_data) free (p->ref_data);
p->ref_data = malloc (p->size);
memset (p->ref_data, 0, p->size);
+ OIL_DEBUG("allocating %d bytes for ref_data and test_data for %s", p->size, p->parameter_name);
- if (p->test_data) free (p->ref_data);
+ if (p->test_data) free (p->test_data);
p->test_data = malloc (p->size);
memset (p->test_data, 0, p->size);
}
diff --git a/liboil/simdpack/abs_i386.c b/liboil/simdpack/abs_i386.c
index 7ecdb3c..626065e 100644
--- a/liboil/simdpack/abs_i386.c
+++ b/liboil/simdpack/abs_i386.c
@@ -34,6 +34,7 @@
#define ABS(x) ((x)>0 ? (x) : -(x))
+#if 0
static void
abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
@@ -53,8 +54,10 @@ abs_u16_s16_i386asm (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
::"eax", "edx");
}
-OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm, abs_u16_s16);
+OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
+#endif
+#if 0
/* The previous function after running through uberopt */
static void
abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src,
@@ -76,8 +79,10 @@ abs_u16_s16_i386asm_uber4 (uint16_t * dest, int dstr, int16_t * src,
:"+r" (src), "+r" (dest), "+r" (n)
::"eax", "edx");
}
-OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm_uber4, abs_u16_s16);
+OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm_uber4, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
+#endif
+#if 0
static void
abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
@@ -99,28 +104,26 @@ abs_u16_s16_i386asm2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
" popl %%ebp \n":"+D" (src), "+a" (dest), "+S" (n)
::"ecx", "edx");
}
-
-OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm2, abs_u16_s16);
+OIL_DEFINE_IMPL_FULL (abs_u16_s16_i386asm2, abs_u16_s16, OIL_IMPL_FLAG_CMOV);
+#endif
static void
abs_u16_s16_i386asm3 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
__asm__ __volatile__ ("\n"
" .p2align 4,,15 \n"
- "1: movswl (%1), %%eax \n"
- " mov %3, %%edx \n"
- " add %%edx, %1 \n"
+ "1: movsxw (%1), %%eax \n"
+ " add %3, %1 \n"
" mov %%eax, %%edx \n"
" sar $0xf, %%ax \n"
" and %%edx, %%eax \n"
" add %%eax, %%eax \n"
" sub %%eax, %%edx \n"
" mov %%dx, (%0) \n"
- " mov %4, %%edx \n"
- " add %%edx, %0 \n"
+ " add %4, %0 \n"
" decl %2 \n"
" jne 1b \n"
- : "+r" (src), "+r" (dest), "+m" (n)
+ : "+r" (dest), "+r" (src), "+m" (n)
: "m" (dstr), "m" (sstr)
: "eax", "edx");
}
@@ -131,10 +134,11 @@ OIL_DEFINE_IMPL_ASM (abs_u16_s16_i386asm3, abs_u16_s16);
static void
abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
- const short p[] = { -32768, -32768, -32768, -32768,
- 32767, 32767, 32767, 32767
+ const int16_t p[][4] = {
+ { -32768, -32768, -32768, -32768 },
+ { 32767, 32767, 32767, 32767 }
};
- short tmp[4];
+ int16_t tmp[4];
while (n & 3) {
*dest = ABS (*src);
@@ -158,6 +162,7 @@ abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
OIL_INCREMENT (src, sstr);
__asm__ __volatile__ ("\n"
" movq (%%eax), %%mm1 \n"
+ " movq %%mm1, %%mm0 \n"
" paddsw %%mm2, %%mm0 \n"
" paddsw %%mm3, %%mm1 \n"
" psubsw %%mm2, %%mm0 \n"
@@ -180,6 +185,7 @@ abs_u16_s16_mmx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmx, abs_u16_s16, OIL_IMPL_FLAG_MMX);
+#if 0
static void
abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
{
@@ -222,8 +228,8 @@ abs_u16_s16_mmxx (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
:"c" (p));
asm volatile ("emms");
}
-
OIL_DEFINE_IMPL_FULL (abs_u16_s16_mmxx, abs_u16_s16, OIL_IMPL_FLAG_MMX);
+#endif
static void
abs_u16_s16_mmx2 (uint16_t * dest, int dstr, int16_t * src, int sstr, int n)
diff --git a/liboil/simdpack/clip_s32.c b/liboil/simdpack/clip_s32.c
index 001ae6f..464111c 100644
--- a/liboil/simdpack/clip_s32.c
+++ b/liboil/simdpack/clip_s32.c
@@ -35,19 +35,20 @@
/* This is a suprisingly fast implementation of clipping
* in straight C. It would be difficult to do it faster in asm
* without specialized opcodes. However, this trick clips
- * the range min^(1<<31) to max^(1<<31) incorrectly. So
- * it's limited to 31 bits. */
+ * the range min^(1<<31) to max^(1<<31) incorrectly with int32_t.
+ * Thus the use of int64_t. */
static void
clip_s32_fast (int32_t *dest, int dstr, int32_t *src, int sstr, int n,
int32_t *low, int32_t *hi)
{
int i;
- int32_t x;
+ int64_t x;
for(i=0;i<n;i++){
- x = src[i];
- dest[i] = x - (((x-*low)>>31)&(x-*low)) + (((*hi-x)>>31)&(*hi-x));
+ x = OIL_GET(src,i*sstr,int32_t);
+ OIL_GET(dest,i*dstr,int32_t) = x - (((x-*low)>>31)&(x-*low))
+ + (((*hi-x)>>31)&(*hi-x));
}
}
diff --git a/liboil/simdpack/diffsquaresum_f64.c b/liboil/simdpack/diffsquaresum_f64.c
index f178902..633f90a 100644
--- a/liboil/simdpack/diffsquaresum_f64.c
+++ b/liboil/simdpack/diffsquaresum_f64.c
@@ -34,7 +34,7 @@
#include <math.h>
OIL_DEFINE_CLASS (diffsquaresum_f64,
- "double *dest, double *src1, int sstr1, double *src2, int sstr2, int n");
+ "double *d_1, double *src1, int sstr1, double *src2, int sstr2, int n");
static void
diffsquaresum_f64_ref(double *dest, double *src1, int sstr1, double *src2,
@@ -108,7 +108,8 @@ diffsquaresum_f64_i10_unroll2(double *dest, double *src1, int sstr1, double *src
}
OIL_DEFINE_IMPL (diffsquaresum_f64_i10_unroll2, diffsquaresum_f64);
-static void diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1,
+static void
+diffsquaresum_f64_i10_unroll4(double *dest, double *src1, int sstr1,
double *src2, int sstr2, int n)
{
double sum0 = 0;
diff --git a/liboil/simdpack/scalaradd.c b/liboil/simdpack/scalaradd.c
index 4216a05..f5f8857 100644
--- a/liboil/simdpack/scalaradd.c
+++ b/liboil/simdpack/scalaradd.c
@@ -76,6 +76,9 @@ static void scalaradd_ ## type ## _unroll2( \
*dest = *src + *val; \
OIL_INCREMENT(dest,dstr); \
OIL_INCREMENT(src,sstr); \
+ *dest = *src + *val; \
+ OIL_INCREMENT(dest,dstr); \
+ OIL_INCREMENT(src,sstr); \
n--; \
} \
} \
diff --git a/liboil/simdpack/scalarmult.c b/liboil/simdpack/scalarmult.c
index 958b3c3..f4d8bb3 100644
--- a/liboil/simdpack/scalarmult.c
+++ b/liboil/simdpack/scalarmult.c
@@ -75,6 +75,9 @@ static void scalarmult_ ## type ## _unroll2( \
*dest = *src * *val; \
OIL_INCREMENT(dest,dstr); \
OIL_INCREMENT(src,sstr); \
+ *dest = *src * *val; \
+ OIL_INCREMENT(dest,dstr); \
+ OIL_INCREMENT(src,sstr); \
n--; \
} \
} \
diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
index 9d78ff4..8388c95 100644
--- a/testsuite/Makefile.am
+++ b/testsuite/Makefile.am
@@ -1,7 +1,7 @@
-check_PROGRAMS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy
+check_PROGRAMS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy stride
-TESTS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy
+TESTS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy stride
AM_LDFLAGS = $(LIBOIL_LIBS) $(GLIB_LIBS)
AM_CFLAGS = $(LIBOIL_CFLAGS) $(GLIB_CFLAGS)