summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org> 2005-01-10 01:10:14 +0000
committer: David Schleef <ds@schleef.org> 2005-01-10 01:10:14 +0000
commit: 8e5ae77db51d5d3ab2f9f90d87294d2a6f5d6dbb (patch)
tree: a683eb174d520f0236c5fe5c0f4ed55a98860fae
parent: a3964318ce4288213110f15b6675289f0a11459c (diff)
download: liboil-8e5ae77db51d5d3ab2f9f90d87294d2a6f5d6dbb.tar.gz
* examples/work/work.c: (test), (main): misc changes
* liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow),
  (conv_s32_f32_3dnow): fix asm
* liboil/conv/conv_bitstuff.c: disable brokenness
* liboil/liboilprofile.h: fix s390 profiling
* liboil/simdpack/Makefile.am:
* liboil/simdpack/clip_fast.c: (clip_s16_fast), (clip_s16_fast2),
  (clip_s32_fast): merge clip_s16.c and clip_s32.c
* liboil/simdpack/clip_s16.c: remove
* liboil/simdpack/clip_s32.c: remove
* testsuite/Makefile.am: glib fixes
* testsuite/proto3.c: (check_param): fail if problem
* testsuite/stride.c: (main): same
-rw-r--r--  ChangeLog  16
-rw-r--r--  examples/work/work.c  13
-rw-r--r--  liboil/conv/conv_3dnow.c  7
-rw-r--r--  liboil/conv/conv_bitstuff.c  6
-rw-r--r--  liboil/liboilprofile.h  4
-rw-r--r--  liboil/simdpack/Makefile.am  3
-rw-r--r--  liboil/simdpack/clip_fast.c (renamed from liboil/simdpack/clip_s16.c)  28
-rw-r--r--  liboil/simdpack/clip_s32.c  57
-rw-r--r--  testsuite/Makefile.am  12
-rw-r--r--  testsuite/proto3.c  8
-rw-r--r--  testsuite/stride.c  4
11 files changed, 79 insertions, 79 deletions
diff --git a/ChangeLog b/ChangeLog
index cd666bd..69e791b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,21 @@
2005-01-09 David Schleef <ds@schleef.org>
+ * examples/work/work.c: (test), (main): misc changes
+ * liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow),
+ (conv_s32_f32_3dnow): fix asm
+ * liboil/conv/conv_bitstuff.c: disable brokenness
+ * liboil/liboilprofile.h: fix s390 profiling
+ * liboil/simdpack/Makefile.am:
+ * liboil/simdpack/clip_fast.c: (clip_s16_fast), (clip_s16_fast2),
+ (clip_s32_fast): merge clip_s16.c and clip_s32.c
+ * liboil/simdpack/clip_s16.c: remove
+ * liboil/simdpack/clip_s32.c: remove
+ * testsuite/Makefile.am: glib fixes
+ * testsuite/proto3.c: (check_param): fail if problem
+ * testsuite/stride.c: (main): same
+
+2005-01-09 David Schleef <ds@schleef.org>
+
* examples/uberopt/uberopt.c: (main): Use gsize
* liboil/colorspace/rgb2bgr_powerpc.c: (rgb2bgr_ppc),
(rgb2bgr_ppc2): Fix powerpc asm
diff --git a/examples/work/work.c b/examples/work/work.c
index 8a05c18..4f7882d 100644
--- a/examples/work/work.c
+++ b/examples/work/work.c
@@ -43,7 +43,7 @@ void register_impls(void);
void test(void)
{
int16_t dest[100];
- int16_t src[100];
+ float src[100];
int i;
for(i=0;i<100;i++){
@@ -51,10 +51,10 @@ void test(void)
dest[i] = 0;
}
- oil_abs_u16_s16 (dest, 4, src, 4, 50);
+ oil_conv_s16_f32 (dest, 2, src, 4, 100);
for(i=0;i<100;i++){
- g_print("%d %d\n",dest[i],src[i]);
+ g_print("%d %g\n",dest[i],src[i]);
}
}
@@ -63,19 +63,16 @@ int main (int argc, char *argv[])
{
OilFunctionClass *klass;
OilFunctionImpl *impl;
- unsigned long cpu_flags;
oil_init ();
- cpu_flags = oil_cpu_get_flags ();
-
//register_impls();
- klass = oil_class_get ("abs_u16_s16");
+ klass = oil_class_get ("conv_s16_f32");
oil_class_optimize (klass);
for (impl = klass->first_impl; impl; impl = impl->next) {
- if (((impl->flags & OIL_CPU_FLAG_MASK) & ~cpu_flags) == 0) {
+ if (oil_impl_is_runnable (impl)) {
klass->chosen_impl = impl;
klass->func = impl->func;
g_print("impl %s %g %g\n", impl->name, impl->profile_ave,
diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c
index af7df49..1e1d884 100644
--- a/liboil/conv/conv_3dnow.c
+++ b/liboil/conv/conv_3dnow.c
@@ -42,10 +42,9 @@ conv_f32_s16_3dnow(float *dst, int dst_stride, int16_t *src, int src_stride,
for(i=0;i<n;i++){
asm volatile(
- " xor %%eax, %%eax \n"
- " movw 0(%0), %%eax \n"
+ " movswl 0(%0), %%eax \n"
" movd %%eax, %%mm0 \n"
- " pi2fd 0(%0), %%mm0 \n"
+ " pi2fd %%mm0, %%mm0 \n"
" movd %%mm0, 0(%1) \n"
:
: "r" (src), "r" (dst)
@@ -76,7 +75,7 @@ conv_s32_f32_3dnow (int32_t *dst, int dst_stride, float *src, int src_stride,
" pfadd 0(%2), %%mm0 \n"
" pf2id %%mm0, %%mm1 \n"
" pfcmpgt 0(%2), %%mm0 \n"
- " paddd %%mm0, %%mm1 \n"
+ " psubd %%mm0, %%mm1 \n"
" movd %%mm1, 0(%1) \n"
:
: "r" (src), "r" (dst), "r" (constants)
diff --git a/liboil/conv/conv_bitstuff.c b/liboil/conv/conv_bitstuff.c
index a535bfe..ce45059 100644
--- a/liboil/conv/conv_bitstuff.c
+++ b/liboil/conv/conv_bitstuff.c
@@ -106,6 +106,8 @@ OIL_DEFINE_IMPL(conv_f32_s16_bitstuff, conv_f32_s16);
#define signbit_S32(x) (((uint32_t)(x))>>31)
+#if 0
+/* broken */
/* This implementation is slightly inaccurate */
static void conv_s16_f32_bitstuff(int16_t *dst, int dest_stride, float *src,
int src_stride, int n)
@@ -126,6 +128,7 @@ static void conv_s16_f32_bitstuff(int16_t *dst, int dest_stride, float *src,
}
}
OIL_DEFINE_IMPL(conv_s16_f32_bitstuff, conv_s16_f32);
+#endif
#if 0
@@ -198,6 +201,8 @@ static void conv_f64_s16_bitstuff(float *dst, int dest_stride, int16_t *src,
OIL_DEFINE_IMPL(conv_f64_s16_bitstuff, conv_f64_s16);
#endif
+#if 0
+/* broken */
/* This implementation is slightly inaccurate */
static void conv_s16_f64_bitstuff(int16_t *dst, int dest_stride, float *src,
int src_stride, int n)
@@ -227,6 +232,7 @@ static void conv_s16_f64_bitstuff(int16_t *dst, int dest_stride, float *src,
}
}
OIL_DEFINE_IMPL(conv_s16_f64_bitstuff, conv_s16_f64);
+#endif
#endif
diff --git a/liboil/liboilprofile.h b/liboil/liboilprofile.h
index 97b8172..6d1cf3e 100644
--- a/liboil/liboilprofile.h
+++ b/liboil/liboilprofile.h
@@ -28,6 +28,8 @@
#ifndef _LIBOIL_PROFILE_H_
#define _LIBOIL_PROFILE_H_
+#include <stdint.h>
+
#define OIL_PROFILE_HIST_LENGTH 10
typedef struct _OilProfile OilProfile;
@@ -84,7 +86,7 @@ static inline unsigned long oil_profile_stamp(void)
static inline unsigned long oil_profile_stamp(void)
{
- unsigned int ts;
+ uint64_t ts;
__asm__ __volatile__ ("STCK %0\n" : "=m" (ts));
return ts;
}
diff --git a/liboil/simdpack/Makefile.am b/liboil/simdpack/Makefile.am
index 69d651a..f923315 100644
--- a/liboil/simdpack/Makefile.am
+++ b/liboil/simdpack/Makefile.am
@@ -12,8 +12,7 @@ c_sources = \
abs_misc.c \
average2_u8.c \
clip_ref.c \
- clip_s16.c \
- clip_s32.c \
+ clip_fast.c \
diffsquaresum_f64.c \
mix_u8.c \
mult8x8_s16.c \
diff --git a/liboil/simdpack/clip_s16.c b/liboil/simdpack/clip_fast.c
index 6837cbe..7c4a5ed 100644
--- a/liboil/simdpack/clip_s16.c
+++ b/liboil/simdpack/clip_fast.c
@@ -81,3 +81,31 @@ clip_s16_fast2 (int16_t *dest, int dstr, int16_t *src, int sstr, int n,
}
OIL_DEFINE_IMPL (clip_s16_fast2, clip_s16);
+
+
+
+#if 0
+/* broken */
+
+/* This trick clips
+ * the range min^(1<<31) to max^(1<<31) incorrectly with int32_t.
+ * Thus the use of int64_t. */
+
+static void
+clip_s32_fast (int32_t *dest, int dstr, int32_t *src, int sstr, int n,
+ int32_t *low, int32_t *hi)
+{
+ int i;
+ int64_t x;
+
+ for(i=0;i<n;i++){
+ x = OIL_GET(src,i*sstr,int32_t);
+ x = x - (((x-*low)>>63)&(x-*low)) + (((*hi-x)>>63)&(*hi-x));
+ OIL_GET(dest,i*dstr,int32_t) = x;
+ }
+}
+
+OIL_DEFINE_IMPL (clip_s32_fast, clip_s32);
+#endif
+
+
diff --git a/liboil/simdpack/clip_s32.c b/liboil/simdpack/clip_s32.c
deleted file mode 100644
index 1c7b784..0000000
--- a/liboil/simdpack/clip_s32.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * LIBOIL - Library of Optimized Inner Loops
- * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <liboil/liboilfunction.h>
-#include <liboil/simdpack/simdpack.h>
-
-/* This is a suprisingly fast implementation of clipping
- * in straight C. It would be difficult to do it faster in asm
- * without specialized opcodes. However, this trick clips
- * the range min^(1<<31) to max^(1<<31) incorrectly with int32_t.
- * Thus the use of int64_t. */
-
-static void
-clip_s32_fast (int32_t *dest, int dstr, int32_t *src, int sstr, int n,
- int32_t *low, int32_t *hi)
-{
- int i;
- int64_t x;
-
- for(i=0;i<n;i++){
- x = OIL_GET(src,i*sstr,int32_t);
- OIL_GET(dest,i*dstr,int32_t) = x - (((x-*low)>>63)&(x-*low))
- + (((*hi-x)>>63)&(*hi-x));
- }
-}
-
-OIL_DEFINE_IMPL (clip_s32_fast, clip_s32);
-
-
diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
index 8388c95..761caae 100644
--- a/testsuite/Makefile.am
+++ b/testsuite/Makefile.am
@@ -1,7 +1,15 @@
-check_PROGRAMS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy stride
+if HAVE_GLIB
+glib_programs = abs md5 md5_profile trans copy
+else
+glib_programs =
+endif
-TESTS = moo introspect abs proto1 md5 md5_profile proto2 test1 proto3 trans copy stride
+programs = moo introspect proto1 proto2 test1 proto3 stride
+
+check_PROGRAMS = $(programs) $(glib_programs)
+
+TESTS = $(programs) $(glib_programs)
AM_LDFLAGS = $(LIBOIL_LIBS) $(GLIB_LIBS)
AM_CFLAGS = $(LIBOIL_CFLAGS) $(GLIB_CFLAGS)
diff --git a/testsuite/proto3.c b/testsuite/proto3.c
index b118bff..65fab1e 100644
--- a/testsuite/proto3.c
+++ b/testsuite/proto3.c
@@ -117,7 +117,7 @@ int check_param (Param *p, char *s)
case 'd':
break;
default:
- printf (" ERROR at %s\n", s);
+ printf (" parse error at %s\n", s);
return 0;
}
s++;
@@ -154,7 +154,7 @@ int check_param (Param *p, char *s)
var = 2;
s++;
} else {
- printf (" ERROR at %s\n", s);
+ printf (" parse error at %s\n", s);
return 0;
}
@@ -175,7 +175,7 @@ int check_param (Param *p, char *s)
p->poststride_var = 2;
s++;
} else {
- printf (" ERROR at %s\n", s);
+ printf (" parse error at %s\n", s);
return 0;
}
@@ -193,7 +193,7 @@ int check_param (Param *p, char *s)
p->prestride_var = 0;
}
if (*s != 0) {
- printf (" ERROR at %s\n", s);
+ printf (" parse error at %s\n", s);
return 0;
}
diff --git a/testsuite/stride.c b/testsuite/stride.c
index d03546e..16a564b 100644
--- a/testsuite/stride.c
+++ b/testsuite/stride.c
@@ -49,6 +49,7 @@ int main (int argc, char *argv[])
int j;
int ret;
unsigned int cpu_flags;
+ int fail = 0;
oil_init ();
@@ -82,6 +83,7 @@ int main (int argc, char *argv[])
ret = oil_test_check_impl (test, impl);
if (!ret) {
printf(" failed stride test\n");
+ fail = 1;
}
#if 0
printf(" %lu %g\n",test->prof.min,
@@ -98,7 +100,7 @@ int main (int argc, char *argv[])
oil_test_free (test);
}
- return 0;
+ return fail;
}