summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Schleef <ds@schleef.org> 2005-06-16 06:46:06 +0000
committer David Schleef <ds@schleef.org> 2005-06-16 06:46:06 +0000
commit 508ba3985f38081917b76fcfc4ac84e73ca2954a (patch)
tree bd56b3f8c781a82c6f018a6d4a03d2c33fdd7a5b
parent 7ffccb74ac2ebfd0b6f8361d4016b0ceb3c9581f (diff)
download liboil-508ba3985f38081917b76fcfc4ac84e73ca2954a.tar.gz
* configure.ac: Add instruction checker
* testsuite/Makefile.am:
* testsuite/instruction/Makefile.am:
* testsuite/instruction/check-instructions.pl:
* testsuite/instruction/list-impls.c: (main):
* liboil/colorspace/argb_paint_i386.c: Fix flags based on advice of the instruction checker
* liboil/colorspace/ayuv2argb_i386.c:
* liboil/conv/conv_3dnow.c:
* liboil/conv/conv_sse.c:
* liboil/copy/trans8x8_i386.c:
* liboil/dct/idct8x8_i386.c:
* liboil/sse/conv_sse.c:
* liboil/liboilfuncs.h: update
* liboil/liboilmarshal.c: (_oil_test_marshal_function): update
* liboil/liboiltest.c: (oil_test_new), (oil_test_check_function): regenerate inplace data for every test iteration. Bump default n to 1000 to force memcpy to choose a good function. (lame hack)
* liboil/copy/copy_i386.c: (copy_u8_mmx3), (copy_u8_mmx4), (copy_u8_mmx5): new implementation, fix others
* liboil/copy/splat_i386.c: (splat_u32_ns_mmx): make faster
* liboil/copy/splat_ref.c: (splat_u8_ns_int): fix bug
* liboil/colorspace/argb_paint.c: (argb_splat_u8_ref), (rgba_splat_u8_ref): New functions
* liboil/simdpack/average2_u8.c: (average2_u8_ref), (average2_u8_trick), (average2_u8_fast), (average2_u8_unroll4): Implementations really need to follow stride rules.
* liboil/Makefile.am: Don't use SSE flags, because people on powerpc don't appreciate it.
* examples/memcpy-speed.c: (main): only go to 1<<24 bytes
-rw-r--r-- ChangeLog 35
-rw-r--r-- configure.ac 1
-rw-r--r-- examples/memcpy-speed.c 2
-rw-r--r-- liboil/Makefile.am 2
-rw-r--r-- liboil/colorspace/argb_paint.c 35
-rw-r--r-- liboil/colorspace/argb_paint_i386.c 2
-rw-r--r-- liboil/colorspace/ayuv2argb_i386.c 6
-rw-r--r-- liboil/conv/conv_3dnow.c 4
-rw-r--r-- liboil/conv/conv_sse.c 4
-rw-r--r-- liboil/copy/copy_i386.c 137
-rw-r--r-- liboil/copy/splat_i386.c 13
-rw-r--r-- liboil/copy/splat_ref.c 2
-rw-r--r-- liboil/copy/trans8x8_i386.c 2
-rw-r--r-- liboil/dct/idct8x8_i386.c 4
-rw-r--r-- liboil/liboilfuncs.h 12
-rw-r--r-- liboil/liboilmarshal.c 18
-rw-r--r-- liboil/liboiltest.c 12
-rw-r--r-- liboil/simdpack/average2_u8.c 35
-rw-r--r-- liboil/sse/conv_sse.c 2
-rw-r--r-- testsuite/Makefile.am 2
-rw-r--r-- testsuite/instruction/Makefile.am 15
-rwxr-xr-x testsuite/instruction/check-instructions.pl 444
-rw-r--r-- testsuite/instruction/list-impls.c 83
23 files changed, 817 insertions, 55 deletions
diff --git a/ChangeLog b/ChangeLog
index 0342a99..99303bb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,38 @@
+2005-06-15 David Schleef <ds@schleef.org>
+
+ * configure.ac: Add instruction checker
+ * testsuite/Makefile.am:
+ * testsuite/instruction/Makefile.am:
+ * testsuite/instruction/check-instructions.pl:
+ * testsuite/instruction/list-impls.c: (main):
+
+ * liboil/colorspace/argb_paint_i386.c: Fix flags based on advice of
+ the instruction checker
+ * liboil/colorspace/ayuv2argb_i386.c:
+ * liboil/conv/conv_3dnow.c:
+ * liboil/conv/conv_sse.c:
+ * liboil/copy/trans8x8_i386.c:
+ * liboil/dct/idct8x8_i386.c:
+ * liboil/sse/conv_sse.c:
+
+ * liboil/liboilfuncs.h: update
+ * liboil/liboilmarshal.c: (_oil_test_marshal_function): update
+ * liboil/liboiltest.c: (oil_test_new), (oil_test_check_function):
+ regenerate inplace data for every test iteration. Bump default
+ n to 1000 to force memcpy to choose a good function. (lame hack)
+ * liboil/copy/copy_i386.c: (copy_u8_mmx3), (copy_u8_mmx4),
+ (copy_u8_mmx5): new implementation, fix others
+ * liboil/copy/splat_i386.c: (splat_u32_ns_mmx): make faster
+ * liboil/copy/splat_ref.c: (splat_u8_ns_int): fix bug
+ * liboil/colorspace/argb_paint.c: (argb_splat_u8_ref),
+ (rgba_splat_u8_ref): New functions
+ * liboil/simdpack/average2_u8.c: (average2_u8_ref),
+ (average2_u8_trick), (average2_u8_fast), (average2_u8_unroll4):
+ Implementations really need to follow stride rules.
+ * liboil/Makefile.am: Don't use SSE flags, because people on
+ powerpc don't appreciate it.
+ * examples/memcpy-speed.c: (main): only go to 1<<24 bytes
+
2005-06-02 David Schleef <ds@schleef.org>
* examples/Makefile.am:
diff --git a/configure.ac b/configure.ac
index f73498a..5a28079 100644
--- a/configure.ac
+++ b/configure.ac
@@ -209,6 +209,7 @@ liboil/simdpack/Makefile
liboil/sse/Makefile
liboil/utf8/Makefile
testsuite/Makefile
+testsuite/instruction/Makefile
examples/Makefile
examples/huffman/Makefile
examples/jpeg/Makefile
diff --git a/examples/memcpy-speed.c b/examples/memcpy-speed.c
index 394625c..8d7fc69 100644
--- a/examples/memcpy-speed.c
+++ b/examples/memcpy-speed.c
@@ -44,7 +44,7 @@ main(int argc, char *argv[])
oil_class_choose_by_name (klass, impl->name);
- for(i=10;i<26;i++){
+ for(i=10;i<24;i++){
oil_profile_init (&prof);
for(j=0;j<10;j++){
if (use_memset) {
diff --git a/liboil/Makefile.am b/liboil/Makefile.am
index f33061f..5711500 100644
--- a/liboil/Makefile.am
+++ b/liboil/Makefile.am
@@ -61,7 +61,7 @@ liboil_@LIBOIL_MAJORMINOR@_la_SOURCES = \
liboil_@LIBOIL_MAJORMINOR@_la_LIBADD = \
liboilfunctions.la \
$(LIBM)
-liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS) -msse
+liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS)
liboil_@LIBOIL_MAJORMINOR@_la_LDFLAGS = \
-no-undefined \
-version-info $(LIBOIL_LIBVERSION) \
diff --git a/liboil/colorspace/argb_paint.c b/liboil/colorspace/argb_paint.c
index 7bbb168..edbdcda 100644
--- a/liboil/colorspace/argb_paint.c
+++ b/liboil/colorspace/argb_paint.c
@@ -33,6 +33,8 @@
#include <liboil/liboilfunction.h>
OIL_DEFINE_CLASS (argb_paint_u8, "uint8_t *i_4xn, uint8_t *s1_4, uint8_t *s2_n, int n");
+OIL_DEFINE_CLASS (argb_splat_u8, "uint8_t *i_4xn, uint8_t *s1_4, int n");
+OIL_DEFINE_CLASS (rgba_splat_u8, "uint8_t *i_4xn, uint8_t *s1_4, int n");
#define div255(x) (((x + 128) + ((x + 128)>>8))>>8)
@@ -56,6 +58,39 @@ argb_paint_u8_ref (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
OIL_DEFINE_IMPL_REF (argb_paint_u8_ref, argb_paint_u8);
static void
+argb_splat_u8_ref (uint8_t *dest, uint8_t *color, int n)
+{
+ int i;
+
+ for(i=0;i<n;i++){
+ dest[0] = blend(color[0],dest[0],color[0]);
+ dest[1] = blend(color[1],dest[1],color[0]);
+ dest[2] = blend(color[2],dest[2],color[0]);
+ dest[3] = blend(color[3],dest[3],color[0]);
+ dest+=4;
+ }
+
+}
+OIL_DEFINE_IMPL_REF (argb_splat_u8_ref, argb_splat_u8);
+
+static void
+rgba_splat_u8_ref (uint8_t *dest, uint8_t *color, int n)
+{
+ int i;
+
+ for(i=0;i<n;i++){
+ dest[0] = blend(color[0],dest[0],color[3]);
+ dest[1] = blend(color[1],dest[1],color[3]);
+ dest[2] = blend(color[2],dest[2],color[3]);
+ dest[3] = blend(color[3],dest[3],color[3]);
+ dest+=4;
+ }
+
+}
+OIL_DEFINE_IMPL_REF (rgba_splat_u8_ref, rgba_splat_u8);
+
+
+static void
argb_paint_u8_fast (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
{
int i;
diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c
index a91981c..92c05ed 100644
--- a/liboil/colorspace/argb_paint_i386.c
+++ b/liboil/colorspace/argb_paint_i386.c
@@ -86,6 +86,6 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
: "+r" (dest), "+a" (color), "+r" (alpha), "+r" (n)
: "r" (&constants));
}
-OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
+OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
diff --git a/liboil/colorspace/ayuv2argb_i386.c b/liboil/colorspace/ayuv2argb_i386.c
index fbd16ae..a10a8da 100644
--- a/liboil/colorspace/ayuv2argb_i386.c
+++ b/liboil/colorspace/ayuv2argb_i386.c
@@ -87,7 +87,7 @@ ayuv2argb_u8_mmx (uint8_t *argb, uint8_t *ayuv, int n)
: "r" (&constants));
}
-OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
static short constants2[][4] = {
@@ -132,7 +132,7 @@ ayuv2argb_u8_mmx2 (uint8_t *argb, uint8_t *ayuv, int n)
}
-OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
static void
ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n)
@@ -170,5 +170,5 @@ ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n)
: "r" (&constants2));
}
-OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c
index 1b86b58..4c6024e 100644
--- a/liboil/conv/conv_3dnow.c
+++ b/liboil/conv/conv_3dnow.c
@@ -52,7 +52,7 @@ conv_f32_s16_3dnow (float *dst, int dst_stride, int16_t * src, int src_stride,
asm volatile ("emms");
}
-OIL_DEFINE_IMPL_FULL (conv_f32_s16_3dnow, conv_f32_s16, OIL_IMPL_FLAG_3DNOW);
+OIL_DEFINE_IMPL_FULL (conv_f32_s16_3dnow, conv_f32_s16, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_3DNOW);
/* suboptimal */
static void
@@ -83,4 +83,4 @@ conv_s32_f32_3dnow (int32_t * dst, int dst_stride, float *src, int src_stride,
asm volatile ("emms");
}
-OIL_DEFINE_IMPL_FULL (conv_s32_f32_3dnow, conv_s32_f32, OIL_IMPL_FLAG_3DNOW);
+OIL_DEFINE_IMPL_FULL (conv_s32_f32_3dnow, conv_s32_f32, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_3DNOW);
diff --git a/liboil/conv/conv_sse.c b/liboil/conv/conv_sse.c
index fbdf640..f4d383d 100644
--- a/liboil/conv/conv_sse.c
+++ b/liboil/conv/conv_sse.c
@@ -32,7 +32,6 @@
#include <conv.h>
-#ifdef __GNUC__
/* suboptimal */
static void conv_f64_s32_sse(double *dst, int dst_stride, int32_t *src, int src_stride, int n)
{
@@ -51,7 +50,7 @@ static void conv_f64_s32_sse(double *dst, int dst_stride, int32_t *src, int src_
}
}
OIL_DEFINE_IMPL_FULL(conv_f64_s32_sse, conv_f64_s32,
- OIL_IMPL_FLAG_SSE2);
+ OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE2);
/* suboptimal */
static void conv_s32_f64_sse(int32_t *dst, int dst_stride, double *src, int src_stride, int n)
@@ -72,5 +71,4 @@ static void conv_s32_f64_sse(int32_t *dst, int dst_stride, double *src, int src_
}
OIL_DEFINE_IMPL_FULL(conv_s32_f64_sse, conv_s32_f64,
OIL_IMPL_FLAG_SSE2);
-#endif
diff --git a/liboil/copy/copy_i386.c b/liboil/copy/copy_i386.c
index 29bc9e0..1a8450b 100644
--- a/liboil/copy/copy_i386.c
+++ b/liboil/copy/copy_i386.c
@@ -95,37 +95,154 @@ OIL_DEFINE_IMPL_FULL (copy_u8_mmx2, copy_u8, OIL_IMPL_FLAG_MMX);
static void
copy_u8_mmx3 (uint8_t *dest, uint8_t *src, int n)
{
- while(n&0x3) {
+ /* make sure destination is cache-line aligned for output */
+ if (n < 64) {
+ while (n>0) {
+ *dest++ = *src++;
+ n--;
+ }
+ return;
+ }
+ while (((unsigned long)dest) & 0x3) {
*dest++ = *src++;
n--;
}
- while (n&0x3c) {
+ while (((unsigned long)dest) & 0x3f) {
*(uint32_t *)dest = *(uint32_t *)src;
dest += 4;
src += 4;
n-=4;
}
- if (n) asm volatile (
+ if (n > 64) asm volatile (
" mov $0, %%eax\n"
"1:\n"
//" prefetchnta 128(%1,%%eax)\n"
- " pxor (%1,%%eax), %%mm0\n"
- " pxor 8(%1,%%eax), %%mm1\n"
- " pxor 16(%1,%%eax), %%mm2\n"
- " pxor 24(%1,%%eax), %%mm3\n"
+ " movq (%1,%%eax), %%mm0\n"
+ " movq 8(%1,%%eax), %%mm1\n"
+ " movq 16(%1,%%eax), %%mm2\n"
+ " movq 24(%1,%%eax), %%mm3\n"
+ " movq 32(%1,%%eax), %%mm4\n"
+ " movq 40(%1,%%eax), %%mm5\n"
+ " movq 48(%1,%%eax), %%mm6\n"
+ " movq 56(%1,%%eax), %%mm7\n"
" movntq %%mm0, (%0,%%eax)\n"
" movntq %%mm1, 8(%0,%%eax)\n"
" movntq %%mm2, 16(%0,%%eax)\n"
" movntq %%mm3, 24(%0,%%eax)\n"
- " add $32, %%eax\n"
+ " movntq %%mm4, 32(%0,%%eax)\n"
+ " movntq %%mm5, 40(%0,%%eax)\n"
+ " movntq %%mm6, 48(%0,%%eax)\n"
+ " movntq %%mm7, 56(%0,%%eax)\n"
+ " add $64, %%eax\n"
" decl %%ecx\n"
" jne 1b\n"
+ " sfence\n"
" emms\n"
: "+r" (dest), "+r" (src)
- : "c" (n/32)
+ : "c" (n>>6)
: "eax");
+
+ dest += n&(~(0x3f));
+ src += n&(~(0x3f));
+ n &= 0x3f;
+ while (n > 3) {
+ *(uint32_t *)dest = *(uint32_t *)src;
+ dest += 4;
+ src += 4;
+ n-=4;
+ }
+ while (n > 0) {
+ *dest++ = *src++;
+ n--;
+ }
}
-OIL_DEFINE_IMPL_FULL (copy_u8_mmx3, copy_u8, OIL_IMPL_FLAG_MMX);
+OIL_DEFINE_IMPL_FULL (copy_u8_mmx3, copy_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
+static void
+copy_u8_mmx4 (uint8_t *dest, uint8_t *src, int n)
+{
+ /* make sure destination is cache-line aligned for output */
+ if (n < 32) {
+ while (n>0) {
+ *dest++ = *src++;
+ n--;
+ }
+ return;
+ }
+ while (((unsigned long)dest) & 0x3) {
+ *dest++ = *src++;
+ n--;
+ }
+ while (((unsigned long)dest) & 0x1f) {
+ *(uint32_t *)dest = *(uint32_t *)src;
+ dest += 4;
+ src += 4;
+ n-=4;
+ }
+ if (n > 32) asm volatile (
+ " mov $0, %%eax\n"
+ "1:\n"
+ //" prefetchnta 128(%1,%%eax)\n"
+ " movq (%1,%%eax), %%mm0\n"
+ " movq 8(%1,%%eax), %%mm1\n"
+ " movq 16(%1,%%eax), %%mm2\n"
+ " movq 24(%1,%%eax), %%mm3\n"
+ " movntq %%mm0, (%0,%%eax)\n"
+ " movntq %%mm1, 8(%0,%%eax)\n"
+ " movntq %%mm2, 16(%0,%%eax)\n"
+ " movntq %%mm3, 24(%0,%%eax)\n"
+ " add $32, %%eax\n"
+ " decl %%ecx\n"
+ " jne 1b\n"
+ " sfence\n"
+ " emms\n"
+ : "+r" (dest), "+r" (src)
+ : "c" (n>>5)
+ : "eax");
+
+ dest += n&(~(0x1f));
+ src += n&(~(0x1f));
+ n &= 0x1f;
+ while (n > 3) {
+ *(uint32_t *)dest = *(uint32_t *)src;
+ dest += 4;
+ src += 4;
+ n-=4;
+ }
+ while (n > 0) {
+ *dest++ = *src++;
+ n--;
+ }
+}
+OIL_DEFINE_IMPL_FULL (copy_u8_mmx4, copy_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
+static void
+copy_u8_mmx5 (uint8_t *dest, uint8_t *src, int n)
+{
+ while (n&0xc) {
+ *(uint32_t *)dest = *(uint32_t *)src;
+ dest += 4;
+ src += 4;
+ n-=4;
+ }
+ while(n&0xf) {
+ *dest++ = *src++;
+ n--;
+ }
+ if (n) asm volatile (
+ " mov $0, %%eax\n"
+ "1:\n"
+ " movq (%1,%%eax), %%mm0\n"
+ " movq 8(%1,%%eax), %%mm1\n"
+ " movq %%mm0, (%0,%%eax)\n"
+ " movq %%mm1, 8(%0,%%eax)\n"
+ " add $16, %%eax\n"
+ " decl %%ecx\n"
+ " jne 1b\n"
+ " emms\n"
+ : "+r" (dest), "+r" (src)
+ : "c" (n/16)
+ : "eax");
+}
+OIL_DEFINE_IMPL_FULL (copy_u8_mmx5, copy_u8, OIL_IMPL_FLAG_MMX);
diff --git a/liboil/copy/splat_i386.c b/liboil/copy/splat_i386.c
index 4bc17c0..5106476 100644
--- a/liboil/copy/splat_i386.c
+++ b/liboil/copy/splat_i386.c
@@ -37,7 +37,7 @@ OIL_DECLARE_CLASS(splat_u32_ns);
static void
splat_u32_ns_mmx (uint32_t *dest, uint32_t *src, int n)
{
- while(n&0xf) {
+ while(n&0x7) {
*dest++ = *src;
n--;
}
@@ -50,19 +50,16 @@ splat_u32_ns_mmx (uint32_t *dest, uint32_t *src, int n)
" movntq %%mm0, 8(%0,%%eax)\n"
" movntq %%mm0, 16(%0,%%eax)\n"
" movntq %%mm0, 24(%0,%%eax)\n"
- " movntq %%mm0, 32(%0,%%eax)\n"
- " movntq %%mm0, 40(%0,%%eax)\n"
- " movntq %%mm0, 48(%0,%%eax)\n"
- " movntq %%mm0, 56(%0,%%eax)\n"
- " add $64, %%eax\n"
+ " add $32, %%eax\n"
" decl %%ecx\n"
" jne 1b\n"
+ " sfence\n"
" emms\n"
: "+r" (dest), "+r" (src)
- : "c" (n/16)
+ : "c" (n/8)
: "eax");
}
-OIL_DEFINE_IMPL_FULL (splat_u32_ns_mmx, splat_u32_ns, OIL_IMPL_FLAG_MMX);
+OIL_DEFINE_IMPL_FULL (splat_u32_ns_mmx, splat_u32_ns, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
diff --git a/liboil/copy/splat_ref.c b/liboil/copy/splat_ref.c
index ddd1444..40f636b 100644
--- a/liboil/copy/splat_ref.c
+++ b/liboil/copy/splat_ref.c
@@ -144,7 +144,7 @@ static void splat_u8_ns_int (uint8_t *dest, uint8_t *param, int n)
dest++;
n--;
}
- n >>= 4;
+ n >>= 2;
p = (*param<<24) | (*param<<16) | (*param<<8) | (*param);
while(n>0){
*(uint32_t *)dest = p;
diff --git a/liboil/copy/trans8x8_i386.c b/liboil/copy/trans8x8_i386.c
index f0de418..12881e4 100644
--- a/liboil/copy/trans8x8_i386.c
+++ b/liboil/copy/trans8x8_i386.c
@@ -134,7 +134,7 @@ trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
: "eax");
}
-OIL_DEFINE_IMPL (trans8x8_u16_mmx, trans8x8_u16);
+OIL_DEFINE_IMPL_FULL (trans8x8_u16_mmx, trans8x8_u16, OIL_IMPL_FLAG_MMX);
static void
trans8x8_u16_asm1 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
diff --git a/liboil/dct/idct8x8_i386.c b/liboil/dct/idct8x8_i386.c
index 436039c..e8a88c2 100644
--- a/liboil/dct/idct8x8_i386.c
+++ b/liboil/dct/idct8x8_i386.c
@@ -388,7 +388,7 @@ idct8x8_s16_mmx (int16_t *dest, int dstr, int16_t *src, int sstr)
: "m" (dest), "m" (src), "m" (dstr), "m" (sstr), "r" (tmp), "r" (dct_mmx_constants), "m" (save_ebx)
: "eax", "ecx", "edx");
}
-OIL_DEFINE_IMPL_FULL (idct8x8_s16_mmx, idct8x8_s16, OIL_IMPL_FLAG_MMX);
+OIL_DEFINE_IMPL_FULL (idct8x8_s16_mmx, idct8x8_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
#if 0
#define CONST(x) (32768.0*(x) + 0.5)
@@ -740,5 +740,5 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
: "eax", "ecx", "edx");
}
-OIL_DEFINE_IMPL_FULL (fdct8x8s_s16_mmx, fdct8x8s_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_SSE);
+OIL_DEFINE_IMPL_FULL (fdct8x8s_s16_mmx, fdct8x8s_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h
index 2744781..f855a73 100644
--- a/liboil/liboilfuncs.h
+++ b/liboil/liboilfuncs.h
@@ -54,6 +54,9 @@ typedef void (*_oil_type_abs_u8_s8)(uint8_t * dest, int dstr, const int8_t * src
extern OilFunctionClass *oil_function_class_ptr_argb_paint_u8;
typedef void (*_oil_type_argb_paint_u8)(uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s2_n, int n);
#define oil_argb_paint_u8 ((_oil_type_argb_paint_u8)(*(void **)oil_function_class_ptr_argb_paint_u8))
+extern OilFunctionClass *oil_function_class_ptr_argb_splat_u8;
+typedef void (*_oil_type_argb_splat_u8)(uint8_t * i_4xn, const uint8_t * s1_4, int n);
+#define oil_argb_splat_u8 ((_oil_type_argb_splat_u8)(*(void **)oil_function_class_ptr_argb_splat_u8))
extern OilFunctionClass *oil_function_class_ptr_average2_u8;
typedef void (*_oil_type_average2_u8)(uint8_t * dest, int dstr, const uint8_t * src1, int sstr1, const uint8_t * src2, int sstr2, int n);
#define oil_average2_u8 ((_oil_type_average2_u8)(*(void **)oil_function_class_ptr_average2_u8))
@@ -450,12 +453,21 @@ typedef void (*_oil_type_permute_u32)(uint32_t * dest, int dstr, const uint32_t
extern OilFunctionClass *oil_function_class_ptr_permute_u8;
typedef void (*_oil_type_permute_u8)(uint8_t * dest, int dstr, const uint8_t * src1, int sstr1, const int32_t * src2, int sstr2, int n);
#define oil_permute_u8 ((_oil_type_permute_u8)(*(void **)oil_function_class_ptr_permute_u8))
+extern OilFunctionClass *oil_function_class_ptr_resample_linear_argb_u8;
+typedef void (*_oil_type_resample_linear_argb_u8)(uint8_t * d_4xn, const uint8_t * s_8xn, int n, uint32_t * i_2);
+#define oil_resample_linear_argb_u8 ((_oil_type_resample_linear_argb_u8)(*(void **)oil_function_class_ptr_resample_linear_argb_u8))
+extern OilFunctionClass *oil_function_class_ptr_resample_linear_u8;
+typedef void (*_oil_type_resample_linear_u8)(uint8_t * d_n, const uint8_t * s_2xn, int n, uint32_t * i_2);
+#define oil_resample_linear_u8 ((_oil_type_resample_linear_u8)(*(void **)oil_function_class_ptr_resample_linear_u8))
extern OilFunctionClass *oil_function_class_ptr_rgb2bgr;
typedef void (*_oil_type_rgb2bgr)(uint8_t * d_3xn, const uint8_t * s_3xn, int n);
#define oil_rgb2bgr ((_oil_type_rgb2bgr)(*(void **)oil_function_class_ptr_rgb2bgr))
extern OilFunctionClass *oil_function_class_ptr_rgb2rgba;
typedef void (*_oil_type_rgb2rgba)(uint8_t * d_4xn, const uint8_t * s_3xn, int n);
#define oil_rgb2rgba ((_oil_type_rgb2rgba)(*(void **)oil_function_class_ptr_rgb2rgba))
+extern OilFunctionClass *oil_function_class_ptr_rgba_splat_u8;
+typedef void (*_oil_type_rgba_splat_u8)(uint8_t * i_4xn, const uint8_t * s1_4, int n);
+#define oil_rgba_splat_u8 ((_oil_type_rgba_splat_u8)(*(void **)oil_function_class_ptr_rgba_splat_u8))
extern OilFunctionClass *oil_function_class_ptr_sad8x8_f64;
typedef void (*_oil_type_sad8x8_f64)(double * d_8x8, int ds, const double * s1_8x8, int ss1, const double * s2_8x8, int ss2);
#define oil_sad8x8_f64 ((_oil_type_sad8x8_f64)(*(void **)oil_function_class_ptr_sad8x8_f64))
diff --git a/liboil/liboilmarshal.c b/liboil/liboilmarshal.c
index 3631aa6..4ee633a 100644
--- a/liboil/liboilmarshal.c
+++ b/liboil/liboilmarshal.c
@@ -52,18 +52,18 @@ _oil_test_marshal_function (void *func, unsigned long *args, int n_args,
((void *)args[0],(void *)args[1],(void *)args[2],(int)args[3]);
oil_profile_stop (prof);
break;
- case 0x00d4:
- oil_profile_start (prof);
- ((void (*)(void *,int,void *,int,void *,int,int))func)
- ((void *)args[0],(int)args[1],(void *)args[2],(int)args[3],(void *)args[4],(int)args[5],(int)args[6]);
- oil_profile_stop (prof);
- break;
case 0x000e:
oil_profile_start (prof);
((void (*)(void *,void *,int))func)
((void *)args[0],(void *)args[1],(int)args[2]);
oil_profile_stop (prof);
break;
+ case 0x00d4:
+ oil_profile_start (prof);
+ ((void (*)(void *,int,void *,int,void *,int,int))func)
+ ((void *)args[0],(int)args[1],(void *)args[2],(int)args[3],(void *)args[4],(int)args[5],(int)args[6]);
+ oil_profile_stop (prof);
+ break;
case 0x00d3:
oil_profile_start (prof);
((void (*)(void *,int,void *,int,int,void *,void *))func)
@@ -118,6 +118,12 @@ _oil_test_marshal_function (void *func, unsigned long *args, int n_args,
();
oil_profile_stop (prof);
break;
+ case 0x001d:
+ oil_profile_start (prof);
+ ((void (*)(void *,void *,int,void *))func)
+ ((void *)args[0],(void *)args[1],(int)args[2],(void *)args[3]);
+ oil_profile_stop (prof);
+ break;
case 0x003a:
oil_profile_start (prof);
((void (*)(void *,void *,int,void *,int))func)
diff --git a/liboil/liboiltest.c b/liboil/liboiltest.c
index 5e3d13d..2f5c430 100644
--- a/liboil/liboiltest.c
+++ b/liboil/liboiltest.c
@@ -79,7 +79,7 @@ oil_test_new (OilFunctionClass *klass)
}
test->iterations = 10;
- test->n = 100;
+ test->n = 1000;
test->m = 100;
return test;
@@ -172,6 +172,16 @@ oil_test_check_function (void * priv)
oil_profile_init (&test->prof);
for(i=0;i<test->iterations;i++){
+ int k;
+
+ for(k=0;k<test->proto->n_params;k++){
+ OilParameter *p;
+ j = test->proto->params[k].parameter_type;
+ p = &test->params[j];
+ if (p->direction == 'i') {
+ memcpy (p->test_data, p->src_data, p->size);
+ }
+ }
_oil_test_marshal_function (test->impl->func, args, test->proto->n_params,
pointer_mask, &test->prof);
}
diff --git a/liboil/simdpack/average2_u8.c b/liboil/simdpack/average2_u8.c
index 3c3f546..fe07aa8 100644
--- a/liboil/simdpack/average2_u8.c
+++ b/liboil/simdpack/average2_u8.c
@@ -45,7 +45,7 @@ average2_u8_ref (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
int i;
for (i = 0; i < n; i++) {
- dest[i] = (src1[sstr1 * i] + src2[sstr2 * i]) >> 1;
+ dest[dstr * i] = (src1[sstr1 * i] + src2[sstr2 * i]) >> 1;
}
}
@@ -57,7 +57,8 @@ average2_u8_trick (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
{
unsigned int x, y, d;
- if (sstr1 == 1 && sstr2 == 1) {
+#if 0
+ if (sstr1 == 1 && sstr2 == 1 && dstr == 1) {
while (n > 0) {
x = *(unsigned int *) src1;
y = *(unsigned int *) src2;
@@ -67,8 +68,9 @@ average2_u8_trick (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
dest += 4;
n -= 4;
}
- }
- else {
+ } else
+#endif
+ {
while (n > 0) {
x = (src1[0] << 24) | (src1[sstr1] << 16) | (src1[2 *
sstr1] << 8) | (src1[3 * sstr1]);
@@ -76,12 +78,12 @@ average2_u8_trick (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
sstr2] << 8) | (src2[3 * sstr2]);
d = (((x ^ y) & 0xfefefefe) >> 1) + (x & y);
dest[0] = (d >> 24);
- dest[1] = (d >> 16);
- dest[2] = (d >> 8);
- dest[3] = (d >> 0);
+ dest[1*dstr] = (d >> 16);
+ dest[2*dstr] = (d >> 8);
+ dest[3*dstr] = (d >> 0);
src1 += 4 * sstr1;
src2 += 4 * sstr2;
- dest += 4;
+ dest += 4 * dstr;
n -= 4;
}
}
@@ -94,9 +96,10 @@ average2_u8_fast (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
uint8_t *src2, int sstr2, int n)
{
while (n > 0) {
- *dest++ = (*src1 + *src2) >> 1;
+ *dest = (*src1 + *src2) >> 1;
src1 += sstr1;
src2 += sstr2;
+ dest += dstr;
n--;
}
}
@@ -108,22 +111,26 @@ average2_u8_unroll4 (uint8_t * dest, int dstr, uint8_t *src1, int sstr1,
uint8_t *src2, int sstr2, int n)
{
while (n & 0x3) {
- *dest++ = (*src1 + *src2) >> 1;
+ *dest = (*src1 + *src2) >> 1;
src1 += sstr1;
src2 += sstr2;
n--;
}
while (n > 0) {
- *dest++ = (*src1 + *src2) >> 1;
+ *dest = (*src1 + *src2) >> 1;
+ dest += dstr;
src1 += sstr1;
src2 += sstr2;
- *dest++ = (*src1 + *src2) >> 1;
+ *dest = (*src1 + *src2) >> 1;
+ dest += dstr;
src1 += sstr1;
src2 += sstr2;
- *dest++ = (*src1 + *src2) >> 1;
+ *dest = (*src1 + *src2) >> 1;
+ dest += dstr;
src1 += sstr1;
src2 += sstr2;
- *dest++ = (*src1 + *src2) >> 1;
+ *dest = (*src1 + *src2) >> 1;
+ dest += dstr;
src1 += sstr1;
src2 += sstr2;
n -= 4;
diff --git a/liboil/sse/conv_sse.c b/liboil/sse/conv_sse.c
index 0c51973..3602221 100644
--- a/liboil/sse/conv_sse.c
+++ b/liboil/sse/conv_sse.c
@@ -84,5 +84,5 @@ conv_f32_s32_sse (float *dst, int dst_stride, int32_t * src, int src_stride,
_m_empty();
}
-OIL_DEFINE_IMPL_FULL (conv_f32_s32_sse, conv_f32_s32, OIL_IMPL_FLAG_SSE);
+OIL_DEFINE_IMPL_FULL (conv_f32_s32_sse, conv_f32_s32, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_SSE);
diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am
index 0bc4c72..db7c347 100644
--- a/testsuite/Makefile.am
+++ b/testsuite/Makefile.am
@@ -1,4 +1,6 @@
+SUBDIRS = instruction
+
if HAVE_GLIB
glib_programs = abs md5 md5_profile trans copy
else
diff --git a/testsuite/instruction/Makefile.am b/testsuite/instruction/Makefile.am
new file mode 100644
index 0000000..804d643
--- /dev/null
+++ b/testsuite/instruction/Makefile.am
@@ -0,0 +1,15 @@
+
+noinst_PROGRAMS = list-impls
+
+noinst_DATA = report
+
+AM_LDFLAGS = $(LIBOIL_LIBS)
+AM_CFLAGS = $(LIBOIL_CFLAGS)
+
+
+report: list-impls
+ ./check-instructions.pl $(top_builddir)/liboil/.libs/liboiltmp1.so >report
+
+
+EXTRA_CLEAN = report
+
diff --git a/testsuite/instruction/check-instructions.pl b/testsuite/instruction/check-instructions.pl
new file mode 100755
index 0000000..fa250ee
--- /dev/null
+++ b/testsuite/instruction/check-instructions.pl
@@ -0,0 +1,444 @@
+#!/usr/bin/perl -w
+# vi: set ts=4:
+#
+
+$debug = 0;
+
+sub get_flags
+{
+ my @list = @insns;
+ my $exts = {};
+ my $s;
+
+ $debug && print "function: $func\n";
+ foreach $insn (@list) {
+ if (grep { /^$insn$/ } @normal_list) {
+ $debug && print " $insn: normal\n";
+ }elsif (grep { /^$insn$/ } @mmx_list) {
+ $exts->{"mmx"} = 1;
+ $debug && print " $insn: mmx\n";
+ }elsif (grep { /^$insn$/ } @mmx_ext_list) {
+ $exts->{"mmxext"} = 1;
+ $debug && print " $insn: mmxext\n";
+ }elsif (grep { /^$insn$/ } @_3dnow_list) {
+ $exts->{"3dnow"} = 1;
+ $debug && print " $insn: 3dnow\n";
+ }elsif (grep { /^$insn$/ } @_3dnow_ext_list) {
+ $exts->{"3dnowext"} = 1;
+ $debug && print " $insn: 3dnowext\n";
+ }elsif (grep { /^$insn$/ } @sse_list) {
+ $exts->{"sse"} = 1;
+ $debug && print " $insn: sse\n";
+ }elsif (grep { /^$insn$/ } @sse2_list) {
+ $exts->{"sse2"} = 1;
+ $debug && print " $insn: sse2\n";
+ }else {
+ print "FIXME:\t\"$insn\",\n";
+ $error = 1;
+ }
+ }
+ $s = join(" ",sort(keys(%$exts)));
+ $funcs->{"$func"} = $s;
+ $debug && print " FLAGS: $s\n";
+}
+
+sub check
+{
+ foreach $insn (@normal_list) {
+ if (grep { /^$insn$/ } @mmx_list) {
+ print "FIXME: $insn is in mmx_list\n";
+ $error = 1;
+ } elsif (grep { /^$insn$/ } @mmx_ext_list) {
+ print "FIXME: $insn is in mmx_ext_list\n";
+ $error = 1;
+ } elsif (grep { /^$insn$/ } @_3dnow_list) {
+ print "FIXME: $insn is in _3dnow_list\n";
+ $error = 1;
+ } elsif (grep { /^$insn$/ } @_3dnow_ext_list) {
+ print "FIXME: $insn is in _3dnow_ext_list\n";
+ $error = 1;
+ } elsif (grep { /^$insn$/ } @sse_list) {
+ print "FIXME: $insn is in sse_list\n";
+ $error = 1;
+ } elsif (grep { /^$insn$/ } @sse2_list) {
+ print "FIXME: $insn is in sse2_list\n";
+ $error = 1;
+ }
+ }
+}
+
+@normal_list = (
+ "add",
+ "addl",
+ "and",
+ "andl",
+ "call",
+ "cld",
+ "cltd",
+ "cmovg",
+ "cmp",
+ "cmpb",
+ "cmpl",
+ "cwtl",
+ "dec",
+ "decl",
+ "fabs",
+ "fadd",
+ "faddl",
+ "faddp",
+ "fadds",
+ "fdivrl",
+ "fild",
+ "fildl",
+ "fildll",
+ "fistp",
+ "fistpl",
+ "fistpll",
+ "fldcw",
+ "fldl",
+ "flds",
+ "fldz",
+ "fmul",
+ "fmull",
+ "fmulp",
+ "fmuls",
+ "fnstcw",
+ "fnstsw",
+ "fsqrt",
+ "fstl",
+ "fstp",
+ "fstpl",
+ "fstps",
+ "fsts",
+ "fsub",
+ "fsubl",
+ "fsubp",
+ "fsubr",
+ "fsubrl",
+ "fsubrp",
+ "fucom",
+ "fucomp",
+ "fucompp",
+ "fxch",
+ "imul",
+ "inc",
+ "incl",
+ "ja",
+ "jae",
+ "jbe",
+ "je",
+ "jg",
+ "jge",
+ "jl",
+ "jle",
+ "jmp",
+ "jne",
+ "jns",
+ "jp",
+ "js",
+ "lea",
+ "leave",
+ "mov",
+ "movb",
+ "movl",
+ "movsbl",
+ "movsbw",
+ "movswl",
+ "movsww",
+ "movzbl",
+ "movzbw",
+ "movzwl",
+ "mulb",
+ "neg",
+ "nop",
+ "not",
+ "or",
+ "pop",
+ "push",
+ "pushl",
+ "repz",
+ "ret",
+ "rol",
+ "sahf",
+ "sar",
+ "sarl",
+ "shl",
+ "shr",
+ "sub",
+ "subl",
+ "test",
+ "testb",
+ "testl",
+ "xor",
+);
+
+# verified
+@mmx_list = (
+ "emms",
+ "movd",
+ "movq",
+ "packssdw",
+ "packsswb",
+ "packuswb",
+ "paddb",
+ "paddd",
+ "paddsb",
+ "paddsw",
+ "paddusb",
+ "paddusw",
+ "paddw",
+ "pand",
+ "pandn",
+ "pcmpeqb",
+ "pcmpeqd",
+ "pcmpgtb",
+ "pcmpgtd",
+ "pcmpgtw",
+ "pmaddwd",
+ "pmulhw",
+ "pmullw",
+ "por",
+ "pslld",
+ "psllq",
+ "psllw",
+ "psrad",
+ "psraw",
+ "psrld",
+ "psrlq",
+ "psrlw",
+ "psubb",
+ "psubd",
+ "psubsb",
+ "psubsw",
+ "psubusb",
+ "psubusw",
+ "psubw",
+ "punpckhbw",
+ "punpckhdq",
+ "punpckhwd",
+ "punpcklbw",
+ "punpckldq",
+ "punpcklwd",
+ "pxor"
+);
+
+# verified
+@_3dnow_list = (
+ "femms",
+ "pavgusb",
+ "pf2id",
+ "pfacc",
+ "pfadd",
+ "pfcmpeq",
+ "pfcmpge",
+ "pfcmpgt",
+ "pfmax",
+ "pfmin",
+ "pfmul",
+ "pfrcp",
+ "pfrcpit1",
+ "pfrcpit2",
+ "pfrsqit1",
+ "pfrsqrt",
+ "pfsub",
+ "pfsubr",
+ "pi2fd",
+ "pmulhrw",
+ "prefetch",
+ "prefetchw"
+);
+
+# verified
+@_3dnow_ext_list = (
+ "pf2iw",
+ "pfnacc",
+ "pfpnacc",
+ "pi2fw",
+ "pswapd"
+);
+
+# verified
+@mmx_ext_list = (
+ "maskmovq",
+ "movntq",
+ "pavgb",
+ "pavgw",
+ "pextrw",
+ "pinsrw",
+ "pmaxsw",
+ "pmaxub",
+ "pminsw",
+ "pminub",
+ "pmovmskb",
+ "pmulhuw",
+ "prefetchnta",
+ "prefetch0",
+ "prefetch1",
+ "prefetch2",
+ "psadbw",
+ "pshufw",
+ "sfence"
+);
+
+# verified
+@sse_list = (
+ "addps",
+ "addss",
+ "andnps",
+ "andps",
+ "cmpps",
+ "cmpss",
+ "comiss",
+ "cvtpi2ps",
+ "cvtps2ps",
+ "cvtsi2ss",
+ "cvtss2si",
+ "cvttps2pi",
+ "cvttss2si",
+ "divps",
+ "divss",
+ "fxrstor",
+ "fxsave",
+ "ldmxcsr",
+ "maxps",
+ "maxss",
+ "minps",
+ "minss",
+ "movaps",
+ "movhlps",
+ "movhps",
+ "movlhps",
+ "movlps",
+ "movmskps",
+ "movss",
+ "movups",
+ "mulps",
+ "mulss",
+ "orps",
+ "rcpps",
+ "rcpss",
+ "rsqrtps",
+ "rsqrtss",
+ "shufps",
+ "sqrtps",
+ "sqrtss",
+ "stmxcsr",
+ "subps",
+ "subss",
+ "ucomiss",
+ "unpckhps",
+ "unpcklps",
+ "xorps"
+);
+
+@sse2_list = (
+ "addpd",
+ "addsd",
+ "andnpd",
+ "andpd",
+ "cmppd",
+ "cmpsd",
+ "comisd",
+ "cvtpi2pd",
+ "cvtpd2pi",
+ "cvtsi2sd",
+ "cvtsd2si",
+ "cvttpd2pi",
+ "cvttsd2si",
+ "cvtpd2ps",
+ "cvtps2pd",
+ "cvtsd2ss",
+ "cvtss2sd",
+ "cvtps2dq",
+ "cvttpd2dq",
+ "cvtdq2pd",
+ "cvtps2dq",
+ "cvttps2dq",
+ "cvtdq2ps",
+ "divpd",
+ "divsd",
+ "maxpd",
+ "maxsd",
+ "minsd",
+ "movapd",
+ "movhpd",
+ "movlpd",
+ "movmskpd",
+ "movsd",
+ "movupd",
+ "mulpd",
+ "mulsd",
+ "orpd",
+ "shufpd",
+ "sqrtpd",
+ "sqrtsd",
+ "subpd",
+ "subsd",
+ "ucomisd",
+ "unpckhpd",
+ "unpcklpd",
+ "xorpd",
+);
+
+$funcs = {};
+
+$ARGV=shift @ARGV;
+@output=`objdump -dr $ARGV`;
+
+check();
+
+$error = 0;
+@insns = ();
+while($_=shift @output){
+ chomp;
+ if(m/^0[0-9a-fA-F]+\s<[\.\w]+>:$/){
+ $f = $_;
+ $f =~ s/^0[0-9a-fA-F]+\s<([\.\w]+)>:$/$1/;
+
+ if (@insns) {
+ get_flags ();
+ }
+
+ $func = $f;
+
+ @insns = ();
+ $debug && print "$func:\n";
+
+ } elsif(m/^[\s0-9a-f]+:\s[\s0-9a-f]+\s([a-z0-9]+)\s/){
+ if (!grep { /$1/ } @insns) {
+ push @insns, $1;
+ }
+ #print " $1\n";
+ } elsif (m/^$/) {
+ } elsif (m/^Disassembly of section/) {
+ } elsif (m/\sfile format\s/) {
+ } else {
+ print "FIXME: $_\n";
+ $error = 1;
+ }
+}
+
+@source = `./list-impls`;
+while ($_ = shift @source) {
+ chomp;
+ if (m/^([\w\.]+):\s*([\w\s*]*)/) {
+ $func = $1;
+ $flags = $2;
+
+ $xflags = $funcs->{$func};
+ if (1) {
+ if ($flags ne $xflags) {
+ print "$func: \"$flags\" should be \"$xflags\"\n";
+ }
+ } else {
+ print "FIXME: function \"$func\" has no disassembly\n";
+ $error = 1;
+ }
+ } else {
+ print "FIXME: bad match: $_\n";
+ }
+}
+
+exit $error;
+
+
diff --git a/testsuite/instruction/list-impls.c b/testsuite/instruction/list-impls.c
new file mode 100644
index 0000000..905f64b
--- /dev/null
+++ b/testsuite/instruction/list-impls.c
@@ -0,0 +1,83 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboil.h>
+#include <liboil/liboilfunction.h>
+#include <liboil/liboilcpu.h>
+#include <liboil/liboiltest.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+int
+main (int argc, char *argv[])
+{
+ int n;
+ int i;
+
+ oil_init();
+
+ n = oil_class_get_n_classes ();
+ for (i = 0; i < n; i++){
+ OilFunctionClass *klass = oil_class_get_by_index (i);
+ OilFunctionImpl *impl;
+
+ for (impl = klass->first_impl; impl; impl = impl->next) {
+ printf("%s:", impl->name);
+#ifdef HAVE_CPU_I386
+ if (impl->flags & OIL_IMPL_FLAG_3DNOW)
+ printf(" 3dnow");
+ if (impl->flags & OIL_IMPL_FLAG_3DNOWEXT)
+ printf(" 3dnowext");
+ if (impl->flags & OIL_IMPL_FLAG_CMOV)
+ printf(" cmov");
+ if (impl->flags & OIL_IMPL_FLAG_MMX)
+ printf(" mmx");
+ if (impl->flags & OIL_IMPL_FLAG_MMXEXT)
+ printf(" mmxext");
+ if (impl->flags & OIL_IMPL_FLAG_SSE)
+ printf(" sse");
+ if (impl->flags & OIL_IMPL_FLAG_SSE2)
+ printf(" sse2");
+#endif
+#ifdef HAVE_CPU_PPC
+ if (impl->flags & OIL_IMPL_FLAG_ALTIVEC)
+ printf(" altivec");
+#endif
+ printf("\n");
+ }
+ }
+
+ return 0;
+}
+