diff options
author | David Schleef <ds@schleef.org> | 2005-06-16 06:46:06 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-06-16 06:46:06 +0000 |
commit | 508ba3985f38081917b76fcfc4ac84e73ca2954a (patch) | |
tree | bd56b3f8c781a82c6f018a6d4a03d2c33fdd7a5b | |
parent | 7ffccb74ac2ebfd0b6f8361d4016b0ceb3c9581f (diff) | |
download | liboil-508ba3985f38081917b76fcfc4ac84e73ca2954a.tar.gz |
* configure.ac: Add instruction checker
* testsuite/Makefile.am:
* testsuite/instruction/Makefile.am:
* testsuite/instruction/check-instructions.pl:
* testsuite/instruction/list-impls.c: (main):
* liboil/colorspace/argb_paint_i386.c: Fix flags based on advice of
the instruction checker
* liboil/colorspace/ayuv2argb_i386.c:
* liboil/conv/conv_3dnow.c:
* liboil/conv/conv_sse.c:
* liboil/copy/trans8x8_i386.c:
* liboil/dct/idct8x8_i386.c:
* liboil/sse/conv_sse.c:
* liboil/liboilfuncs.h: update
* liboil/liboilmarshal.c: (_oil_test_marshal_function): update
* liboil/liboiltest.c: (oil_test_new), (oil_test_check_function):
regenerate inplace data for every test iteration. Bump default
n to 1000 to force memcpy to choose a good function. (lame hack)
* liboil/copy/copy_i386.c: (copy_u8_mmx3), (copy_u8_mmx4),
(copy_u8_mmx5): new implementation, fix others
* liboil/copy/splat_i386.c: (splat_u32_ns_mmx): make faster
* liboil/copy/splat_ref.c: (splat_u8_ns_int): fix bug
* liboil/colorspace/argb_paint.c: (argb_splat_u8_ref),
(rgba_splat_u8_ref): New functions
* liboil/simdpack/average2_u8.c: (average2_u8_ref),
(average2_u8_trick), (average2_u8_fast), (average2_u8_unroll4):
Implementations really need to follow stride rules.
* liboil/Makefile.am: Don't use SSE flags, because people on
powerpc don't appreciate it.
* examples/memcpy-speed.c: (main): only go to 1<<24 bytes
-rw-r--r-- | ChangeLog | 35 | ||||
-rw-r--r-- | configure.ac | 1 | ||||
-rw-r--r-- | examples/memcpy-speed.c | 2 | ||||
-rw-r--r-- | liboil/Makefile.am | 2 | ||||
-rw-r--r-- | liboil/colorspace/argb_paint.c | 35 | ||||
-rw-r--r-- | liboil/colorspace/argb_paint_i386.c | 2 | ||||
-rw-r--r-- | liboil/colorspace/ayuv2argb_i386.c | 6 | ||||
-rw-r--r-- | liboil/conv/conv_3dnow.c | 4 | ||||
-rw-r--r-- | liboil/conv/conv_sse.c | 4 | ||||
-rw-r--r-- | liboil/copy/copy_i386.c | 137 | ||||
-rw-r--r-- | liboil/copy/splat_i386.c | 13 | ||||
-rw-r--r-- | liboil/copy/splat_ref.c | 2 | ||||
-rw-r--r-- | liboil/copy/trans8x8_i386.c | 2 | ||||
-rw-r--r-- | liboil/dct/idct8x8_i386.c | 4 | ||||
-rw-r--r-- | liboil/liboilfuncs.h | 12 | ||||
-rw-r--r-- | liboil/liboilmarshal.c | 18 | ||||
-rw-r--r-- | liboil/liboiltest.c | 12 | ||||
-rw-r--r-- | liboil/simdpack/average2_u8.c | 35 | ||||
-rw-r--r-- | liboil/sse/conv_sse.c | 2 | ||||
-rw-r--r-- | testsuite/Makefile.am | 2 | ||||
-rw-r--r-- | testsuite/instruction/Makefile.am | 15 | ||||
-rwxr-xr-x | testsuite/instruction/check-instructions.pl | 444 | ||||
-rw-r--r-- | testsuite/instruction/list-impls.c | 83 |
23 files changed, 817 insertions, 55 deletions
@@ -1,3 +1,38 @@ +2005-06-15 David Schleef <ds@schleef.org> + + * configure.ac: Add instruction checker + * testsuite/Makefile.am: + * testsuite/instruction/Makefile.am: + * testsuite/instruction/check-instructions.pl: + * testsuite/instruction/list-impls.c: (main): + + * liboil/colorspace/argb_paint_i386.c: Fix flags based on advice of + the instruction checker + * liboil/colorspace/ayuv2argb_i386.c: + * liboil/conv/conv_3dnow.c: + * liboil/conv/conv_sse.c: + * liboil/copy/trans8x8_i386.c: + * liboil/dct/idct8x8_i386.c: + * liboil/sse/conv_sse.c: + + * liboil/liboilfuncs.h: update + * liboil/liboilmarshal.c: (_oil_test_marshal_function): update + * liboil/liboiltest.c: (oil_test_new), (oil_test_check_function): + regenerate inplace data for every test iteration. Bump default + n to 1000 to force memcpy to choose a good function. (lame hack) + * liboil/copy/copy_i386.c: (copy_u8_mmx3), (copy_u8_mmx4), + (copy_u8_mmx5): new implementation, fix others + * liboil/copy/splat_i386.c: (splat_u32_ns_mmx): make faster + * liboil/copy/splat_ref.c: (splat_u8_ns_int): fix bug + * liboil/colorspace/argb_paint.c: (argb_splat_u8_ref), + (rgba_splat_u8_ref): New functions + * liboil/simdpack/average2_u8.c: (average2_u8_ref), + (average2_u8_trick), (average2_u8_fast), (average2_u8_unroll4): + Implementations really need to follow stride rules. + * liboil/Makefile.am: Don't use SSE flags, because people on + powerpc don't appreciate it. + * examples/memcpy-speed.c: (main): only go to 1<<24 bytes + 2005-06-02 David Schleef <ds@schleef.org> * examples/Makefile.am: diff --git a/configure.ac b/configure.ac index f73498a..5a28079 100644 --- a/configure.ac +++ b/configure.ac @@ -209,6 +209,7 @@ liboil/simdpack/Makefile liboil/sse/Makefile liboil/utf8/Makefile testsuite/Makefile +testsuite/instruction/Makefile examples/Makefile examples/huffman/Makefile examples/jpeg/Makefile diff --git a/examples/memcpy-speed.c b/examples/memcpy-speed.c index 394625c..8d7fc69 100644 --- a/examples/memcpy-speed.c +++ b/examples/memcpy-speed.c @@ -44,7 +44,7 @@ main(int argc, char *argv[]) oil_class_choose_by_name (klass, impl->name); - for(i=10;i<26;i++){ + for(i=10;i<24;i++){ oil_profile_init (&prof); for(j=0;j<10;j++){ if (use_memset) { diff --git a/liboil/Makefile.am b/liboil/Makefile.am index f33061f..5711500 100644 --- a/liboil/Makefile.am +++ b/liboil/Makefile.am @@ -61,7 +61,7 @@ liboil_@LIBOIL_MAJORMINOR@_la_SOURCES = \ liboil_@LIBOIL_MAJORMINOR@_la_LIBADD = \ liboilfunctions.la \ $(LIBM) -liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS) -msse +liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS) liboil_@LIBOIL_MAJORMINOR@_la_LDFLAGS = \ -no-undefined \ -version-info $(LIBOIL_LIBVERSION) \ diff --git a/liboil/colorspace/argb_paint.c b/liboil/colorspace/argb_paint.c index 7bbb168..edbdcda 100644 --- a/liboil/colorspace/argb_paint.c +++ b/liboil/colorspace/argb_paint.c @@ -33,6 +33,8 @@ #include <liboil/liboilfunction.h> OIL_DEFINE_CLASS (argb_paint_u8, "uint8_t *i_4xn, uint8_t *s1_4, uint8_t *s2_n, int n"); +OIL_DEFINE_CLASS (argb_splat_u8, "uint8_t *i_4xn, uint8_t *s1_4, int n"); +OIL_DEFINE_CLASS (rgba_splat_u8, "uint8_t *i_4xn, uint8_t *s1_4, int n"); #define div255(x) (((x + 128) + ((x + 128)>>8))>>8) @@ -56,6 +58,39 @@ argb_paint_u8_ref (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) OIL_DEFINE_IMPL_REF (argb_paint_u8_ref, argb_paint_u8); static void +argb_splat_u8_ref (uint8_t *dest, uint8_t *color, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[0] = blend(color[0],dest[0],color[0]); + dest[1] = blend(color[1],dest[1],color[0]); + dest[2] = blend(color[2],dest[2],color[0]); + dest[3] = blend(color[3],dest[3],color[0]); + dest+=4; + } + +} +OIL_DEFINE_IMPL_REF (argb_splat_u8_ref, argb_splat_u8); + +static void +rgba_splat_u8_ref (uint8_t *dest, uint8_t *color, int n) +{ + int i; + + for(i=0;i<n;i++){ + dest[0] = blend(color[0],dest[0],color[3]); + dest[1] = blend(color[1],dest[1],color[3]); + dest[2] = blend(color[2],dest[2],color[3]); + dest[3] = blend(color[3],dest[3],color[3]); + dest+=4; + } + +} +OIL_DEFINE_IMPL_REF (rgba_splat_u8_ref, rgba_splat_u8); + + +static void argb_paint_u8_fast (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) { int i; diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c index a91981c..92c05ed 100644 --- a/liboil/colorspace/argb_paint_i386.c +++ b/liboil/colorspace/argb_paint_i386.c @@ -86,6 +86,6 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) : "+r" (dest), "+a" (color), "+r" (alpha), "+r" (n) : "r" (&constants)); } -OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT); diff --git a/liboil/colorspace/ayuv2argb_i386.c b/liboil/colorspace/ayuv2argb_i386.c index fbd16ae..a10a8da 100644 --- a/liboil/colorspace/ayuv2argb_i386.c +++ b/liboil/colorspace/ayuv2argb_i386.c @@ -87,7 +87,7 @@ ayuv2argb_u8_mmx (uint8_t *argb, uint8_t *ayuv, int n) : "r" (&constants)); } -OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT); static short constants2[][4] = { @@ -132,7 +132,7 @@ ayuv2argb_u8_mmx2 (uint8_t *argb, uint8_t *ayuv, int n) } -OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT); static void ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n) @@ -170,5 +170,5 @@ ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n) : "r" (&constants2)); } -OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT); diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c index 1b86b58..4c6024e 100644 --- a/liboil/conv/conv_3dnow.c +++ b/liboil/conv/conv_3dnow.c @@ -52,7 +52,7 @@ conv_f32_s16_3dnow (float *dst, int dst_stride, int16_t * src, int src_stride, asm volatile ("emms"); } -OIL_DEFINE_IMPL_FULL (conv_f32_s16_3dnow, conv_f32_s16, OIL_IMPL_FLAG_3DNOW); +OIL_DEFINE_IMPL_FULL (conv_f32_s16_3dnow, conv_f32_s16, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_3DNOW); /* suboptimal */ static void @@ -83,4 +83,4 @@ conv_s32_f32_3dnow (int32_t * dst, int dst_stride, float *src, int src_stride, asm volatile ("emms"); } -OIL_DEFINE_IMPL_FULL (conv_s32_f32_3dnow, conv_s32_f32, OIL_IMPL_FLAG_3DNOW); +OIL_DEFINE_IMPL_FULL (conv_s32_f32_3dnow, conv_s32_f32, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_3DNOW); diff --git a/liboil/conv/conv_sse.c b/liboil/conv/conv_sse.c index fbdf640..f4d383d 100644 --- a/liboil/conv/conv_sse.c +++ b/liboil/conv/conv_sse.c @@ -32,7 +32,6 @@ #include <conv.h> -#ifdef __GNUC__ /* suboptimal */ static void conv_f64_s32_sse(double *dst, int dst_stride, int32_t *src, int src_stride, int n) { @@ -51,7 +50,7 @@ static void conv_f64_s32_sse(double *dst, int dst_stride, int32_t *src, int src_ } } OIL_DEFINE_IMPL_FULL(conv_f64_s32_sse, conv_f64_s32, - OIL_IMPL_FLAG_SSE2); + OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE2); /* suboptimal */ static void conv_s32_f64_sse(int32_t *dst, int dst_stride, double *src, int src_stride, int n) @@ -72,5 +71,4 @@ static void conv_s32_f64_sse(int32_t *dst, int dst_stride, double *src, int src_ } OIL_DEFINE_IMPL_FULL(conv_s32_f64_sse, conv_s32_f64, OIL_IMPL_FLAG_SSE2); -#endif diff --git a/liboil/copy/copy_i386.c b/liboil/copy/copy_i386.c index 29bc9e0..1a8450b 100644 --- a/liboil/copy/copy_i386.c +++ b/liboil/copy/copy_i386.c @@ -95,37 +95,154 @@ OIL_DEFINE_IMPL_FULL (copy_u8_mmx2, copy_u8, OIL_IMPL_FLAG_MMX); static void copy_u8_mmx3 (uint8_t *dest, uint8_t *src, int n) { - while(n&0x3) { + /* make sure destination is cache-line aligned for output */ + if (n < 64) { + while (n>0) { + *dest++ = *src++; + n--; + } + return; + } + while (((unsigned long)dest) & 0x3) { *dest++ = *src++; n--; } - while (n&0x3c) { + while (((unsigned long)dest) & 0x3f) { *(uint32_t *)dest = *(uint32_t *)src; dest += 4; src += 4; n-=4; } - if (n) asm volatile ( + if (n > 64) asm volatile ( " mov $0, %%eax\n" "1:\n" //" prefetchnta 128(%1,%%eax)\n" - " pxor (%1,%%eax), %%mm0\n" - " pxor 8(%1,%%eax), %%mm1\n" - " pxor 16(%1,%%eax), %%mm2\n" - " pxor 24(%1,%%eax), %%mm3\n" + " movq (%1,%%eax), %%mm0\n" + " movq 8(%1,%%eax), %%mm1\n" + " movq 16(%1,%%eax), %%mm2\n" + " movq 24(%1,%%eax), %%mm3\n" + " movq 32(%1,%%eax), %%mm4\n" + " movq 40(%1,%%eax), %%mm5\n" + " movq 48(%1,%%eax), %%mm6\n" + " movq 56(%1,%%eax), %%mm7\n" " movntq %%mm0, (%0,%%eax)\n" " movntq %%mm1, 8(%0,%%eax)\n" " movntq %%mm2, 16(%0,%%eax)\n" " movntq %%mm3, 24(%0,%%eax)\n" - " add $32, %%eax\n" + " movntq %%mm4, 32(%0,%%eax)\n" + " movntq %%mm5, 40(%0,%%eax)\n" + " movntq %%mm6, 48(%0,%%eax)\n" + " movntq %%mm7, 56(%0,%%eax)\n" + " add $64, %%eax\n" " decl %%ecx\n" " jne 1b\n" + " sfence\n" " emms\n" : "+r" (dest), "+r" (src) - : "c" (n/32) + : "c" (n>>6) : "eax"); + + dest += n&(~(0x3f)); + src += n&(~(0x3f)); + n &= 0x3f; + while (n > 3) { + *(uint32_t *)dest = *(uint32_t *)src; + dest += 4; + src += 4; + n-=4; + } + while (n > 0) { + *dest++ = *src++; + n--; + } } -OIL_DEFINE_IMPL_FULL (copy_u8_mmx3, copy_u8, OIL_IMPL_FLAG_MMX); +OIL_DEFINE_IMPL_FULL (copy_u8_mmx3, copy_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); +static void +copy_u8_mmx4 (uint8_t *dest, uint8_t *src, int n) +{ + /* make sure destination is cache-line aligned for output */ + if (n < 32) { + while (n>0) { + *dest++ = *src++; + n--; + } + return; + } + while (((unsigned long)dest) & 0x3) { + *dest++ = *src++; + n--; + } + while (((unsigned long)dest) & 0x1f) { + *(uint32_t *)dest = *(uint32_t *)src; + dest += 4; + src += 4; + n-=4; + } + if (n > 32) asm volatile ( + " mov $0, %%eax\n" + "1:\n" + //" prefetchnta 128(%1,%%eax)\n" + " movq (%1,%%eax), %%mm0\n" + " movq 8(%1,%%eax), %%mm1\n" + " movq 16(%1,%%eax), %%mm2\n" + " movq 24(%1,%%eax), %%mm3\n" + " movntq %%mm0, (%0,%%eax)\n" + " movntq %%mm1, 8(%0,%%eax)\n" + " movntq %%mm2, 16(%0,%%eax)\n" + " movntq %%mm3, 24(%0,%%eax)\n" + " add $32, %%eax\n" + " decl %%ecx\n" + " jne 1b\n" + " sfence\n" + " emms\n" + : "+r" (dest), "+r" (src) + : "c" (n>>5) + : "eax"); + + dest += n&(~(0x1f)); + src += n&(~(0x1f)); + n &= 0x1f; + while (n > 3) { + *(uint32_t *)dest = *(uint32_t *)src; + dest += 4; + src += 4; + n-=4; + } + while (n > 0) { + *dest++ = *src++; + n--; + } +} +OIL_DEFINE_IMPL_FULL (copy_u8_mmx4, copy_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); +static void +copy_u8_mmx5 (uint8_t *dest, uint8_t *src, int n) +{ + while (n&0xc) { + *(uint32_t *)dest = *(uint32_t *)src; + dest += 4; + src += 4; + n-=4; + } + while(n&0xf) { + *dest++ = *src++; + n--; + } + if (n) asm volatile ( + " mov $0, %%eax\n" + "1:\n" + " movq (%1,%%eax), %%mm0\n" + " movq 8(%1,%%eax), %%mm1\n" + " movq %%mm0, (%0,%%eax)\n" + " movq %%mm1, 8(%0,%%eax)\n" + " add $16, %%eax\n" + " decl %%ecx\n" + " jne 1b\n" + " emms\n" + : "+r" (dest), "+r" (src) + : "c" (n/16) + : "eax"); +} +OIL_DEFINE_IMPL_FULL (copy_u8_mmx5, copy_u8, OIL_IMPL_FLAG_MMX); diff --git a/liboil/copy/splat_i386.c b/liboil/copy/splat_i386.c index 4bc17c0..5106476 100644 --- a/liboil/copy/splat_i386.c +++ b/liboil/copy/splat_i386.c @@ -37,7 +37,7 @@ OIL_DECLARE_CLASS(splat_u32_ns); static void splat_u32_ns_mmx (uint32_t *dest, uint32_t *src, int n) { - while(n&0xf) { + while(n&0x7) { *dest++ = *src; n--; } @@ -50,19 +50,16 @@ splat_u32_ns_mmx (uint32_t *dest, uint32_t *src, int n) " movntq %%mm0, 8(%0,%%eax)\n" " movntq %%mm0, 16(%0,%%eax)\n" " movntq %%mm0, 24(%0,%%eax)\n" - " movntq %%mm0, 32(%0,%%eax)\n" - " movntq %%mm0, 40(%0,%%eax)\n" - " movntq %%mm0, 48(%0,%%eax)\n" - " movntq %%mm0, 56(%0,%%eax)\n" - " add $64, %%eax\n" + " add $32, %%eax\n" " decl %%ecx\n" " jne 1b\n" + " sfence\n" " emms\n" : "+r" (dest), "+r" (src) - : "c" (n/16) + : "c" (n/8) : "eax"); } -OIL_DEFINE_IMPL_FULL (splat_u32_ns_mmx, splat_u32_ns, OIL_IMPL_FLAG_MMX); +OIL_DEFINE_IMPL_FULL (splat_u32_ns_mmx, splat_u32_ns, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); diff --git a/liboil/copy/splat_ref.c b/liboil/copy/splat_ref.c index ddd1444..40f636b 100644 --- a/liboil/copy/splat_ref.c +++ b/liboil/copy/splat_ref.c @@ -144,7 +144,7 @@ static void splat_u8_ns_int (uint8_t *dest, uint8_t *param, int n) dest++; n--; } - n >>= 4; + n >>= 2; p = (*param<<24) | (*param<<16) | (*param<<8) | (*param); while(n>0){ *(uint32_t *)dest = p; diff --git a/liboil/copy/trans8x8_i386.c b/liboil/copy/trans8x8_i386.c index f0de418..12881e4 100644 --- a/liboil/copy/trans8x8_i386.c +++ b/liboil/copy/trans8x8_i386.c @@ -134,7 +134,7 @@ trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr) : "eax"); } -OIL_DEFINE_IMPL (trans8x8_u16_mmx, trans8x8_u16); +OIL_DEFINE_IMPL_FULL (trans8x8_u16_mmx, trans8x8_u16, OIL_IMPL_FLAG_MMX); static void trans8x8_u16_asm1 (uint16_t *dest, int dstr, uint16_t *src, int sstr) diff --git a/liboil/dct/idct8x8_i386.c b/liboil/dct/idct8x8_i386.c index 436039c..e8a88c2 100644 --- a/liboil/dct/idct8x8_i386.c +++ b/liboil/dct/idct8x8_i386.c @@ -388,7 +388,7 @@ idct8x8_s16_mmx (int16_t *dest, int dstr, int16_t *src, int sstr) : "m" (dest), "m" (src), "m" (dstr), "m" (sstr), "r" (tmp), "r" (dct_mmx_constants), "m" (save_ebx) : "eax", "ecx", "edx"); } -OIL_DEFINE_IMPL_FULL (idct8x8_s16_mmx, idct8x8_s16, OIL_IMPL_FLAG_MMX); +OIL_DEFINE_IMPL_FULL (idct8x8_s16_mmx, idct8x8_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); #if 0 #define CONST(x) (32768.0*(x) + 0.5) @@ -740,5 +740,5 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr) : "eax", "ecx", "edx"); } -OIL_DEFINE_IMPL_FULL (fdct8x8s_s16_mmx, fdct8x8s_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL (fdct8x8s_s16_mmx, fdct8x8s_s16, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h index 2744781..f855a73 100644 --- a/liboil/liboilfuncs.h +++ b/liboil/liboilfuncs.h @@ -54,6 +54,9 @@ typedef void (*_oil_type_abs_u8_s8)(uint8_t * dest, int dstr, const int8_t * src extern OilFunctionClass *oil_function_class_ptr_argb_paint_u8; typedef void (*_oil_type_argb_paint_u8)(uint8_t * i_4xn, const uint8_t * s1_4, const uint8_t * s2_n, int n); #define oil_argb_paint_u8 ((_oil_type_argb_paint_u8)(*(void **)oil_function_class_ptr_argb_paint_u8)) +extern OilFunctionClass *oil_function_class_ptr_argb_splat_u8; +typedef void (*_oil_type_argb_splat_u8)(uint8_t * i_4xn, const uint8_t * s1_4, int n); +#define oil_argb_splat_u8 ((_oil_type_argb_splat_u8)(*(void **)oil_function_class_ptr_argb_splat_u8)) extern OilFunctionClass *oil_function_class_ptr_average2_u8; typedef void (*_oil_type_average2_u8)(uint8_t * dest, int dstr, const uint8_t * src1, int sstr1, const uint8_t * src2, int sstr2, int n); #define oil_average2_u8 ((_oil_type_average2_u8)(*(void **)oil_function_class_ptr_average2_u8)) @@ -450,12 +453,21 @@ typedef void (*_oil_type_permute_u32)(uint32_t * dest, int dstr, const uint32_t extern OilFunctionClass *oil_function_class_ptr_permute_u8; typedef void (*_oil_type_permute_u8)(uint8_t * dest, int dstr, const uint8_t * src1, int sstr1, const int32_t * src2, int sstr2, int n); #define oil_permute_u8 ((_oil_type_permute_u8)(*(void **)oil_function_class_ptr_permute_u8)) +extern OilFunctionClass *oil_function_class_ptr_resample_linear_argb_u8; +typedef void (*_oil_type_resample_linear_argb_u8)(uint8_t * d_4xn, const uint8_t * s_8xn, int n, uint32_t * i_2); +#define oil_resample_linear_argb_u8 ((_oil_type_resample_linear_argb_u8)(*(void **)oil_function_class_ptr_resample_linear_argb_u8)) +extern OilFunctionClass *oil_function_class_ptr_resample_linear_u8; +typedef void (*_oil_type_resample_linear_u8)(uint8_t * d_n, const uint8_t * s_2xn, int n, uint32_t * i_2); +#define oil_resample_linear_u8 ((_oil_type_resample_linear_u8)(*(void **)oil_function_class_ptr_resample_linear_u8)) extern OilFunctionClass *oil_function_class_ptr_rgb2bgr; typedef void (*_oil_type_rgb2bgr)(uint8_t * d_3xn, const uint8_t * s_3xn, int n); #define oil_rgb2bgr ((_oil_type_rgb2bgr)(*(void **)oil_function_class_ptr_rgb2bgr)) extern OilFunctionClass *oil_function_class_ptr_rgb2rgba; typedef void (*_oil_type_rgb2rgba)(uint8_t * d_4xn, const uint8_t * s_3xn, int n); #define oil_rgb2rgba ((_oil_type_rgb2rgba)(*(void **)oil_function_class_ptr_rgb2rgba)) +extern OilFunctionClass *oil_function_class_ptr_rgba_splat_u8; +typedef void (*_oil_type_rgba_splat_u8)(uint8_t * i_4xn, const uint8_t * s1_4, int n); +#define oil_rgba_splat_u8 ((_oil_type_rgba_splat_u8)(*(void **)oil_function_class_ptr_rgba_splat_u8)) extern OilFunctionClass *oil_function_class_ptr_sad8x8_f64; typedef void (*_oil_type_sad8x8_f64)(double * d_8x8, int ds, const double * s1_8x8, int ss1, const double * s2_8x8, int ss2); #define oil_sad8x8_f64 ((_oil_type_sad8x8_f64)(*(void **)oil_function_class_ptr_sad8x8_f64)) diff --git a/liboil/liboilmarshal.c b/liboil/liboilmarshal.c index 3631aa6..4ee633a 100644 --- a/liboil/liboilmarshal.c +++ b/liboil/liboilmarshal.c @@ -52,18 +52,18 @@ _oil_test_marshal_function (void *func, unsigned long *args, int n_args, ((void *)args[0],(void *)args[1],(void *)args[2],(int)args[3]); oil_profile_stop (prof); break; - case 0x00d4: - oil_profile_start (prof); - ((void (*)(void *,int,void *,int,void *,int,int))func) - ((void *)args[0],(int)args[1],(void *)args[2],(int)args[3],(void *)args[4],(int)args[5],(int)args[6]); - oil_profile_stop (prof); - break; case 0x000e: oil_profile_start (prof); ((void (*)(void *,void *,int))func) ((void *)args[0],(void *)args[1],(int)args[2]); oil_profile_stop (prof); break; + case 0x00d4: + oil_profile_start (prof); + ((void (*)(void *,int,void *,int,void *,int,int))func) + ((void *)args[0],(int)args[1],(void *)args[2],(int)args[3],(void *)args[4],(int)args[5],(int)args[6]); + oil_profile_stop (prof); + break; case 0x00d3: oil_profile_start (prof); ((void (*)(void *,int,void *,int,int,void *,void *))func) @@ -118,6 +118,12 @@ _oil_test_marshal_function (void *func, unsigned long *args, int n_args, (); oil_profile_stop (prof); break; + case 0x001d: + oil_profile_start (prof); + ((void (*)(void *,void *,int,void *))func) + ((void *)args[0],(void *)args[1],(int)args[2],(void *)args[3]); + oil_profile_stop (prof); + break; case 0x003a: oil_profile_start (prof); ((void (*)(void *,void *,int,void *,int))func) diff --git a/liboil/liboiltest.c b/liboil/liboiltest.c index 5e3d13d..2f5c430 100644 --- a/liboil/liboiltest.c +++ b/liboil/liboiltest.c @@ -79,7 +79,7 @@ oil_test_new (OilFunctionClass *klass) } test->iterations = 10; - test->n = 100; + test->n = 1000; test->m = 100; return test; @@ -172,6 +172,16 @@ oil_test_check_function (void * priv) oil_profile_init (&test->prof); for(i=0;i<test->iterations;i++){ + int k; + + for(k=0;k<test->proto->n_params;k++){ + OilParameter *p; + j = test->proto->params[k].parameter_type; + p = &test->params[j]; + if (p->direction == 'i') { + memcpy (p->test_data, p->src_data, p->size); + } + } _oil_test_marshal_function (test->impl->func, args, test->proto->n_params, pointer_mask, &test->prof); } diff --git a/liboil/simdpack/average2_u8.c b/liboil/simdpack/average2_u8.c index 3c3f546..fe07aa8 100644 --- a/liboil/simdpack/average2_u8.c +++ b/liboil/simdpack/average2_u8.c @@ -45,7 +45,7 @@ average2_u8_ref (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, int i; for (i = 0; i < n; i++) { - dest[i] = (src1[sstr1 * i] + src2[sstr2 * i]) >> 1; + dest[dstr * i] = (src1[sstr1 * i] + src2[sstr2 * i]) >> 1; } } @@ -57,7 +57,8 @@ average2_u8_trick (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, { unsigned int x, y, d; - if (sstr1 == 1 && sstr2 == 1) { +#if 0 + if (sstr1 == 1 && sstr2 == 1 && dstr == 1) { while (n > 0) { x = *(unsigned int *) src1; y = *(unsigned int *) src2; @@ -67,8 +68,9 @@ average2_u8_trick (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, dest += 4; n -= 4; } - } - else { + } else +#endif + { while (n > 0) { x = (src1[0] << 24) | (src1[sstr1] << 16) | (src1[2 * sstr1] << 8) | (src1[3 * sstr1]); @@ -76,12 +78,12 @@ average2_u8_trick (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, sstr2] << 8) | (src2[3 * sstr2]); d = (((x ^ y) & 0xfefefefe) >> 1) + (x & y); dest[0] = (d >> 24); - dest[1] = (d >> 16); - dest[2] = (d >> 8); - dest[3] = (d >> 0); + dest[1*dstr] = (d >> 16); + dest[2*dstr] = (d >> 8); + dest[3*dstr] = (d >> 0); src1 += 4 * sstr1; src2 += 4 * sstr2; - dest += 4; + dest += 4 * dstr; n -= 4; } } @@ -94,9 +96,10 @@ average2_u8_fast (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, uint8_t *src2, int sstr2, int n) { while (n > 0) { - *dest++ = (*src1 + *src2) >> 1; + *dest = (*src1 + *src2) >> 1; src1 += sstr1; src2 += sstr2; + dest += dstr; n--; } } @@ -108,22 +111,26 @@ average2_u8_unroll4 (uint8_t * dest, int dstr, uint8_t *src1, int sstr1, uint8_t *src2, int sstr2, int n) { while (n & 0x3) { - *dest++ = (*src1 + *src2) >> 1; + *dest = (*src1 + *src2) >> 1; src1 += sstr1; src2 += sstr2; n--; } while (n > 0) { - *dest++ = (*src1 + *src2) >> 1; + *dest = (*src1 + *src2) >> 1; + dest += dstr; src1 += sstr1; src2 += sstr2; - *dest++ = (*src1 + *src2) >> 1; + *dest = (*src1 + *src2) >> 1; + dest += dstr; src1 += sstr1; src2 += sstr2; - *dest++ = (*src1 + *src2) >> 1; + *dest = (*src1 + *src2) >> 1; + dest += dstr; src1 += sstr1; src2 += sstr2; - *dest++ = (*src1 + *src2) >> 1; + *dest = (*src1 + *src2) >> 1; + dest += dstr; src1 += sstr1; src2 += sstr2; n -= 4; diff --git a/liboil/sse/conv_sse.c b/liboil/sse/conv_sse.c index 0c51973..3602221 100644 --- a/liboil/sse/conv_sse.c +++ b/liboil/sse/conv_sse.c @@ -84,5 +84,5 @@ conv_f32_s32_sse (float *dst, int dst_stride, int32_t * src, int src_stride, _m_empty(); } -OIL_DEFINE_IMPL_FULL (conv_f32_s32_sse, conv_f32_s32, OIL_IMPL_FLAG_SSE); +OIL_DEFINE_IMPL_FULL (conv_f32_s32_sse, conv_f32_s32, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_SSE); diff --git a/testsuite/Makefile.am b/testsuite/Makefile.am index 0bc4c72..db7c347 100644 --- a/testsuite/Makefile.am +++ b/testsuite/Makefile.am @@ -1,4 +1,6 @@ +SUBDIRS = instruction + if HAVE_GLIB glib_programs = abs md5 md5_profile trans copy else diff --git a/testsuite/instruction/Makefile.am b/testsuite/instruction/Makefile.am new file mode 100644 index 0000000..804d643 --- /dev/null +++ b/testsuite/instruction/Makefile.am @@ -0,0 +1,15 @@ + +noinst_PROGRAMS = list-impls + +noinst_DATA = report + +AM_LDFLAGS = $(LIBOIL_LIBS) +AM_CFLAGS = $(LIBOIL_CFLAGS) + + +report: list-impls + ./check-instructions.pl $(top_builddir)/liboil/.libs/liboiltmp1.so >report + + +EXTRA_CLEAN = report + diff --git a/testsuite/instruction/check-instructions.pl b/testsuite/instruction/check-instructions.pl new file mode 100755 index 0000000..fa250ee --- /dev/null +++ b/testsuite/instruction/check-instructions.pl @@ -0,0 +1,444 @@ +#!/usr/bin/perl -w +# vi: set ts=4: +# + +$debug = 0; + +sub get_flags +{ + my @list = @insns; + my $exts = {}; + my $s; + + $debug && print "function: $func\n"; + foreach $insn (@list) { + if (grep { /^$insn$/ } @normal_list) { + $debug && print " $insn: normal\n"; + }elsif (grep { /^$insn$/ } @mmx_list) { + $exts->{"mmx"} = 1; + $debug && print " $insn: mmx\n"; + }elsif (grep { /^$insn$/ } @mmx_ext_list) { + $exts->{"mmxext"} = 1; + $debug && print " $insn: mmxext\n"; + }elsif (grep { /^$insn$/ } @_3dnow_list) { + $exts->{"3dnow"} = 1; + $debug && print " $insn: 3dnow\n"; + }elsif (grep { /^$insn$/ } @_3dnow_ext_list) { + $exts->{"3dnowext"} = 1; + $debug && print " $insn: 3dnowext\n"; + }elsif (grep { /^$insn$/ } @sse_list) { + $exts->{"sse"} = 1; + $debug && print " $insn: sse\n"; + }elsif (grep { /^$insn$/ } @sse2_list) { + $exts->{"sse2"} = 1; + $debug && print " $insn: sse2\n"; + }else { + print "FIXME:\t\"$insn\",\n"; + $error = 1; + } + } + $s = join(" ",sort(keys(%$exts))); + $funcs->{"$func"} = $s; + $debug && print " FLAGS: $s\n"; +} + +sub check +{ + foreach $insn (@normal_list) { + if (grep { /^$insn$/ } @mmx_list) { + print "FIXME: $insn is in mmx_list\n"; + $error = 1; + } elsif (grep { /^$insn$/ } @mmx_ext_list) { + print "FIXME: $insn is in mmx_ext_list\n"; + $error = 1; + } elsif (grep { /^$insn$/ } @_3dnow_list) { + print "FIXME: $insn is in _3dnow_list\n"; + $error = 1; + } elsif (grep { /^$insn$/ } @_3dnow_ext_list) { + print "FIXME: $insn is in _3dnow_ext_list\n"; + $error = 1; + } elsif (grep { /^$insn$/ } @sse_list) { + print "FIXME: $insn is in sse_list\n"; + $error = 1; + } elsif (grep { /^$insn$/ } @sse2_list) { + print "FIXME: $insn is in sse2_list\n"; + $error = 1; + } + } +} + +@normal_list = ( + "add", + "addl", + "and", + "andl", + "call", + "cld", + "cltd", + "cmovg", + "cmp", + "cmpb", + "cmpl", + "cwtl", + "dec", + "decl", + "fabs", + "fadd", + "faddl", + "faddp", + "fadds", + "fdivrl", + "fild", + "fildl", + "fildll", + "fistp", + "fistpl", + "fistpll", + "fldcw", + "fldl", + "flds", + "fldz", + "fmul", + "fmull", + "fmulp", + "fmuls", + "fnstcw", + "fnstsw", + "fsqrt", + "fstl", + "fstp", + "fstpl", + "fstps", + "fsts", + "fsub", + "fsubl", + "fsubp", + "fsubr", + "fsubrl", + "fsubrp", + "fucom", + "fucomp", + "fucompp", + "fxch", + "imul", + "inc", + "incl", + "ja", + "jae", + "jbe", + "je", + "jg", + "jge", + "jl", + "jle", + "jmp", + "jne", + "jns", + "jp", + "js", + "lea", + "leave", + "mov", + "movb", + "movl", + "movsbl", + "movsbw", + "movswl", + "movsww", + "movzbl", + "movzbw", + "movzwl", + "mulb", + "neg", + "nop", + "not", + "or", + "pop", + "push", + "pushl", + "repz", + "ret", + "rol", + "sahf", + "sar", + "sarl", + "shl", + "shr", + "sub", + "subl", + "test", + "testb", + "testl", + "xor", +); + +# verified +@mmx_list = ( + "emms", + "movd", + "movq", + "packssdw", + "packsswb", + "packuswb", + "paddb", + "paddd", + "paddsb", + "paddsw", + "paddusb", + "paddusw", + "paddw", + "pand", + "pandn", + "pcmpeqb", + "pcmpeqd", + "pcmpgtb", + "pcmpgtd", + "pcmpgtw", + "pmaddwd", + "pmulhw", + "pmullw", + "por", + "pslld", + "psllq", + "psllw", + "psrad", + "psraw", + "psrld", + "psrlq", + "psrlw", + "psubb", + "psubd", + "psubsb", + "psubsw", + "psubusb", + "psubusw", + "psubw", + "punpckhbw", + "punpckhdq", + "punpckhwd", + "punpcklbw", + "punpckldq", + "punpcklwd", + "pxor" +); + +# verified +@_3dnow_list = ( + "femms", + "pavgusb", + "pf2id", + "pfacc", + "pfadd", + "pfcmpeq", + "pfcmpge", + "pfcmpgt", + "pfmax", + "pfmin", + "pfmul", + "pfrcp", + "pfrcpit1", + "pfrcpit2", + "pfrsqit1", + "pfrsqrt", + "pfsub", + "pfsubr", + "pi2fd", + "pmulhrw", + "prefetch", + "prefetchw" +); + +# verified +@_3dnow_ext_list = ( + "pf2iw", + "pfnacc", + "pfpnacc", + "pi2fw", + "pswapd" +); + +# verified +@mmx_ext_list = ( + "maskmovq", + "movntq", + "pavgb", + "pavgw", + "pextrw", + "pinsrw", + "pmaxsw", + "pmaxub", + "pminsw", + "pminub", + "pmovmskb", + "pmulhuw", + "prefetchnta", + "prefetch0", + "prefetch1", + "prefetch2", + "psadbw", + "pshufw", + "sfence" +); + +# verified +@sse_list = ( + "addps", + "addss", + "andnps", + "andps", + "cmpps", + "cmpss", + "comiss", + "cvtpi2ps", + "cvtps2ps", + "cvtsi2ss", + "cvtss2si", + "cvttps2pi", + "cvttss2si", + "divps", + "divss", + "fxrstor", + "fxsave", + "ldmxcsr", + "maxps", + "maxss", + "minps", + "minss", + "movaps", + "movhlps", + "movhps", + "movlhps", + "movlps", + "movmskps", + "movss", + "movups", + "mulps", + "mulss", + "orps", + "rcpps", + "rcpss", + "rsqrtps", + "rsqrtss", + "shufps", + "sqrtps", + "sqrtss", + "stmxcsr", + "subps", + "subss", + "ucomiss", + "unpckhps", + "unpcklps", + "xorps" +); + +@sse2_list = ( + "addpd", + "addsd", + "andnpd", + "andpd", + "cmppd", + "cmpsd", + "comisd", + "cvtpi2pd", + "cvtpd2pi", + "cvtsi2sd", + "cvtsd2si", + "cvttpd2pi", + "cvttsd2si", + "cvtpd2ps", + "cvtps2pd", + "cvtsd2ss", + "cvtss2sd", + "cvtps2dq", + "cvttpd2dq", + "cvtdq2pd", + "cvtps2dq", + "cvttps2dq", + "cvtdq2ps", + "divpd", + "divsd", + "maxpd", + "maxsd", + "minsd", + "movapd", + "movhpd", + "movlpd", + "movmskpd", + "movsd", + "movupd", + "mulpd", + "mulsd", + "orpd", + "shufpd", + "sqrtpd", + "sqrtsd", + "subpd", + "subsd", + "ucomisd", + "unpckhpd", + "unpcklpd", + "xorpd", +); + +$funcs = {}; + +$ARGV=shift @ARGV; +@output=`objdump -dr $ARGV`; + +check(); + +$error = 0; +@insns = (); +while($_=shift @output){ + chomp; + if(m/^0[0-9a-fA-F]+\s<[\.\w]+>:$/){ + $f = $_; + $f =~ s/^0[0-9a-fA-F]+\s<([\.\w]+)>:$/$1/; + + if (@insns) { + get_flags (); + } + + $func = $f; + + @insns = (); + $debug && print "$func:\n"; + + } elsif(m/^[\s0-9a-f]+:\s[\s0-9a-f]+\s([a-z0-9]+)\s/){ + if (!grep { /$1/ } @insns) { + push @insns, $1; + } + #print " $1\n"; + } elsif (m/^$/) { + } elsif (m/^Disassembly of section/) { + } elsif (m/\sfile format\s/) { + } else { + print "FIXME: $_\n"; + $error = 1; + } +} + +@source = `./list-impls`; +while ($_ = shift @source) { + chomp; + if (m/^([\w\.]+):\s*([\w\s*]*)/) { + $func = $1; + $flags = $2; + + $xflags = $funcs->{$func}; + if (1) { + if ($flags ne $xflags) { + print "$func: \"$flags\" should be \"$xflags\"\n"; + } + } else { + print "FIXME: function \"$func\" has no disassembly\n"; + $error = 1; + } + } else { + print "FIXME: bad match: $_\n"; + } +} + +exit $error; + + diff --git a/testsuite/instruction/list-impls.c b/testsuite/instruction/list-impls.c new file mode 100644 index 0000000..905f64b --- /dev/null +++ b/testsuite/instruction/list-impls.c @@ -0,0 +1,83 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2004 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <liboil/liboil.h> +#include <liboil/liboilfunction.h> +#include <liboil/liboilcpu.h> +#include <liboil/liboiltest.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> + +int +main (int argc, char *argv[]) +{ + int n; + int i; + + oil_init(); + + n = oil_class_get_n_classes (); + for (i = 0; i < n; i++){ + OilFunctionClass *klass = oil_class_get_by_index (i); + OilFunctionImpl *impl; + + for (impl = klass->first_impl; impl; impl = impl->next) { + printf("%s:", impl->name); +#ifdef HAVE_CPU_I386 + if (impl->flags & OIL_IMPL_FLAG_3DNOW) + printf(" 3dnow"); + if (impl->flags & OIL_IMPL_FLAG_3DNOWEXT) + printf(" 3dnowext"); + if (impl->flags & OIL_IMPL_FLAG_CMOV) + printf(" cmov"); + if (impl->flags & OIL_IMPL_FLAG_MMX) + printf(" mmx"); + if (impl->flags & OIL_IMPL_FLAG_MMXEXT) + printf(" mmxext"); + if (impl->flags & OIL_IMPL_FLAG_SSE) + printf(" sse"); + if (impl->flags & OIL_IMPL_FLAG_SSE2) + printf(" sse2"); +#endif +#ifdef HAVE_CPU_PPC + if (impl->flags & OIL_IMPL_FLAG_ALTIVEC) + printf(" altivec"); +#endif + printf("\n"); + } + } + + return 0; +} + |