diff options
-rw-r--r-- | ChangeLog | 17 | ||||
-rw-r--r-- | configure.ac | 7 | ||||
-rw-r--r-- | liboil/Makefile.am | 4 | ||||
-rw-r--r-- | liboil/sse/Makefile.am | 3 | ||||
-rw-r--r-- | liboil/sse/composite_sse_2pix.c | 22 | ||||
-rw-r--r-- | liboil/sse/composite_sse_4pix.c | 23 | ||||
-rw-r--r-- | liboil/sse/sad8x8_sse.c | 4 | ||||
-rw-r--r-- | liboil/sse/sse_wrapper.h | 34 | ||||
-rw-r--r-- | m4/as-gcc-inline-assembly.m4 | 4 | ||||
-rw-r--r-- | testsuite/stack_align.c | 22 |
10 files changed, 104 insertions, 36 deletions
@@ -1,6 +1,23 @@ 2007-03-16 David Schleef <ds@schleef.org> * configure.ac: + * liboil/Makefile.am: + * liboil/sse/Makefile.am: + * liboil/sse/composite_sse_2pix.c: + * liboil/sse/composite_sse_4pix.c: + * liboil/sse/sad8x8_sse.c: + * liboil/sse/sse_wrapper.h: + Add idea to wrap SSE2 functions that have trouble with + unaligned stacks with a function that aligns the stack. From + Christian Aichinger. + * m4/as-gcc-inline-assembly.m4: + Fix test for gcc version. + * testsuite/stack_align.c: + improve somewhat + +2007-03-16 David Schleef <ds@schleef.org> + + * configure.ac: version bump to 0.3.11 2007-03-16 David Schleef <ds@schleef.org> diff --git a/configure.ac b/configure.ac index a59e4f4..9139d58 100644 --- a/configure.ac +++ b/configure.ac @@ -164,6 +164,12 @@ then fi AM_CONDITIONAL(HAVE_ASM_BLOCKS, test "x$HAVE_ASM_BLOCKS" = "xyes") +if test "x$HAVE_I386" = "xyes" +then + # I'd write a test for this, but as of 4.1.2, gcc is still broken + AC_DEFINE(USE_SSE_WRAPPER, 1, [Defined if SSE functions need stack alignment wrappers]) +fi + AS_MMX_INTRINSICS(MMX_CFLAGS, HAVE_MMX_INTRINSICS=yes, HAVE_MMX_INTRINSICS=no) AS_SSE_INTRINSICS(SSE_CFLAGS, HAVE_SSE_INTRINSICS=yes, HAVE_SSE_INTRINSICS=no) AS_SSE2_INTRINSICS(SSE2_CFLAGS, HAVE_SSE2_INTRINSICS=yes, HAVE_SSE2_INTRINSICS=no) @@ -188,6 +194,7 @@ AC_SUBST(_3DNOW_CFLAGS) AC_SUBST(_3DNOWEXT_CFLAGS) AC_SUBST(ALTIVEC_CFLAGS) + LIBOIL_CFLAGS="$LIBOIL_CFLAGS -D_BSD_SOURCE -D_GNU_SOURCE -I\$(top_srcdir)" AC_SUBST(LIBOIL_CFLAGS) diff --git a/liboil/Makefile.am b/liboil/Makefile.am index 073381c..a702c81 100644 --- a/liboil/Makefile.am +++ b/liboil/Makefile.am @@ -20,8 +20,8 @@ SUBDIRS += mmx libs += mmx/libmmx.la endif if HAVE_SSE2_INTRINSICS -#SUBDIRS += fb sse -#libs += fb/libfb.la sse/libsse.la +SUBDIRS += fb sse +libs += fb/libfb.la sse/libsse.la endif if HAVE_3DNOW_INTRINSICS #subdir_i386 += 3dnow diff --git a/liboil/sse/Makefile.am b/liboil/sse/Makefile.am index 5aa0c86..72ff44d 100644 --- a/liboil/sse/Makefile.am +++ b/liboil/sse/Makefile.am @@ -11,7 +11,8 @@ libsse_la_SOURCES = \ math_sse_unroll2.c \ multsum_sse.c \ sad8x8_sse.c \ - splat_sse.c + splat_sse.c \ + sse_wrapper.h libsse_la_CFLAGS = $(SSE_CFLAGS) $(SSE2_CFLAGS) $(LIBOIL_CFLAGS) diff --git a/liboil/sse/composite_sse_2pix.c b/liboil/sse/composite_sse_2pix.c index dd24853..cb64ced 100644 --- a/liboil/sse/composite_sse_2pix.c +++ b/liboil/sse/composite_sse_2pix.c @@ -32,6 +32,8 @@ #include <emmintrin.h> #include <liboil/liboilcolorspace.h> +#include "sse_wrapper.h" + /* non-SSE2 compositing support */ #define COMPOSITE_OVER(d,s,m) ((d) + (s) - oil_muldiv_255((d),(m))) #define COMPOSITE_ADD(d,s) oil_clamp_255((d) + (s)) @@ -160,7 +162,7 @@ composite_in_argb_sse_2pix (uint32_t *dest, const uint32_t *src, COMPOSITE_IN(oil_argb_B(s), m)); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_sse_2pix, composite_in_argb, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_argb_sse_2pix, composite_in_argb, OIL_IMPL_FLAG_SSE2); static void @@ -188,9 +190,10 @@ composite_in_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src, COMPOSITE_IN(oil_argb_B(*src), m)); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_2pix, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_argb_const_src_sse_2pix, composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2); +#ifdef SSE_ALIGN static void composite_in_argb_const_mask_sse_2pix (uint32_t *dest, const uint32_t *src, const uint8_t *mask, int n) @@ -216,8 +219,9 @@ composite_in_argb_const_mask_sse_2pix (uint32_t *dest, const uint32_t *src, COMPOSITE_IN(oil_argb_B(s), mask[0])); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_2pix, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_argb_const_mask_sse_2pix, composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2); +#endif static void composite_over_argb_sse_2pix (uint32_t *dest, const uint32_t *src, int n) @@ -242,7 +246,7 @@ composite_over_argb_sse_2pix (uint32_t *dest, const uint32_t *src, int n) *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_over_argb_sse_2pix, composite_over_argb, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_over_argb_sse_2pix, composite_over_argb, OIL_IMPL_FLAG_SSE2); static void @@ -272,7 +276,7 @@ composite_over_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src, *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_2pix, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_over_argb_const_src_sse_2pix, composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -309,7 +313,7 @@ composite_in_over_argb_sse_2pix (uint32_t *dest, const uint32_t *src, *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse_2pix, composite_in_over_argb, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_over_argb_sse_2pix, composite_in_over_argb, OIL_IMPL_FLAG_SSE2); static void @@ -348,7 +352,7 @@ composite_in_over_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src, *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_2pix, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_over_argb_const_src_sse_2pix, composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -387,7 +391,7 @@ composite_in_over_argb_const_mask_sse_2pix (uint32_t *dest, const uint32_t *src, *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_2pix, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_over_argb_const_mask_sse_2pix, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2); static void @@ -414,5 +418,5 @@ composite_over_u8_sse_2pix (uint8_t *dest, const uint8_t *src, int n) dest++; } } -OIL_DEFINE_IMPL_FULL (composite_over_u8_sse_2pix, composite_over_u8, +OIL_DEFINE_IMPL_FULL_WRAPPER(composite_over_u8_sse_2pix, composite_over_u8, OIL_IMPL_FLAG_SSE2); diff --git a/liboil/sse/composite_sse_4pix.c b/liboil/sse/composite_sse_4pix.c index be9dd5a..7614976 100644 --- a/liboil/sse/composite_sse_4pix.c +++ b/liboil/sse/composite_sse_4pix.c @@ -32,6 +32,10 @@ #include <emmintrin.h> #include <liboil/liboilcolorspace.h> +#include "sse_wrapper.h" + + + union m128_int { __m128i m128; uint64_t ull[2]; @@ -193,7 +197,7 @@ composite_in_argb_sse (uint32_t *dest, const uint32_t *src, const uint8_t *mask, COMPOSITE_IN(oil_argb_B(s), m)); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_sse, composite_in_argb, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_argb_sse, composite_in_argb, OIL_IMPL_FLAG_SSE2); static void @@ -230,7 +234,7 @@ composite_in_argb_const_src_sse (uint32_t *dest, const uint32_t *src, COMPOSITE_IN(oil_argb_B(*src), m)); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_argb_const_src_sse, composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -267,7 +271,7 @@ composite_in_argb_const_mask_sse (uint32_t *dest, const uint32_t *src, COMPOSITE_IN(oil_argb_B(s), mask[0])); } } -OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_argb_const_mask_sse, composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2); static void @@ -302,7 +306,7 @@ composite_over_argb_sse (uint32_t *dest, const uint32_t *src, int n) *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_over_argb_sse, composite_over_argb, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_over_argb_sse, composite_over_argb, OIL_IMPL_FLAG_SSE2); static void @@ -339,7 +343,7 @@ composite_over_argb_const_src_sse (uint32_t *dest, const uint32_t *src, int n) *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_over_argb_const_src_sse, composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -392,7 +396,7 @@ composite_in_over_argb_sse (uint32_t *dest, const uint32_t *src, *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse, composite_in_over_argb, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_over_argb_sse, composite_in_over_argb, OIL_IMPL_FLAG_SSE2); static void @@ -447,7 +451,7 @@ composite_in_over_argb_const_src_sse (uint32_t *dest, const uint32_t *src, *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_over_argb_const_src_sse, composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2); static void @@ -502,7 +506,7 @@ composite_in_over_argb_const_mask_sse (uint32_t *dest, const uint32_t *src, *dest++ = d; } } -OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_over_argb_const_mask_sse, composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2); static void @@ -529,5 +533,6 @@ composite_over_u8_sse (uint8_t *dest, const uint8_t *src, int n) dest++; } } -OIL_DEFINE_IMPL_FULL (composite_over_u8_sse, composite_over_u8, +OIL_DEFINE_IMPL_FULL_WRAPPER (composite_over_u8_sse, composite_over_u8, OIL_IMPL_FLAG_SSE2); + diff --git a/liboil/sse/sad8x8_sse.c b/liboil/sse/sad8x8_sse.c index ec532e6..c1b66d2 100644 --- a/liboil/sse/sad8x8_sse.c +++ b/liboil/sse/sad8x8_sse.c @@ -31,6 +31,8 @@ #include <liboil/liboilfunction.h> #include <emmintrin.h> +#include "sse_wrapper.h" + union m128_int { __m128i m128; uint32_t i[4]; @@ -60,4 +62,4 @@ sad8x8_u8_sse (uint32_t *dest, uint8_t *src1, int sstr1, uint8_t *src2, sumi.m128 = sum; *dest = sumi.i[0] + sumi.i[2]; } -OIL_DEFINE_IMPL_FULL (sad8x8_u8_sse, sad8x8_u8, OIL_IMPL_FLAG_SSE2); +OIL_DEFINE_IMPL_FULL_WRAPPER(sad8x8_u8_sse, sad8x8_u8, OIL_IMPL_FLAG_SSE2); diff --git a/liboil/sse/sse_wrapper.h b/liboil/sse/sse_wrapper.h new file mode 100644 index 0000000..bffba95 --- /dev/null +++ b/liboil/sse/sse_wrapper.h @@ -0,0 +1,34 @@ +#ifndef __SSE_WRAPPER_H__ +#define __SSE_WRAPPER_H__ + +/* A massive hack to work around gcc (and mono) alignment bugs. This + * realigns the stack to 16 bytes when calling a wrapped function. */ + +#ifdef USE_SSE_WRAPPER +#define OIL_SSE_WRAPPER(func) \ +static void func () __attribute__ ((used)); \ +static void func ## _wrapper (void) \ +{ \ + __asm__ __volatile__ ("\n" \ + " subl $0x20, %%esp\n" \ + " andl $0xfffffff0, %%esp\n" \ + " movdqu 0x08(%%ebp), %%xmm0\n" \ + " movdqa %%xmm0, 0x00(%%esp)\n" \ + " movdqu 0x18(%%ebp), %%xmm0\n" \ + " movdqa %%xmm0, 0x10(%%esp)\n" \ + " call " #func "\n" \ + " movl %%ebp, %%esp\n" \ + : : : "xmm0"); \ + (void)&func; \ +} + +#define OIL_DEFINE_IMPL_FULL_WRAPPER(func,klass,flags) \ +OIL_SSE_WRAPPER(func) \ +OIL_DEFINE_IMPL_FULL(func ## _wrapper, klass, flags) +#else +#define OIL_DEFINE_IMPL_FULL_WRAPPER(func,klass,flags) \ +OIL_DEFINE_IMPL_FULL(func, klass, flags) +#endif + +#endif + diff --git a/m4/as-gcc-inline-assembly.m4 b/m4/as-gcc-inline-assembly.m4 index 70cc7d0..72e56c2 100644 --- a/m4/as-gcc-inline-assembly.m4 +++ b/m4/as-gcc-inline-assembly.m4 @@ -4,7 +4,7 @@ dnl autostars m4 macro for detection of gcc inline assembly dnl David Schleef <ds@schleef.org> -dnl $Id: as-gcc-inline-assembly.m4,v 1.3 2007-03-16 23:30:02 ds Exp $ +dnl $Id: as-gcc-inline-assembly.m4,v 1.4 2007-03-17 02:03:30 ds Exp $ dnl AS_COMPILER_FLAG(ACTION-IF-ACCEPTED, [ACTION-IF-NOT-ACCEPTED]) dnl Tries to compile with the given CFLAGS. @@ -17,7 +17,7 @@ AC_DEFUN([AS_GCC_INLINE_ASSEMBLY], AC_TRY_COMPILE([], [ #ifdef __GNUC_MINOR__ -#if __GNUC_MAJOR__ * 1000 + __GNUC_MINOR__ < 3004 +#if (__GNUC__ * 1000 + __GNUC_MINOR__) < 3004 #error GCC before 3.4 has critical bugs compiling inline assembly #endif #endif diff --git a/testsuite/stack_align.c b/testsuite/stack_align.c index f028cf7..a6f9e78 100644 --- a/testsuite/stack_align.c +++ b/testsuite/stack_align.c @@ -43,7 +43,7 @@ #include <liboil/liboiltest.h> #include <liboil/liboilcpu.h> -int verbose = 0; +int verbose = 1; /* Amount by which results of different types are allowed to deviate from the * reference. @@ -292,27 +292,25 @@ int check_class(OilFunctionClass *klass) { OilTest *test; int failed = 0; - int i; + int align; + int step = 4; oil_class_optimize (klass); if(verbose) printf("checking class %s\n", klass->name); test = oil_test_new(klass); - for (i=0; i < OIL_ARG_LAST; i++) { - int align; - int step = 4; #ifdef HAVE_AMD64 - step = 16; + step = 16; #endif - for (align = 0; align <= 32; align += step) { - realign_klass = klass; - realign_align = align; - realign(align); - failed |= realign_return; - } + for (align = 0; align <= 32; align += step) { + printf(" alignment %d\n", align); + realign_klass = klass; + realign_align = align; + realign(align); + failed |= realign_return; } oil_test_free (test); |