summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 17
-rw-r--r-- configure.ac 7
-rw-r--r-- liboil/Makefile.am 4
-rw-r--r-- liboil/sse/Makefile.am 3
-rw-r--r-- liboil/sse/composite_sse_2pix.c 22
-rw-r--r-- liboil/sse/composite_sse_4pix.c 23
-rw-r--r-- liboil/sse/sad8x8_sse.c 4
-rw-r--r-- liboil/sse/sse_wrapper.h 34
-rw-r--r-- m4/as-gcc-inline-assembly.m4 4
-rw-r--r-- testsuite/stack_align.c 22
10 files changed, 104 insertions, 36 deletions
diff --git a/ChangeLog b/ChangeLog
index cb4200e..149279b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,23 @@
2007-03-16 David Schleef <ds@schleef.org>
* configure.ac:
+ * liboil/Makefile.am:
+ * liboil/sse/Makefile.am:
+ * liboil/sse/composite_sse_2pix.c:
+ * liboil/sse/composite_sse_4pix.c:
+ * liboil/sse/sad8x8_sse.c:
+ * liboil/sse/sse_wrapper.h:
+ Add idea to wrap SSE2 functions that have trouble with
+ unaligned stacks with a function that aligns the stack. From
+ Christian Aichinger.
+ * m4/as-gcc-inline-assembly.m4:
+ Fix test for gcc version.
+ * testsuite/stack_align.c:
+ improve somewhat
+
+2007-03-16 David Schleef <ds@schleef.org>
+
+ * configure.ac:
version bump to 0.3.11
2007-03-16 David Schleef <ds@schleef.org>
diff --git a/configure.ac b/configure.ac
index a59e4f4..9139d58 100644
--- a/configure.ac
+++ b/configure.ac
@@ -164,6 +164,12 @@ then
fi
AM_CONDITIONAL(HAVE_ASM_BLOCKS, test "x$HAVE_ASM_BLOCKS" = "xyes")
+if test "x$HAVE_I386" = "xyes"
+then
+ # I'd write a test for this, but as of 4.1.2, gcc is still broken
+ AC_DEFINE(USE_SSE_WRAPPER, 1, [Defined if SSE functions need stack alignment wrappers])
+fi
+
AS_MMX_INTRINSICS(MMX_CFLAGS, HAVE_MMX_INTRINSICS=yes, HAVE_MMX_INTRINSICS=no)
AS_SSE_INTRINSICS(SSE_CFLAGS, HAVE_SSE_INTRINSICS=yes, HAVE_SSE_INTRINSICS=no)
AS_SSE2_INTRINSICS(SSE2_CFLAGS, HAVE_SSE2_INTRINSICS=yes, HAVE_SSE2_INTRINSICS=no)
@@ -188,6 +194,7 @@ AC_SUBST(_3DNOW_CFLAGS)
AC_SUBST(_3DNOWEXT_CFLAGS)
AC_SUBST(ALTIVEC_CFLAGS)
+
LIBOIL_CFLAGS="$LIBOIL_CFLAGS -D_BSD_SOURCE -D_GNU_SOURCE -I\$(top_srcdir)"
AC_SUBST(LIBOIL_CFLAGS)
diff --git a/liboil/Makefile.am b/liboil/Makefile.am
index 073381c..a702c81 100644
--- a/liboil/Makefile.am
+++ b/liboil/Makefile.am
@@ -20,8 +20,8 @@ SUBDIRS += mmx
libs += mmx/libmmx.la
endif
if HAVE_SSE2_INTRINSICS
-#SUBDIRS += fb sse
-#libs += fb/libfb.la sse/libsse.la
+SUBDIRS += fb sse
+libs += fb/libfb.la sse/libsse.la
endif
if HAVE_3DNOW_INTRINSICS
#subdir_i386 += 3dnow
diff --git a/liboil/sse/Makefile.am b/liboil/sse/Makefile.am
index 5aa0c86..72ff44d 100644
--- a/liboil/sse/Makefile.am
+++ b/liboil/sse/Makefile.am
@@ -11,7 +11,8 @@ libsse_la_SOURCES = \
math_sse_unroll2.c \
multsum_sse.c \
sad8x8_sse.c \
- splat_sse.c
+ splat_sse.c \
+ sse_wrapper.h
libsse_la_CFLAGS = $(SSE_CFLAGS) $(SSE2_CFLAGS) $(LIBOIL_CFLAGS)
diff --git a/liboil/sse/composite_sse_2pix.c b/liboil/sse/composite_sse_2pix.c
index dd24853..cb64ced 100644
--- a/liboil/sse/composite_sse_2pix.c
+++ b/liboil/sse/composite_sse_2pix.c
@@ -32,6 +32,8 @@
#include <emmintrin.h>
#include <liboil/liboilcolorspace.h>
+#include "sse_wrapper.h"
+
/* non-SSE2 compositing support */
#define COMPOSITE_OVER(d,s,m) ((d) + (s) - oil_muldiv_255((d),(m)))
#define COMPOSITE_ADD(d,s) oil_clamp_255((d) + (s))
@@ -160,7 +162,7 @@ composite_in_argb_sse_2pix (uint32_t *dest, const uint32_t *src,
COMPOSITE_IN(oil_argb_B(s), m));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_sse_2pix, composite_in_argb,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_argb_sse_2pix, composite_in_argb,
OIL_IMPL_FLAG_SSE2);
static void
@@ -188,9 +190,10 @@ composite_in_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src,
COMPOSITE_IN(oil_argb_B(*src), m));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse_2pix,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_argb_const_src_sse_2pix,
composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2);
+#ifdef SSE_ALIGN
static void
composite_in_argb_const_mask_sse_2pix (uint32_t *dest, const uint32_t *src,
const uint8_t *mask, int n)
@@ -216,8 +219,9 @@ composite_in_argb_const_mask_sse_2pix (uint32_t *dest, const uint32_t *src,
COMPOSITE_IN(oil_argb_B(s), mask[0]));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse_2pix,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_argb_const_mask_sse_2pix,
composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2);
+#endif
static void
composite_over_argb_sse_2pix (uint32_t *dest, const uint32_t *src, int n)
@@ -242,7 +246,7 @@ composite_over_argb_sse_2pix (uint32_t *dest, const uint32_t *src, int n)
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_over_argb_sse_2pix, composite_over_argb,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_over_argb_sse_2pix, composite_over_argb,
OIL_IMPL_FLAG_SSE2);
static void
@@ -272,7 +276,7 @@ composite_over_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src,
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse_2pix,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_over_argb_const_src_sse_2pix,
composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -309,7 +313,7 @@ composite_in_over_argb_sse_2pix (uint32_t *dest, const uint32_t *src,
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse_2pix, composite_in_over_argb,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_over_argb_sse_2pix, composite_in_over_argb,
OIL_IMPL_FLAG_SSE2);
static void
@@ -348,7 +352,7 @@ composite_in_over_argb_const_src_sse_2pix (uint32_t *dest, const uint32_t *src,
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse_2pix,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_over_argb_const_src_sse_2pix,
composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -387,7 +391,7 @@ composite_in_over_argb_const_mask_sse_2pix (uint32_t *dest, const uint32_t *src,
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse_2pix,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_in_over_argb_const_mask_sse_2pix,
composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2);
static void
@@ -414,5 +418,5 @@ composite_over_u8_sse_2pix (uint8_t *dest, const uint8_t *src, int n)
dest++;
}
}
-OIL_DEFINE_IMPL_FULL (composite_over_u8_sse_2pix, composite_over_u8,
+OIL_DEFINE_IMPL_FULL_WRAPPER(composite_over_u8_sse_2pix, composite_over_u8,
OIL_IMPL_FLAG_SSE2);
diff --git a/liboil/sse/composite_sse_4pix.c b/liboil/sse/composite_sse_4pix.c
index be9dd5a..7614976 100644
--- a/liboil/sse/composite_sse_4pix.c
+++ b/liboil/sse/composite_sse_4pix.c
@@ -32,6 +32,10 @@
#include <emmintrin.h>
#include <liboil/liboilcolorspace.h>
+#include "sse_wrapper.h"
+
+
+
union m128_int {
__m128i m128;
uint64_t ull[2];
@@ -193,7 +197,7 @@ composite_in_argb_sse (uint32_t *dest, const uint32_t *src, const uint8_t *mask,
COMPOSITE_IN(oil_argb_B(s), m));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_sse, composite_in_argb,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_argb_sse, composite_in_argb,
OIL_IMPL_FLAG_SSE2);
static void
@@ -230,7 +234,7 @@ composite_in_argb_const_src_sse (uint32_t *dest, const uint32_t *src,
COMPOSITE_IN(oil_argb_B(*src), m));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_src_sse,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_argb_const_src_sse,
composite_in_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -267,7 +271,7 @@ composite_in_argb_const_mask_sse (uint32_t *dest, const uint32_t *src,
COMPOSITE_IN(oil_argb_B(s), mask[0]));
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_argb_const_mask_sse,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_argb_const_mask_sse,
composite_in_argb_const_mask, OIL_IMPL_FLAG_SSE2);
static void
@@ -302,7 +306,7 @@ composite_over_argb_sse (uint32_t *dest, const uint32_t *src, int n)
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_over_argb_sse, composite_over_argb,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_over_argb_sse, composite_over_argb,
OIL_IMPL_FLAG_SSE2);
static void
@@ -339,7 +343,7 @@ composite_over_argb_const_src_sse (uint32_t *dest, const uint32_t *src, int n)
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_over_argb_const_src_sse,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_over_argb_const_src_sse,
composite_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -392,7 +396,7 @@ composite_in_over_argb_sse (uint32_t *dest, const uint32_t *src,
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_sse, composite_in_over_argb,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_over_argb_sse, composite_in_over_argb,
OIL_IMPL_FLAG_SSE2);
static void
@@ -447,7 +451,7 @@ composite_in_over_argb_const_src_sse (uint32_t *dest, const uint32_t *src,
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_src_sse,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_over_argb_const_src_sse,
composite_in_over_argb_const_src, OIL_IMPL_FLAG_SSE2);
static void
@@ -502,7 +506,7 @@ composite_in_over_argb_const_mask_sse (uint32_t *dest, const uint32_t *src,
*dest++ = d;
}
}
-OIL_DEFINE_IMPL_FULL (composite_in_over_argb_const_mask_sse,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_in_over_argb_const_mask_sse,
composite_in_over_argb_const_mask, OIL_IMPL_FLAG_SSE2);
static void
@@ -529,5 +533,6 @@ composite_over_u8_sse (uint8_t *dest, const uint8_t *src, int n)
dest++;
}
}
-OIL_DEFINE_IMPL_FULL (composite_over_u8_sse, composite_over_u8,
+OIL_DEFINE_IMPL_FULL_WRAPPER (composite_over_u8_sse, composite_over_u8,
OIL_IMPL_FLAG_SSE2);
+
diff --git a/liboil/sse/sad8x8_sse.c b/liboil/sse/sad8x8_sse.c
index ec532e6..c1b66d2 100644
--- a/liboil/sse/sad8x8_sse.c
+++ b/liboil/sse/sad8x8_sse.c
@@ -31,6 +31,8 @@
#include <liboil/liboilfunction.h>
#include <emmintrin.h>
+#include "sse_wrapper.h"
+
union m128_int {
__m128i m128;
uint32_t i[4];
@@ -60,4 +62,4 @@ sad8x8_u8_sse (uint32_t *dest, uint8_t *src1, int sstr1, uint8_t *src2,
sumi.m128 = sum;
*dest = sumi.i[0] + sumi.i[2];
}
-OIL_DEFINE_IMPL_FULL (sad8x8_u8_sse, sad8x8_u8, OIL_IMPL_FLAG_SSE2);
+OIL_DEFINE_IMPL_FULL_WRAPPER(sad8x8_u8_sse, sad8x8_u8, OIL_IMPL_FLAG_SSE2);
diff --git a/liboil/sse/sse_wrapper.h b/liboil/sse/sse_wrapper.h
new file mode 100644
index 0000000..bffba95
--- /dev/null
+++ b/liboil/sse/sse_wrapper.h
@@ -0,0 +1,34 @@
+#ifndef __SSE_WRAPPER_H__
+#define __SSE_WRAPPER_H__
+
+/* A massive hack to work around gcc (and mono) alignment bugs. This
+ * realigns the stack to 16 bytes when calling a wrapped function. */
+
+#ifdef USE_SSE_WRAPPER
+#define OIL_SSE_WRAPPER(func) \
+static void func () __attribute__ ((used)); \
+static void func ## _wrapper (void) \
+{ \
+ __asm__ __volatile__ ("\n" \
+ " subl $0x20, %%esp\n" \
+ " andl $0xfffffff0, %%esp\n" \
+ " movdqu 0x08(%%ebp), %%xmm0\n" \
+ " movdqa %%xmm0, 0x00(%%esp)\n" \
+ " movdqu 0x18(%%ebp), %%xmm0\n" \
+ " movdqa %%xmm0, 0x10(%%esp)\n" \
+ " call " #func "\n" \
+ " movl %%ebp, %%esp\n" \
+ : : : "xmm0"); \
+ (void)&func; \
+}
+
+#define OIL_DEFINE_IMPL_FULL_WRAPPER(func,klass,flags) \
+OIL_SSE_WRAPPER(func) \
+OIL_DEFINE_IMPL_FULL(func ## _wrapper, klass, flags)
+#else
+#define OIL_DEFINE_IMPL_FULL_WRAPPER(func,klass,flags) \
+OIL_DEFINE_IMPL_FULL(func, klass, flags)
+#endif
+
+#endif
+
diff --git a/m4/as-gcc-inline-assembly.m4 b/m4/as-gcc-inline-assembly.m4
index 70cc7d0..72e56c2 100644
--- a/m4/as-gcc-inline-assembly.m4
+++ b/m4/as-gcc-inline-assembly.m4
@@ -4,7 +4,7 @@ dnl autostars m4 macro for detection of gcc inline assembly
dnl David Schleef <ds@schleef.org>
-dnl $Id: as-gcc-inline-assembly.m4,v 1.3 2007-03-16 23:30:02 ds Exp $
+dnl $Id: as-gcc-inline-assembly.m4,v 1.4 2007-03-17 02:03:30 ds Exp $
dnl AS_COMPILER_FLAG(ACTION-IF-ACCEPTED, [ACTION-IF-NOT-ACCEPTED])
dnl Tries to compile with the given CFLAGS.
@@ -17,7 +17,7 @@ AC_DEFUN([AS_GCC_INLINE_ASSEMBLY],
AC_TRY_COMPILE([], [
#ifdef __GNUC_MINOR__
-#if __GNUC_MAJOR__ * 1000 + __GNUC_MINOR__ < 3004
+#if (__GNUC__ * 1000 + __GNUC_MINOR__) < 3004
#error GCC before 3.4 has critical bugs compiling inline assembly
#endif
#endif
diff --git a/testsuite/stack_align.c b/testsuite/stack_align.c
index f028cf7..a6f9e78 100644
--- a/testsuite/stack_align.c
+++ b/testsuite/stack_align.c
@@ -43,7 +43,7 @@
#include <liboil/liboiltest.h>
#include <liboil/liboilcpu.h>
-int verbose = 0;
+int verbose = 1;
/* Amount by which results of different types are allowed to deviate from the
* reference.
@@ -292,27 +292,25 @@ int check_class(OilFunctionClass *klass)
{
OilTest *test;
int failed = 0;
- int i;
+ int align;
+ int step = 4;
oil_class_optimize (klass);
if(verbose) printf("checking class %s\n", klass->name);
test = oil_test_new(klass);
- for (i=0; i < OIL_ARG_LAST; i++) {
- int align;
- int step = 4;
#ifdef HAVE_AMD64
- step = 16;
+ step = 16;
#endif
- for (align = 0; align <= 32; align += step) {
- realign_klass = klass;
- realign_align = align;
- realign(align);
- failed |= realign_return;
- }
+ for (align = 0; align <= 32; align += step) {
+ printf(" alignment %d\n", align);
+ realign_klass = klass;
+ realign_align = align;
+ realign(align);
+ failed |= realign_return;
}
oil_test_free (test);