summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Schleef <ds@schleef.org>2005-05-27 05:19:44 +0000
committerDavid Schleef <ds@schleef.org>2005-05-27 05:19:44 +0000
commit0e7143e8fa8761f6dca4bb40b3e43e15de398fef (patch)
tree86a737df0631702912016f61bef19a0572d5a056
parent4f3db6dfe7a9052178501ea1443b715c95a34811 (diff)
downloadliboil-0e7143e8fa8761f6dca4bb40b3e43e15de398fef.tar.gz
* liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx):
fix asm constraints that gcc-4.0 doesn't like * liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow), (conv_s32_f32_3dnow): remove mm0 constraint, it's irrelevant * liboil/conv/conv_sse.c: (conv_f32_s32_sse), (conv_s32_f64_sse): * liboil/copy/splat_ref.c: (splat_u32_ns_unroll4): add another unroll * liboil/liboilfunction.h: parentheses are good * liboil/sse/Makefile.am: add a separate directory for SSE intrinsics, since they need to be compiled with special flags. * liboil/sse/conv_sse.c: (conv_f32_s32_sse): * liboil/Makefile.am: add sse directory * configure.ac: cleanup
-rw-r--r--ChangeLog17
-rw-r--r--configure.ac17
-rw-r--r--liboil/Makefile.am5
-rw-r--r--liboil/colorspace/argb_paint_i386.c2
-rw-r--r--liboil/conv/conv_3dnow.c7
-rw-r--r--liboil/copy/splat_ref.c19
-rw-r--r--liboil/liboilfunction.h6
-rw-r--r--liboil/sse/Makefile.am22
-rw-r--r--liboil/sse/conv_sse.c88
9 files changed, 173 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 5512328..1b62d8d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2005-05-26 David Schleef <ds@schleef.org>
+
+ * liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx):
+ fix asm constraints that gcc-4.0 doesn't like
+ * liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow),
+ (conv_s32_f32_3dnow): remove mm0 constraint, it's irrelevant
+ * liboil/conv/conv_sse.c: (conv_f32_s32_sse), (conv_s32_f64_sse):
+ * liboil/copy/splat_ref.c: (splat_u32_ns_unroll4): add another
+ unroll
+ * liboil/liboilfunction.h: parentheses are good
+
+ * liboil/sse/Makefile.am: add a separate directory for SSE
+ intrinsics, since they need to be compiled with special flags.
+ * liboil/sse/conv_sse.c: (conv_f32_s32_sse):
+ * liboil/Makefile.am: add sse directory
+ * configure.ac: cleanup
+
2005-05-08 David Schleef <ds@schleef.org>
* patches/patch-remove-indirection: Some patches that might get
diff --git a/configure.ac b/configure.ac
index 1d00b6e..f73498a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -136,6 +136,7 @@ AM_CONDITIONAL(HAVE_CPU_IA64, test "x$HAVE_CPU_IA64" = "xyes")
AC_C_BIGENDIAN
+
AC_FUNC_MMAP()
AC_CHECK_LIB(m, rintf,
AC_DEFINE(HAVE_RINTF, 1, [Define if rintf() is available]))
@@ -169,6 +170,19 @@ if test x$HAVE_CPU_POWERPC = xyes ; then
true)
fi
+if test x$HAVE_CPU_I386 = xyes ; then
+ AS_COMPILER_FLAG(["-mmmx"], [MMX_CFLAGS="-mmmx"], true)
+ AS_COMPILER_FLAG(["-msse"], [SSE_CFLAGS="-msse"], true)
+ AS_COMPILER_FLAG(["-msse2"], [SSE2_CFLAGS="-msse2"], true)
+ AS_COMPILER_FLAG(["-msse3"], [SSE3_CFLAGS="-msse3"], true)
+ AS_COMPILER_FLAG(["-m3dnow"], [_3DNOW_CFLAGS="-m3dnow"], true)
+fi
+AC_SUBST(MMX_CFLAGS)
+AC_SUBST(SSE_CFLAGS)
+AC_SUBST(SSE2_CFLAGS)
+AC_SUBST(SSE3_CFLAGS)
+AC_SUBST(_3DNOW_CFLAGS)
+
LIBOIL_CFLAGS="$LIBOIL_CFLAGS -D_GNU_SOURCE -D_POSIX_C_SOURCE=200112L -I\$(top_srcdir) -O2"
AC_SUBST(LIBOIL_CFLAGS)
@@ -179,7 +193,7 @@ pkgconfigdir="\$(libdir)/pkgconfig"
AC_SUBST(pkgconfigdir)
#CFLAGS=`echo "$CFLAGS" | sed -e 's/-O[0-9*]//g'`
-CFLAGS="-g"
+#CFLAGS="-g"
AC_CONFIG_FILES([
Makefile
@@ -192,6 +206,7 @@ liboil/dct/Makefile
liboil/md5/Makefile
liboil/jpeg/Makefile
liboil/simdpack/Makefile
+liboil/sse/Makefile
liboil/utf8/Makefile
testsuite/Makefile
examples/Makefile
diff --git a/liboil/Makefile.am b/liboil/Makefile.am
index 51f234f..f33061f 100644
--- a/liboil/Makefile.am
+++ b/liboil/Makefile.am
@@ -1,7 +1,7 @@
pkgincludedir = $(includedir)/liboil-@LIBOIL_MAJORMINOR@/liboil
-SUBDIRS = colorspace conv copy dct jpeg simdpack md5 utf8
+SUBDIRS = colorspace conv copy dct jpeg simdpack md5 utf8 sse
lib_LTLIBRARIES = liboiltmp1.la liboil-@LIBOIL_MAJORMINOR@.la
@@ -28,6 +28,7 @@ liboilfunctions_la_LIBADD = \
jpeg/libjpeg.la \
md5/libmd5.la \
simdpack/libsimdpack.la \
+ sse/libsse.la \
utf8/libutf8.la \
$(LIBM)
liboilfunctions_la_LDFLAGS = \
@@ -60,7 +61,7 @@ liboil_@LIBOIL_MAJORMINOR@_la_SOURCES = \
liboil_@LIBOIL_MAJORMINOR@_la_LIBADD = \
liboilfunctions.la \
$(LIBM)
-liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS)
+liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS) -msse
liboil_@LIBOIL_MAJORMINOR@_la_LDFLAGS = \
-no-undefined \
-version-info $(LIBOIL_LIBVERSION) \
diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c
index 3753589..a91981c 100644
--- a/liboil/colorspace/argb_paint_i386.c
+++ b/liboil/colorspace/argb_paint_i386.c
@@ -83,7 +83,7 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
" decl %3\n"
" jne 1b\n"
" emms\n"
- : "+r" (dest), "+%%eax" (color), "+r" (alpha), "+r" (n)
+ : "+r" (dest), "+a" (color), "+r" (alpha), "+r" (n)
: "r" (&constants));
}
OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE);
diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c
index 01efd23..1b86b58 100644
--- a/liboil/conv/conv_3dnow.c
+++ b/liboil/conv/conv_3dnow.c
@@ -42,8 +42,9 @@ conv_f32_s16_3dnow (float *dst, int dst_stride, int16_t * src, int src_stride,
for (i = 0; i < n; i++) {
asm volatile (" movswl 0(%0), %%eax \n"
" movd %%eax, %%mm0 \n"
- " pi2fd %%mm0, %%mm0 \n" " movd %%mm0, 0(%1) \n"::"r" (src), "r" (dst)
- :"eax", "mm0");
+ " pi2fd %%mm0, %%mm0 \n" " movd %%mm0, 0(%1) \n"
+ ::"r" (src), "r" (dst)
+ :"eax");
dst = OIL_OFFSET (dst, dst_stride);
src = OIL_OFFSET (src, src_stride);
@@ -74,7 +75,7 @@ conv_s32_f32_3dnow (int32_t * dst, int dst_stride, float *src, int src_stride,
" movd %%mm1, 0(%1) \n"
:
:"r" (src), "r" (dst), "r" (constants)
- :"mm0");
+ );
dst = OIL_OFFSET (dst, dst_stride);
src = OIL_OFFSET (src, src_stride);
diff --git a/liboil/copy/splat_ref.c b/liboil/copy/splat_ref.c
index eb126eb..ddd1444 100644
--- a/liboil/copy/splat_ref.c
+++ b/liboil/copy/splat_ref.c
@@ -111,6 +111,25 @@ static void splat_u32_ns_unroll2 (uint32_t *dest, uint32_t *param, int n)
}
OIL_DEFINE_IMPL(splat_u32_ns_unroll2, splat_u32_ns);
+static void splat_u32_ns_unroll4 (uint32_t *dest, uint32_t *param, int n)
+{ /* Writes the single value *param into n consecutive uint32_t slots starting at dest. */
+ int i;
+ while (n&3) { /* store one element at a time until the remaining count is a multiple of 4 */
+ *dest = *param;
+ dest++;
+ n--;
+ }
+ n >>= 2; /* n now counts groups of four elements */
+ for(i=0;i<n;i++){ /* 4x unrolled body: four stores per iteration */
+ dest[0] = *param;
+ dest[1] = *param;
+ dest[2] = *param;
+ dest[3] = *param;
+ dest+=4;
+ }
+}
+OIL_DEFINE_IMPL(splat_u32_ns_unroll4, splat_u32_ns); /* presumably registers this as an implementation of splat_u32_ns; macro not shown here */
+
static void splat_u8_ns_memset (uint8_t *dest, uint8_t *param, int n)
{
memset (dest, *param, n);
diff --git a/liboil/liboilfunction.h b/liboil/liboilfunction.h
index 43ef222..802f42c 100644
--- a/liboil/liboilfunction.h
+++ b/liboil/liboilfunction.h
@@ -72,9 +72,9 @@ struct _OilFunctionImpl {
double profile_std;
};
-#define OIL_GET(ptr, offset, type) (*(type *)((uint8_t *)ptr + offset) )
-#define OIL_OFFSET(ptr, offset) ((void *)((uint8_t *)ptr + offset) )
-#define OIL_INCREMENT(ptr, offset) (ptr = (void *)((uint8_t *)ptr + offset) )
+#define OIL_GET(ptr, offset, type) (*(type *)((uint8_t *)ptr + (offset)) )
+#define OIL_OFFSET(ptr, offset) ((void *)((uint8_t *)ptr + (offset)) )
+#define OIL_INCREMENT(ptr, offset) (ptr = (void *)((uint8_t *)ptr + (offset)) )
#define OIL_IMPL_FLAG_REF (1<<0)
#define OIL_IMPL_FLAG_OPT (1<<1)
diff --git a/liboil/sse/Makefile.am b/liboil/sse/Makefile.am
new file mode 100644
index 0000000..98860c9
--- /dev/null
+++ b/liboil/sse/Makefile.am
@@ -0,0 +1,22 @@
+
+if USE_ALT_OPT
+opt_libs = libsse_opt.la
+else
+opt_libs =
+endif
+noinst_LTLIBRARIES = libsse.la $(opt_libs)
+
+if HAVE_CPU_I386
+c_sources = \
+ conv_sse.c
+endif
+
+libsse_la_SOURCES = $(c_sources)
+libsse_la_CFLAGS = $(MMX_CFLAGS) $(SSE_CFLAGS) $(SSE2_CFLAGS) $(LIBOIL_CFLAGS)
+
+libsse_opt_la_SOURCES = $(c_sources)
+libsse_opt_la_CFLAGS = $(LIBOIL_CFLAGS) $(LIBOIL_OPT_CFLAGS) $(MMX_CFLAGS) $(SSE_CFLAGS) $(SSE2_CFLAGS)
+
+libsse_la_SOURCES = \
+ $(c_sources)
+
diff --git a/liboil/sse/conv_sse.c b/liboil/sse/conv_sse.c
new file mode 100644
index 0000000..0c51973
--- /dev/null
+++ b/liboil/sse/conv_sse.c
@@ -0,0 +1,88 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include <liboil/liboilfunction.h>
+#include <emmintrin.h>
+
+#include <../conv/conv.h>
+
+
+static void
+conv_f32_s32_sse (float *dst, int dst_stride, int32_t * src, int src_stride,
+ int n)
+{ /* Converts n int32_t values read from src into floats written to dst; both strides are byte offsets (OIL_GET adds them to a uint8_t pointer). */
+ int i;
+ uint32_t tmp_dest_array[68], *tmp_dest; /* 64 elements plus 4 spare so the start can be rounded up to 16 bytes */
+ int32_t tmp_src_array[68], *tmp_src; /* same sizing for the gathered input */
+ __m128 xmm0 = { 0 };
+ __m128 xmm1 = { 0 };
+
+ tmp_dest = (void *)(((unsigned long)(tmp_dest_array) + 0xf)&~0xf); /* round up to a 16-byte boundary; _mm_store_ps needs an aligned address */
+ tmp_src = (void *)(((unsigned long)(tmp_src_array) + 0xf)&~0xf); /* same rounding for the source scratch buffer */
+ while (n>64) { /* full chunks of 64; the last 64 or fewer elements are left for the tail code below */
+ for (i=0;i<64;i+=2){ /* gather strided input into the contiguous scratch buffer, two per iteration */
+ tmp_src[i] = OIL_GET(src, src_stride * i, int32_t);
+ tmp_src[i+1] = OIL_GET(src, src_stride * (i+1), int32_t);
+ }
+ for (i=0;i<8;i++){ /* each iteration converts 8 values as two 4-float vectors */
+ xmm0 = _mm_cvt_pi2ps (xmm0, *(__m64 *)(tmp_src + 8*i + 2)); /* convert elements 2,3 into the low two lanes */
+ xmm0 = _mm_movelh_ps (xmm0, xmm0); /* copy the low two lanes into the high two lanes */
+ xmm0 = _mm_cvt_pi2ps (xmm0, *(__m64 *)(tmp_src + 8*i)); /* convert elements 0,1 into the low lanes; high lanes keep 2,3 */
+ _mm_store_ps (((float *)tmp_dest) + i*8, xmm0);
+ xmm1 = _mm_cvt_pi2ps (xmm1, *(__m64 *)(tmp_src + 8*i + 6)); /* same sequence for elements 4..7 using xmm1 */
+ xmm1 = _mm_movelh_ps (xmm1, xmm1);
+ xmm1 = _mm_cvt_pi2ps (xmm1, *(__m64 *)(tmp_src + 8*i + 4));
+ _mm_store_ps (((float *)tmp_dest) + i*8 + 4, xmm1);
+ }
+ for (i=0;i<64;i+=2){ /* scatter results to strided dst, copied as raw 32-bit words */
+ OIL_GET(dst, dst_stride * i, int32_t) = tmp_dest[i];
+ OIL_GET(dst, dst_stride * (i+1), int32_t) = tmp_dest[i+1];
+ }
+ n-=64;
+ src = OIL_OFFSET (src, src_stride * 64); /* advance both pointers past the chunk just processed */
+ dst = OIL_OFFSET (dst, dst_stride * 64);
+ }
+ for (i=0;i<n;i++){ /* tail: gather the remaining n (at most 64) inputs */
+ tmp_src[i] = OIL_GET(src, src_stride * i, int32_t);
+ }
+ for (i=0;i<(n+3)/4;i++){ /* groups of four, rounded up; lanes past n read unset scratch entries and are never copied out */
+ xmm0 = _mm_cvt_pi2ps (xmm0, ((__m64 *)tmp_src)[2*i+1]); /* upper pair of the group first */
+ xmm0 = _mm_movelh_ps (xmm0, xmm0);
+ xmm0 = _mm_cvt_pi2ps (xmm0, ((__m64 *)tmp_src)[2*i]); /* then the lower pair */
+ _mm_store_ps (((float *)tmp_dest) + i*4, xmm0);
+ }
+ for (i=0;i<n;i++){ /* copy out only the n valid results */
+ OIL_GET(dst, dst_stride * i, int32_t) = tmp_dest[i];
+ }
+
+ _m_empty(); /* leave MMX state (emms) since the __m64 operands above use MMX registers */
+}
+OIL_DEFINE_IMPL_FULL (conv_f32_s32_sse, conv_f32_s32, OIL_IMPL_FLAG_SSE); /* presumably registers this as an SSE-flagged implementation of conv_f32_s32; macro not shown here */
+