diff options
author | David Schleef <ds@schleef.org> | 2005-05-27 05:19:44 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-05-27 05:19:44 +0000 |
commit | 0e7143e8fa8761f6dca4bb40b3e43e15de398fef (patch) | |
tree | 86a737df0631702912016f61bef19a0572d5a056 | |
parent | 4f3db6dfe7a9052178501ea1443b715c95a34811 (diff) | |
download | liboil-0e7143e8fa8761f6dca4bb40b3e43e15de398fef.tar.gz |
* liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx):
fix asm constraints that gcc-4.0 doesn't like
* liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow),
(conv_s32_f32_3dnow): remove mm0 constraint, it's irrelevant
* liboil/conv/conv_sse.c: (conv_f32_s32_sse), (conv_s32_f64_sse):
* liboil/copy/splat_ref.c: (splat_u32_ns_unroll4): add another
unroll
* liboil/liboilfunction.h: parentheses are good
* liboil/sse/Makefile.am: add a separate directory for SSE
intrinsics, since they need to be compiled with special flags.
* liboil/sse/conv_sse.c: (conv_f32_s32_sse):
* liboil/Makefile.am: add sse directory
* configure.ac: cleanup
-rw-r--r-- | ChangeLog | 17 | ||||
-rw-r--r-- | configure.ac | 17 | ||||
-rw-r--r-- | liboil/Makefile.am | 5 | ||||
-rw-r--r-- | liboil/colorspace/argb_paint_i386.c | 2 | ||||
-rw-r--r-- | liboil/conv/conv_3dnow.c | 7 | ||||
-rw-r--r-- | liboil/copy/splat_ref.c | 19 | ||||
-rw-r--r-- | liboil/liboilfunction.h | 6 | ||||
-rw-r--r-- | liboil/sse/Makefile.am | 22 | ||||
-rw-r--r-- | liboil/sse/conv_sse.c | 88 |
9 files changed, 173 insertions, 10 deletions
@@ -1,3 +1,20 @@ +2005-05-26 David Schleef <ds@schleef.org> + + * liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx): + fix asm contraints that gcc-4.0 doesn't like + * liboil/conv/conv_3dnow.c: (conv_f32_s16_3dnow), + (conv_s32_f32_3dnow): remove mm0 constraint, it's irrelevant + * liboil/conv/conv_sse.c: (conv_f32_s32_sse), (conv_s32_f64_sse): + * liboil/copy/splat_ref.c: (splat_u32_ns_unroll4): add another + unroll + * liboil/liboilfunction.h: parentheses are good + + * liboil/sse/Makefile.am: add a separate directory for SSE + intrinsics, since they need to be compiled with special flags. + * liboil/sse/conv_sse.c: (conv_f32_s32_sse): + * liboil/Makefile.am: add sse directory + * configure.ac: cleanup + 2005-05-08 David Schleef <ds@schleef.org> * patches/patch-remove-indirection: Some patches that might get diff --git a/configure.ac b/configure.ac index 1d00b6e..f73498a 100644 --- a/configure.ac +++ b/configure.ac @@ -136,6 +136,7 @@ AM_CONDITIONAL(HAVE_CPU_IA64, test "x$HAVE_CPU_IA64" = "xyes") AC_C_BIGENDIAN + AC_FUNC_MMAP() AC_CHECK_LIB(m, rintf, AC_DEFINE(HAVE_RINTF, 1, [Define if rintf() is available])) @@ -169,6 +170,19 @@ if test x$HAVE_CPU_POWERPC = xyes ; then true) fi +if test x$HAVE_CPU_I386 = xyes ; then + AS_COMPILER_FLAG(["-mmmx"], [MMX_CFLAGS="-mmmx"], true) + AS_COMPILER_FLAG(["-msse"], [SSE_CFLAGS="-msse"], true) + AS_COMPILER_FLAG(["-msse2"], [SSE2_CFLAGS="-msse2"], true) + AS_COMPILER_FLAG(["-msse3"], [SSE3_CFLAGS="-msse3"], true) + AS_COMPILER_FLAG(["-m3dnow"], [_3DNOW_CFLAGS="-m3dnow"], true) +fi +AC_SUBST(MMX_CFLAGS) +AC_SUBST(SSE_CFLAGS) +AC_SUBST(SSE2_CFLAGS) +AC_SUBST(SSE3_CFLAGS) +AC_SUBST(_3DNOW_CFLAGS) + LIBOIL_CFLAGS="$LIBOIL_CFLAGS -D_GNU_SOURCE -D_POSIX_C_SOURCE=200112L -I\$(top_srcdir) -O2" AC_SUBST(LIBOIL_CFLAGS) @@ -179,7 +193,7 @@ pkgconfigdir="\$(libdir)/pkgconfig" AC_SUBST(pkgconfigdir) #CFLAGS=`echo "$CFLAGS" | sed -e 's/-O[0-9*]//g'` -CFLAGS="-g" +#CFLAGS="-g" AC_CONFIG_FILES([ Makefile @@ -192,6 +206,7 @@ 
liboil/dct/Makefile liboil/md5/Makefile liboil/jpeg/Makefile liboil/simdpack/Makefile +liboil/sse/Makefile liboil/utf8/Makefile testsuite/Makefile examples/Makefile diff --git a/liboil/Makefile.am b/liboil/Makefile.am index 51f234f..f33061f 100644 --- a/liboil/Makefile.am +++ b/liboil/Makefile.am @@ -1,7 +1,7 @@ pkgincludedir = $(includedir)/liboil-@LIBOIL_MAJORMINOR@/liboil -SUBDIRS = colorspace conv copy dct jpeg simdpack md5 utf8 +SUBDIRS = colorspace conv copy dct jpeg simdpack md5 utf8 sse lib_LTLIBRARIES = liboiltmp1.la liboil-@LIBOIL_MAJORMINOR@.la @@ -28,6 +28,7 @@ liboilfunctions_la_LIBADD = \ jpeg/libjpeg.la \ md5/libmd5.la \ simdpack/libsimdpack.la \ + sse/libsse.la \ utf8/libutf8.la \ $(LIBM) liboilfunctions_la_LDFLAGS = \ @@ -60,7 +61,7 @@ liboil_@LIBOIL_MAJORMINOR@_la_SOURCES = \ liboil_@LIBOIL_MAJORMINOR@_la_LIBADD = \ liboilfunctions.la \ $(LIBM) -liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS) +liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS) -msse liboil_@LIBOIL_MAJORMINOR@_la_LDFLAGS = \ -no-undefined \ -version-info $(LIBOIL_LIBVERSION) \ diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c index 3753589..a91981c 100644 --- a/liboil/colorspace/argb_paint_i386.c +++ b/liboil/colorspace/argb_paint_i386.c @@ -83,7 +83,7 @@ argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n) " decl %3\n" " jne 1b\n" " emms\n" - : "+r" (dest), "+%%eax" (color), "+r" (alpha), "+r" (n) + : "+r" (dest), "+a" (color), "+r" (alpha), "+r" (n) : "r" (&constants)); } OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_SSE); diff --git a/liboil/conv/conv_3dnow.c b/liboil/conv/conv_3dnow.c index 01efd23..1b86b58 100644 --- a/liboil/conv/conv_3dnow.c +++ b/liboil/conv/conv_3dnow.c @@ -42,8 +42,9 @@ conv_f32_s16_3dnow (float *dst, int dst_stride, int16_t * src, int src_stride, for (i = 0; i < n; i++) { asm volatile (" movswl 0(%0), %%eax \n" " movd %%eax, %%mm0 \n" - " pi2fd 
%%mm0, %%mm0 \n" " movd %%mm0, 0(%1) \n"::"r" (src), "r" (dst) - :"eax", "mm0"); + " pi2fd %%mm0, %%mm0 \n" " movd %%mm0, 0(%1) \n" + ::"r" (src), "r" (dst) + :"eax"); dst = OIL_OFFSET (dst, dst_stride); src = OIL_OFFSET (src, src_stride); @@ -74,7 +75,7 @@ conv_s32_f32_3dnow (int32_t * dst, int dst_stride, float *src, int src_stride, " movd %%mm1, 0(%1) \n" : :"r" (src), "r" (dst), "r" (constants) - :"mm0"); + ); dst = OIL_OFFSET (dst, dst_stride); src = OIL_OFFSET (src, src_stride); diff --git a/liboil/copy/splat_ref.c b/liboil/copy/splat_ref.c index eb126eb..ddd1444 100644 --- a/liboil/copy/splat_ref.c +++ b/liboil/copy/splat_ref.c @@ -111,6 +111,25 @@ static void splat_u32_ns_unroll2 (uint32_t *dest, uint32_t *param, int n) } OIL_DEFINE_IMPL(splat_u32_ns_unroll2, splat_u32_ns); +static void splat_u32_ns_unroll4 (uint32_t *dest, uint32_t *param, int n) +{ + int i; + while (n&3) { + *dest = *param; + dest++; + n--; + } + n >>= 2; + for(i=0;i<n;i++){ + dest[0] = *param; + dest[1] = *param; + dest[2] = *param; + dest[3] = *param; + dest+=4; + } +} +OIL_DEFINE_IMPL(splat_u32_ns_unroll4, splat_u32_ns); + static void splat_u8_ns_memset (uint8_t *dest, uint8_t *param, int n) { memset (dest, *param, n); diff --git a/liboil/liboilfunction.h b/liboil/liboilfunction.h index 43ef222..802f42c 100644 --- a/liboil/liboilfunction.h +++ b/liboil/liboilfunction.h @@ -72,9 +72,9 @@ struct _OilFunctionImpl { double profile_std; }; -#define OIL_GET(ptr, offset, type) (*(type *)((uint8_t *)ptr + offset) ) -#define OIL_OFFSET(ptr, offset) ((void *)((uint8_t *)ptr + offset) ) -#define OIL_INCREMENT(ptr, offset) (ptr = (void *)((uint8_t *)ptr + offset) ) +#define OIL_GET(ptr, offset, type) (*(type *)((uint8_t *)ptr + (offset)) ) +#define OIL_OFFSET(ptr, offset) ((void *)((uint8_t *)ptr + (offset)) ) +#define OIL_INCREMENT(ptr, offset) (ptr = (void *)((uint8_t *)ptr + (offset)) ) #define OIL_IMPL_FLAG_REF (1<<0) #define OIL_IMPL_FLAG_OPT (1<<1) diff --git a/liboil/sse/Makefile.am 
b/liboil/sse/Makefile.am new file mode 100644 index 0000000..98860c9 --- /dev/null +++ b/liboil/sse/Makefile.am @@ -0,0 +1,22 @@ + +if USE_ALT_OPT +opt_libs = libsse_opt.la +else +opt_libs = +endif +noinst_LTLIBRARIES = libsse.la $(opt_libs) + +if HAVE_CPU_I386 +c_sources = \ + conv_sse.c +endif + +libsse_la_SOURCES = $(c_sources) +libsse_la_CFLAGS = $(MMX_CFLAGS) $(SSE_CFLAGS) $(SSE2_CFLAGS) $(LIBOIL_CFLAGS) + +libsse_opt_la_SOURCES = $(c_sources) +libsse_opt_la_CFLAGS = $(LIBOIL_CFLAGS) $(LIBOIL_OPT_CFLAGS) $(MMX_CFLAGS) $(SSE_CFLAGS) $(SSE2_CFLAGS) + +libsse_la_SOURCES = \ + $(c_sources) + diff --git a/liboil/sse/conv_sse.c b/liboil/sse/conv_sse.c new file mode 100644 index 0000000..0c51973 --- /dev/null +++ b/liboil/sse/conv_sse.c @@ -0,0 +1,88 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include <liboil/liboilfunction.h> +#include <emmintrin.h> + +#include <../conv/conv.h> + + +static void +conv_f32_s32_sse (float *dst, int dst_stride, int32_t * src, int src_stride, + int n) +{ + int i; + uint32_t tmp_dest_array[68], *tmp_dest; + int32_t tmp_src_array[68], *tmp_src; + __m128 xmm0 = { 0 }; + __m128 xmm1 = { 0 }; + + tmp_dest = (void *)(((unsigned long)(tmp_dest_array) + 0xf)&~0xf); + tmp_src = (void *)(((unsigned long)(tmp_src_array) + 0xf)&~0xf); + while (n>64) { + for (i=0;i<64;i+=2){ + tmp_src[i] = OIL_GET(src, src_stride * i, int32_t); + tmp_src[i+1] = OIL_GET(src, src_stride * (i+1), int32_t); + } + for (i=0;i<8;i++){ + xmm0 = _mm_cvt_pi2ps (xmm0, *(__m64 *)(tmp_src + 8*i + 2)); + xmm0 = _mm_movelh_ps (xmm0, xmm0); + xmm0 = _mm_cvt_pi2ps (xmm0, *(__m64 *)(tmp_src + 8*i)); + _mm_store_ps (((float *)tmp_dest) + i*8, xmm0); + xmm1 = _mm_cvt_pi2ps (xmm1, *(__m64 *)(tmp_src + 8*i + 6)); + xmm1 = _mm_movelh_ps (xmm1, xmm1); + xmm1 = _mm_cvt_pi2ps (xmm1, *(__m64 *)(tmp_src + 8*i + 4)); + _mm_store_ps (((float *)tmp_dest) + i*8 + 4, xmm1); + } + for (i=0;i<64;i+=2){ + OIL_GET(dst, dst_stride * i, int32_t) = tmp_dest[i]; + OIL_GET(dst, dst_stride * (i+1), int32_t) = tmp_dest[i+1]; + } + n-=64; + src = OIL_OFFSET (src, src_stride * 64); + dst = OIL_OFFSET (dst, dst_stride * 64); + } + for (i=0;i<n;i++){ + tmp_src[i] = OIL_GET(src, src_stride * i, int32_t); + } + for (i=0;i<(n+3)/4;i++){ + 
xmm0 = _mm_cvt_pi2ps (xmm0, ((__m64 *)tmp_src)[2*i+1]); + xmm0 = _mm_movelh_ps (xmm0, xmm0); + xmm0 = _mm_cvt_pi2ps (xmm0, ((__m64 *)tmp_src)[2*i]); + _mm_store_ps (((float *)tmp_dest) + i*4, xmm0); + } + for (i=0;i<n;i++){ + OIL_GET(dst, dst_stride * i, int32_t) = tmp_dest[i]; + } + + _m_empty(); +} +OIL_DEFINE_IMPL_FULL (conv_f32_s32_sse, conv_f32_s32, OIL_IMPL_FLAG_SSE); + |