diff options
author | David Schleef <ds@schleef.org> | 2005-06-18 06:33:44 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-06-18 06:33:44 +0000 |
commit | 9bc8a2317bfb40534c9be659f198e9fb53d09f3d (patch) | |
tree | 868be118e872d49d5bc0b65120d3310c0d0e9d8d | |
parent | cb5c9c5b48b447b3fc3c3d31072b9fc6b52b54fd (diff) | |
download | liboil-9bc8a2317bfb40534c9be659f198e9fb53d09f3d.tar.gz |
* liboil/Makefile.am: create a tmpfile for liboilarray.c to
avoid unnecessary building of liboil-0.3.la
* liboil/colorspace/Makefile.am: add resample_powerpc.c
* liboil/colorspace/resample.c: (merge_linear_argb_test): add test
function, since one parameter needs to be in the range [0,256]
* liboil/colorspace/resample_powerpc.c: (merge_linear_argb_powerpc):
altivec impl
* liboil/colorspace/rgb2rgba_powerpc.c: (rgb2rgba_powerpcasm):
Improve the asm
* liboil/copy/Makefile.am: new file
* liboil/copy/splat_powerpc.c: (splat_u8_ns_altivec),
(splat_u8_ns_altivec2), (splat_u32_ns_altivec): some altivec impls
-rw-r--r-- | ChangeLog | 15 | ||||
-rw-r--r-- | liboil/Makefile.am | 37 | ||||
-rw-r--r-- | liboil/colorspace/Makefile.am | 3 | ||||
-rw-r--r-- | liboil/colorspace/resample.c | 14 | ||||
-rw-r--r-- | liboil/colorspace/resample_powerpc.c | 98 | ||||
-rw-r--r-- | liboil/colorspace/rgb2rgba_powerpc.c | 8 | ||||
-rw-r--r-- | liboil/copy/Makefile.am | 3 | ||||
-rw-r--r-- | liboil/copy/splat_powerpc.c | 144 |
8 files changed, 296 insertions, 26 deletions
@@ -1,5 +1,20 @@ 2005-06-17 David Schleef <ds@schleef.org> + * liboil/Makefile.am: create a tmpfile for liboilarray.c to + avoid unnecessary building of liboil-0.3.la + * liboil/colorspace/Makefile.am: add resample_powerpc.c + * liboil/colorspace/resample.c: (merge_linear_argb_test): add test + function, since one parameter needs to be in the range [0,256] + * liboil/colorspace/resample_powerpc.c: (merge_linear_argb_powerpc): + altivec impl + * liboil/colorspace/rgb2rgba_powerpc.c: (rgb2rgba_powerpcasm): + Improve the asm + * liboil/copy/Makefile.am: new file + * liboil/copy/splat_powerpc.c: (splat_u8_ns_altivec), + (splat_u8_ns_altivec2), (splat_u32_ns_altivec): some altivec impls + +2005-06-17 David Schleef <ds@schleef.org> + * liboil/copy/copy_powerpc.c: (copy_u8_altivec), (copy_u8_altivec2): Rewrite so that they actually work. * liboil/motovec/Makefile.am: Fix up motovec stuff diff --git a/liboil/Makefile.am b/liboil/Makefile.am index e245e7c..c6e0590 100644 --- a/liboil/Makefile.am +++ b/liboil/Makefile.am @@ -88,28 +88,29 @@ build_marshal_CFLAGS = $(LIBOIL_CFLAGS) build_marshal_LDADD = $(LIBOIL_LIBS) liboilarray.c: liboiltmp1.la Makefile - echo '/* This file is autogenerated. Do not edit */' >liboilarray.c - echo >>liboilarray.c - echo '#include <liboil/liboilfunction.h>' >>liboilarray.c - echo >>liboilarray.c + echo '/* This file is autogenerated. Do not edit */' >liboilarray.c.tmp + echo >>liboilarray.c.tmp + echo '#include <liboil/liboilfunction.h>' >>liboilarray.c.tmp + echo >>liboilarray.c.tmp grep '^_oil_function_class_' .libs/liboiltmp1.exp | \ - sed 's/.*/extern OilFunctionClass &;/' >>liboilarray.c - echo >>liboilarray.c - echo 'OilFunctionClass *_oil_function_class_array[] = {' >>liboilarray.c + sed 's/.*/extern OilFunctionClass &;/' >>liboilarray.c.tmp + echo >>liboilarray.c.tmp + echo 'OilFunctionClass *_oil_function_class_array[] = {' >>liboilarray.c.tmp grep '^_oil_function_class_' .libs/liboiltmp1.exp | \ - sed 's/.*/ \&&,/' >>liboilarray.c - echo ' NULL' >>liboilarray.c - echo '};' >>liboilarray.c - echo >>liboilarray.c + sed 's/.*/ \&&,/' >>liboilarray.c.tmp + echo ' NULL' >>liboilarray.c.tmp + echo '};' >>liboilarray.c.tmp + echo >>liboilarray.c.tmp grep '^_oil_function_impl_' .libs/liboiltmp1.exp | \ - sed 's/.*/extern OilFunctionImpl &;/' >>liboilarray.c - echo >>liboilarray.c - echo 'OilFunctionImpl *_oil_function_impl_array[] = {' >>liboilarray.c + sed 's/.*/extern OilFunctionImpl &;/' >>liboilarray.c.tmp + echo >>liboilarray.c.tmp + echo 'OilFunctionImpl *_oil_function_impl_array[] = {' >>liboilarray.c.tmp grep '^_oil_function_impl_' .libs/liboiltmp1.exp | \ - sed 's/.*/ \&&,/' >>liboilarray.c - echo ' NULL' >>liboilarray.c - echo '};' >>liboilarray.c - echo >>liboilarray.c + sed 's/.*/ \&&,/' >>liboilarray.c.tmp + echo ' NULL' >>liboilarray.c.tmp + echo '};' >>liboilarray.c.tmp + echo >>liboilarray.c.tmp + cmp liboilarray.c.tmp liboilarray.c || mv liboilarray.c.tmp liboilarray.c liboilfuncs.h: ./build_prototypes >liboilfuncs.h diff --git a/liboil/colorspace/Makefile.am b/liboil/colorspace/Makefile.am index 98a18ff..c39a2cb 100644 --- a/liboil/colorspace/Makefile.am +++ b/liboil/colorspace/Makefile.am @@ -17,7 +17,8 @@ c_sources = \ if HAVE_CPU_POWERPC powerpc_sources = \ rgb2bgr_powerpc.c \ - rgb2rgba_powerpc.c + rgb2rgba_powerpc.c \ + resample_powerpc.c else powerpc_sources = endif diff --git a/liboil/colorspace/resample.c b/liboil/colorspace/resample.c index e07d6c5..831f93f 100644 --- a/liboil/colorspace/resample.c +++ b/liboil/colorspace/resample.c @@ -32,6 +32,7 @@ #include <liboil/liboil.h> #include <liboil/liboilfunction.h> #include <liboil/liboiltest.h> +#include <liboil/liboilrandom.h> static void @@ -109,8 +110,17 @@ resample_linear_argb_ref (uint32_t *d, uint32_t *s, int n, uint32_t *in) OIL_DEFINE_IMPL_REF (resample_linear_argb_ref, resample_linear_argb); -OIL_DEFINE_CLASS (merge_linear_argb, - "uint32_t *d_n, uint32_t *s_n, uint32_t *s2_n, uint32_t *s3_1, int n"); +static void +merge_linear_argb_test (OilTest *test) +{ + uint32_t *src3 = (uint32_t *)(test->params[OIL_ARG_SRC3].src_data + + OIL_TEST_HEADER); + + src3[0] = oil_rand_u8(); +} +OIL_DEFINE_CLASS_FULL (merge_linear_argb, + "uint32_t *d_n, uint32_t *s_n, uint32_t *s2_n, uint32_t *s3_1, int n", + merge_linear_argb_test); static void merge_linear_argb_ref (uint32_t *d, uint32_t *s1, uint32_t *s2, diff --git a/liboil/colorspace/resample_powerpc.c b/liboil/colorspace/resample_powerpc.c new file mode 100644 index 0000000..eef688a --- /dev/null +++ b/liboil/colorspace/resample_powerpc.c @@ -0,0 +1,98 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <liboil/liboil.h> +#include <liboil/liboilfunction.h> +#include <liboil/liboiltest.h> + + +OIL_DECLARE_CLASS (merge_linear_argb); + +#define ALIGN_UP(ptr,boundary) ((void *)(((unsigned long)ptr + boundary-1) & (~(boundary-1)))) + +#if 0 +static uint16_t consts[8] __attribute__ ((__aligned__ (16))) = { + 256, 256, 256, 256, 256, 256, 256, 256 }; +#endif + +static void +merge_linear_argb_powerpc (uint32_t *dest, uint32_t *src1, uint32_t *src2, + uint32_t *src3, int n) +{ + uint32_t tmp[48]; + uint32_t *atmp; + int i; + + atmp = ALIGN_UP(tmp,16); + if (src3[0] == 0) { + for(i=0;i<n;i++){ + dest[i] = src1[i]; + } + } else if (src3[0] == 256) { + for(i=0;i<n;i++){ + dest[i] = src2[i]; + } + } else { + for(i=0;i<n;i+=2){ + atmp[0] = src1[i]; + atmp[1] = src1[i+1]; + atmp[2] = src2[i]; + atmp[3] = src2[i+1]; + atmp[4] = 256 - src3[0]; + atmp[5] = src3[0]; + + asm volatile( + " lvx v0, 0, %0\n" + " li r11, 16\n" + " lvx v1, r11, %0\n" + " vxor v2, v2, v2\n" + " vmrghb v3, v2, v0\n" + " vmrglb v4, v2, v0\n" + " vsplth v5, v1, 1\n" + " vsplth v6, v1, 3\n" + " vmuloub v0, v4, v6\n" + " vmuloub v1, v3, v5\n" + " vadduhs v0, v0, v1\n" + " vspltish v1, 8\n" + " vsrh v0, v0, v1\n" + " vpkuhus v0, v0, v0\n" + " stvx v0, 0, %0\n" + : + : "b" (atmp)); + + dest[i] = atmp[0]; + if (i+1<n) dest[i+1] = atmp[1]; + } + } +} +OIL_DEFINE_IMPL_FULL (merge_linear_argb_powerpc, merge_linear_argb, OIL_IMPL_FLAG_ALTIVEC); + + diff --git a/liboil/colorspace/rgb2rgba_powerpc.c b/liboil/colorspace/rgb2rgba_powerpc.c index 91a2626..9757e1d 100644 --- a/liboil/colorspace/rgb2rgba_powerpc.c +++ b/liboil/colorspace/rgb2rgba_powerpc.c @@ -34,13 +34,13 @@ OIL_DECLARE_CLASS (rgb2rgba); static void -rgb2rgba_ppc (uint8_t *dest, uint8_t* src, int n) +rgb2rgba_powerpcasm (uint8_t *dest, uint8_t* src, int n) { + src -= 3; dest -= 4; asm volatile ( " mtctr %2 \n" - "1: lswi r10, %1, 3 \n" - " addi %1, %1, 3 \n" + "1: lwzu r10, 3(%1) \n" " ori r10, r10, 0xFF \n" " stwu r10, 4(%0) \n" " bdnz 1b \n" @@ -48,5 +48,5 @@ rgb2rgba_ppc (uint8_t *dest, uint8_t* src, int n) : "b" (n) : "10", "ctr"); } +OIL_DEFINE_IMPL_ASM (rgb2rgba_powerpcasm, rgb2rgba); -OIL_DEFINE_IMPL_ASM (rgb2rgba_ppc, rgb2rgba); diff --git a/liboil/copy/Makefile.am b/liboil/copy/Makefile.am index fb86613..dc2aebf 100644 --- a/liboil/copy/Makefile.am +++ b/liboil/copy/Makefile.am @@ -9,7 +9,8 @@ noinst_LTLIBRARIES = libcopy.la $(opt_libs) if HAVE_CPU_POWERPC powerpc_sources = \ - copy_powerpc.c + copy_powerpc.c \ + splat_powerpc.c else powerpc_sources = endif diff --git a/liboil/copy/splat_powerpc.c b/liboil/copy/splat_powerpc.c new file mode 100644 index 0000000..e33b928 --- /dev/null +++ b/liboil/copy/splat_powerpc.c @@ -0,0 +1,144 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2004 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <liboil/liboilfunction.h> +#include <liboil/liboilfunction.h> + +OIL_DECLARE_CLASS(splat_u8_ns); +OIL_DECLARE_CLASS(splat_u32_ns); + +static void +splat_u8_ns_altivec (uint8_t *dest, uint8_t *src, int n) +{ + while(((unsigned long)dest & 0xf) && n>0) { + *dest++ = *src; + n--; + } + + if (n/16) { + asm volatile ( + " mtctr %2\n" + " li r11, 0\n" + " lvsl v2, 0, %1\n" + " lvx v0, 0, %1\n" + " vperm v0, v0, v0, v2\n" + " vspltb v0, v0, 0\n" + "1:\n" + " stvx v0, r11, %0\n" + " addi r11, r11, 16\n" + " bdnz+ 1b\n" + " add %0, %0, r11\n" + : "+b" (dest), "+b" (src) + : "r" (n/16)); + } + + n &= 0xf; + while(n) { + *dest++ = *src; + n--; + } +} +OIL_DEFINE_IMPL_FULL (splat_u8_ns_altivec, splat_u8_ns, OIL_IMPL_FLAG_ALTIVEC); + +static void +splat_u8_ns_altivec2 (uint8_t *dest, uint8_t *src, int n) +{ + while(((unsigned long)dest & 0xf) && n>0) { + *dest++ = *src; + n--; + } + + if (n/64) { + asm volatile ( + " mtctr %2\n" + " li r11, 0\n" + " lvsl v2, 0, %1\n" + " lvx v0, 0, %1\n" + " vperm v0, v0, v0, v2\n" + " vspltb v0, v0, 0\n" + "1:\n" + " stvx v0, r11, %0\n" + " addi r11, r11, 16\n" + " stvx v0, r11, %0\n" + " addi r11, r11, 16\n" + " stvx v0, r11, %0\n" + " addi r11, r11, 16\n" + " stvx v0, r11, %0\n" + " addi r11, r11, 16\n" + " bdnz+ 1b\n" + " add %0, %0, r11\n" + : "+b" (dest), "+b" (src) + : "r" (n/64)); + } + + n &= 0x3f; + while(n) { + *dest++ = *src; + n--; + } +} +OIL_DEFINE_IMPL_FULL (splat_u8_ns_altivec2, splat_u8_ns, OIL_IMPL_FLAG_ALTIVEC); + +/* With a little love, this could work with 4-byte unaligned dest, + * but I'm not feeling loving today. */ +static void +splat_u32_ns_altivec (uint32_t *dest, uint32_t *src, int n) +{ + while(((unsigned long)dest & 0xc) && n>0) { + *dest++ = *src; + n--; + } + + if (n/4) { + asm volatile ( + " mtctr %2\n" + " li r11, 0\n" + " lvsl v2, 0, %1\n" + " lvx v0, 0, %1\n" + " vperm v0, v0, v0, v2\n" + " vspltw v0, v0, 0\n" + "1:\n" + " stvx v0, r11, %0\n" + " addi r11, r11, 16\n" + " bdnz+ 1b\n" + " add %0, %0, r11\n" + : "+b" (dest), "+b" (src) + : "r" (n/4)); + } + + n &= 0x3; + while(n) { + *dest++ = *src; + n--; + } +} +OIL_DEFINE_IMPL_FULL (splat_u32_ns_altivec, splat_u32_ns, OIL_IMPL_FLAG_ALTIVEC); + |