summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Schleef <ds@schleef.org>2005-06-18 06:33:44 +0000
committerDavid Schleef <ds@schleef.org>2005-06-18 06:33:44 +0000
commit9bc8a2317bfb40534c9be659f198e9fb53d09f3d (patch)
tree868be118e872d49d5bc0b65120d3310c0d0e9d8d
parentcb5c9c5b48b447b3fc3c3d31072b9fc6b52b54fd (diff)
downloadliboil-9bc8a2317bfb40534c9be659f198e9fb53d09f3d.tar.gz
* liboil/Makefile.am: create a tmpfile for liboilarray.c to
  avoid unnecessary building of liboil-0.3.la
* liboil/colorspace/Makefile.am: add resample_powerpc.c
* liboil/colorspace/resample.c: (merge_linear_argb_test): add test
  function, since one parameter needs to be in the range [0,256]
* liboil/colorspace/resample_powerpc.c: (merge_linear_argb_powerpc):
  altivec impl
* liboil/colorspace/rgb2rgba_powerpc.c: (rgb2rgba_powerpcasm):
  Improve the asm
* liboil/copy/Makefile.am: new file
* liboil/copy/splat_powerpc.c: (splat_u8_ns_altivec),
  (splat_u8_ns_altivec2), (splat_u32_ns_altivec): some altivec impls
-rw-r--r--ChangeLog15
-rw-r--r--liboil/Makefile.am37
-rw-r--r--liboil/colorspace/Makefile.am3
-rw-r--r--liboil/colorspace/resample.c14
-rw-r--r--liboil/colorspace/resample_powerpc.c98
-rw-r--r--liboil/colorspace/rgb2rgba_powerpc.c8
-rw-r--r--liboil/copy/Makefile.am3
-rw-r--r--liboil/copy/splat_powerpc.c144
8 files changed, 296 insertions, 26 deletions
diff --git a/ChangeLog b/ChangeLog
index 294167e..8043a01 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,20 @@
2005-06-17 David Schleef <ds@schleef.org>
+ * liboil/Makefile.am: create a tmpfile for liboilarray.c to
+ avoid unnecessary building of liboil-0.3.la
+ * liboil/colorspace/Makefile.am: add resample_powerpc.c
+ * liboil/colorspace/resample.c: (merge_linear_argb_test): add test
+ function, since one parameter needs to be in the range [0,256]
+ * liboil/colorspace/resample_powerpc.c: (merge_linear_argb_powerpc):
+ altivec impl
+ * liboil/colorspace/rgb2rgba_powerpc.c: (rgb2rgba_powerpcasm):
+ Improve the asm
+ * liboil/copy/Makefile.am: new file
+ * liboil/copy/splat_powerpc.c: (splat_u8_ns_altivec),
+ (splat_u8_ns_altivec2), (splat_u32_ns_altivec): some altivec impls
+
+2005-06-17 David Schleef <ds@schleef.org>
+
* liboil/copy/copy_powerpc.c: (copy_u8_altivec),
(copy_u8_altivec2): Rewrite so that they actually work.
* liboil/motovec/Makefile.am: Fix up motovec stuff
diff --git a/liboil/Makefile.am b/liboil/Makefile.am
index e245e7c..c6e0590 100644
--- a/liboil/Makefile.am
+++ b/liboil/Makefile.am
@@ -88,28 +88,29 @@ build_marshal_CFLAGS = $(LIBOIL_CFLAGS)
build_marshal_LDADD = $(LIBOIL_LIBS)
liboilarray.c: liboiltmp1.la Makefile
- echo '/* This file is autogenerated. Do not edit */' >liboilarray.c
- echo >>liboilarray.c
- echo '#include <liboil/liboilfunction.h>' >>liboilarray.c
- echo >>liboilarray.c
+ echo '/* This file is autogenerated. Do not edit */' >liboilarray.c.tmp
+ echo >>liboilarray.c.tmp
+ echo '#include <liboil/liboilfunction.h>' >>liboilarray.c.tmp
+ echo >>liboilarray.c.tmp
grep '^_oil_function_class_' .libs/liboiltmp1.exp | \
- sed 's/.*/extern OilFunctionClass &;/' >>liboilarray.c
- echo >>liboilarray.c
- echo 'OilFunctionClass *_oil_function_class_array[] = {' >>liboilarray.c
+ sed 's/.*/extern OilFunctionClass &;/' >>liboilarray.c.tmp
+ echo >>liboilarray.c.tmp
+ echo 'OilFunctionClass *_oil_function_class_array[] = {' >>liboilarray.c.tmp
grep '^_oil_function_class_' .libs/liboiltmp1.exp | \
- sed 's/.*/ \&&,/' >>liboilarray.c
- echo ' NULL' >>liboilarray.c
- echo '};' >>liboilarray.c
- echo >>liboilarray.c
+ sed 's/.*/ \&&,/' >>liboilarray.c.tmp
+ echo ' NULL' >>liboilarray.c.tmp
+ echo '};' >>liboilarray.c.tmp
+ echo >>liboilarray.c.tmp
grep '^_oil_function_impl_' .libs/liboiltmp1.exp | \
- sed 's/.*/extern OilFunctionImpl &;/' >>liboilarray.c
- echo >>liboilarray.c
- echo 'OilFunctionImpl *_oil_function_impl_array[] = {' >>liboilarray.c
+ sed 's/.*/extern OilFunctionImpl &;/' >>liboilarray.c.tmp
+ echo >>liboilarray.c.tmp
+ echo 'OilFunctionImpl *_oil_function_impl_array[] = {' >>liboilarray.c.tmp
grep '^_oil_function_impl_' .libs/liboiltmp1.exp | \
- sed 's/.*/ \&&,/' >>liboilarray.c
- echo ' NULL' >>liboilarray.c
- echo '};' >>liboilarray.c
- echo >>liboilarray.c
+ sed 's/.*/ \&&,/' >>liboilarray.c.tmp
+ echo ' NULL' >>liboilarray.c.tmp
+ echo '};' >>liboilarray.c.tmp
+ echo >>liboilarray.c.tmp
+ cmp liboilarray.c.tmp liboilarray.c || mv liboilarray.c.tmp liboilarray.c
liboilfuncs.h:
./build_prototypes >liboilfuncs.h
diff --git a/liboil/colorspace/Makefile.am b/liboil/colorspace/Makefile.am
index 98a18ff..c39a2cb 100644
--- a/liboil/colorspace/Makefile.am
+++ b/liboil/colorspace/Makefile.am
@@ -17,7 +17,8 @@ c_sources = \
if HAVE_CPU_POWERPC
powerpc_sources = \
rgb2bgr_powerpc.c \
- rgb2rgba_powerpc.c
+ rgb2rgba_powerpc.c \
+ resample_powerpc.c
else
powerpc_sources =
endif
diff --git a/liboil/colorspace/resample.c b/liboil/colorspace/resample.c
index e07d6c5..831f93f 100644
--- a/liboil/colorspace/resample.c
+++ b/liboil/colorspace/resample.c
@@ -32,6 +32,7 @@
#include <liboil/liboil.h>
#include <liboil/liboilfunction.h>
#include <liboil/liboiltest.h>
+#include <liboil/liboilrandom.h>
static void
@@ -109,8 +110,17 @@ resample_linear_argb_ref (uint32_t *d, uint32_t *s, int n, uint32_t *in)
OIL_DEFINE_IMPL_REF (resample_linear_argb_ref, resample_linear_argb);
-OIL_DEFINE_CLASS (merge_linear_argb,
- "uint32_t *d_n, uint32_t *s_n, uint32_t *s2_n, uint32_t *s3_1, int n");
+/*
+ * Test-setup hook registered with the merge_linear_argb class below.
+ *
+ * It overwrites the first 32-bit word of the SRC3 test buffer (the word
+ * located OIL_TEST_HEADER past src_data) with the value returned by
+ * oil_rand_u8().
+ *
+ * NOTE(review): presumably this keeps the blend factor inside the [0,256]
+ * range the implementations expect; confirm the range of oil_rand_u8().
+ */
+static void
+merge_linear_argb_test (OilTest *test)
+{
+  uint32_t *blend_factor;
+
+  blend_factor = (uint32_t *)(test->params[OIL_ARG_SRC3].src_data +
+      OIL_TEST_HEADER);
+  *blend_factor = oil_rand_u8();
+}
+OIL_DEFINE_CLASS_FULL (merge_linear_argb,
+    "uint32_t *d_n, uint32_t *s_n, uint32_t *s2_n, uint32_t *s3_1, int n",
+    merge_linear_argb_test);
static void
merge_linear_argb_ref (uint32_t *d, uint32_t *s1, uint32_t *s2,
diff --git a/liboil/colorspace/resample_powerpc.c b/liboil/colorspace/resample_powerpc.c
new file mode 100644
index 0000000..eef688a
--- /dev/null
+++ b/liboil/colorspace/resample_powerpc.c
@@ -0,0 +1,98 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboil.h>
+#include <liboil/liboilfunction.h>
+#include <liboil/liboiltest.h>
+
+
+OIL_DECLARE_CLASS (merge_linear_argb);
+
+/* Round ptr up to the next multiple of boundary (boundary must be a power
+ * of two).  Arguments are parenthesized so expression arguments expand
+ * with the intended precedence. */
+#define ALIGN_UP(ptr,boundary) ((void *)(((unsigned long)(ptr) + (boundary)-1) & (~((boundary)-1))))
+
+#if 0
+static uint16_t consts[8] __attribute__ ((__aligned__ (16))) = {
+ 256, 256, 256, 256, 256, 256, 256, 256 };
+#endif
+
+/*
+ * AltiVec implementation of merge_linear_argb.
+ *
+ * Blends n 32-bit pixels, treating every byte as an independent channel:
+ *   dest = (src1 * (256 - src3[0]) + src2 * src3[0]) >> 8
+ *
+ * src3[0] is the blend factor.  The values 0 and 256 are handled as plain
+ * copies of src1 / src2; the vector path multiplies by the low byte of each
+ * weight only (vmuloub), so it is used only when the factor is neither 0
+ * nor 256.
+ *
+ * Pixels are processed two at a time through a 16-byte aligned scratch
+ * buffer: words 0-1 hold the src1 pair, words 2-3 the src2 pair and
+ * words 4-5 the two weights.
+ */
+static void
+merge_linear_argb_powerpc (uint32_t *dest, uint32_t *src1, uint32_t *src2,
+    uint32_t *src3, int n)
+{
+  uint32_t tmp[48];
+  uint32_t *atmp;
+  int i;
+
+  /* lvx/stvx ignore the low four address bits, so work inside a 16-byte
+   * aligned window of tmp. */
+  atmp = ALIGN_UP(tmp,16);
+  if (src3[0] == 0) {
+    for(i=0;i<n;i++){
+      dest[i] = src1[i];
+    }
+  } else if (src3[0] == 256) {
+    for(i=0;i<n;i++){
+      dest[i] = src2[i];
+    }
+  } else {
+    for(i=0;i<n;i+=2){
+      /* For odd n the last iteration has only one pixel; do not read
+       * src1[n] / src2[n], which lie past the end of the inputs. */
+      int have_pair = (i+1<n);
+
+      atmp[0] = src1[i];
+      atmp[1] = have_pair ? src1[i+1] : 0;
+      atmp[2] = src2[i];
+      atmp[3] = have_pair ? src2[i+1] : 0;
+      atmp[4] = 256 - src3[0];
+      atmp[5] = src3[0];
+
+      /* v0 = pixels, v1 = weights; bytes are widened to halfwords, scaled,
+       * summed, shifted right by 8 and packed back to bytes.  The clobber
+       * list tells the compiler which registers are overwritten and that
+       * the scratch buffer is read and written behind its back. */
+      asm volatile(
+        " lvx v0, 0, %0\n"
+        " li r11, 16\n"
+        " lvx v1, r11, %0\n"
+        " vxor v2, v2, v2\n"
+        " vmrghb v3, v2, v0\n"
+        " vmrglb v4, v2, v0\n"
+        " vsplth v5, v1, 1\n"
+        " vsplth v6, v1, 3\n"
+        " vmuloub v0, v4, v6\n"
+        " vmuloub v1, v3, v5\n"
+        " vadduhs v0, v0, v1\n"
+        " vspltish v1, 8\n"
+        " vsrh v0, v0, v1\n"
+        " vpkuhus v0, v0, v0\n"
+        " stvx v0, 0, %0\n"
+      :
+      : "b" (atmp)
+      : "11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "memory");
+
+      dest[i] = atmp[0];
+      if (have_pair) dest[i+1] = atmp[1];
+    }
+  }
+}
+OIL_DEFINE_IMPL_FULL (merge_linear_argb_powerpc, merge_linear_argb, OIL_IMPL_FLAG_ALTIVEC);
+
+
diff --git a/liboil/colorspace/rgb2rgba_powerpc.c b/liboil/colorspace/rgb2rgba_powerpc.c
index 91a2626..9757e1d 100644
--- a/liboil/colorspace/rgb2rgba_powerpc.c
+++ b/liboil/colorspace/rgb2rgba_powerpc.c
@@ -34,13 +34,13 @@
OIL_DECLARE_CLASS (rgb2rgba);
static void
-rgb2rgba_ppc (uint8_t *dest, uint8_t* src, int n)
+rgb2rgba_powerpcasm (uint8_t *dest, uint8_t* src, int n)
{
+ src -= 3;
dest -= 4;
asm volatile (
" mtctr %2 \n"
- "1: lswi r10, %1, 3 \n"
- " addi %1, %1, 3 \n"
+ "1: lwzu r10, 3(%1) \n"
" ori r10, r10, 0xFF \n"
" stwu r10, 4(%0) \n"
" bdnz 1b \n"
@@ -48,5 +48,5 @@ rgb2rgba_ppc (uint8_t *dest, uint8_t* src, int n)
: "b" (n)
: "10", "ctr");
}
+OIL_DEFINE_IMPL_ASM (rgb2rgba_powerpcasm, rgb2rgba);
-OIL_DEFINE_IMPL_ASM (rgb2rgba_ppc, rgb2rgba);
diff --git a/liboil/copy/Makefile.am b/liboil/copy/Makefile.am
index fb86613..dc2aebf 100644
--- a/liboil/copy/Makefile.am
+++ b/liboil/copy/Makefile.am
@@ -9,7 +9,8 @@ noinst_LTLIBRARIES = libcopy.la $(opt_libs)
if HAVE_CPU_POWERPC
powerpc_sources = \
- copy_powerpc.c
+ copy_powerpc.c \
+ splat_powerpc.c
else
powerpc_sources =
endif
diff --git a/liboil/copy/splat_powerpc.c b/liboil/copy/splat_powerpc.c
new file mode 100644
index 0000000..e33b928
--- /dev/null
+++ b/liboil/copy/splat_powerpc.c
@@ -0,0 +1,144 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboilfunction.h>
+#include <liboil/liboilfunction.h>
+
+OIL_DECLARE_CLASS(splat_u8_ns);
+OIL_DECLARE_CLASS(splat_u32_ns);
+
+/*
+ * Fill dest[0..n-1] with the single byte *src.
+ *
+ * Three phases: a scalar head until dest is 16-byte aligned, an AltiVec
+ * loop storing one 16-byte vector per iteration, and a scalar tail for the
+ * remaining (n & 0xf) bytes.
+ */
+static void
+splat_u8_ns_altivec (uint8_t *dest, uint8_t *src, int n)
+{
+  /* stvx ignores the low four address bits, so align dest first. */
+  while(((unsigned long)dest & 0xf) && n>0) {
+    *dest++ = *src;
+    n--;
+  }
+
+  if (n/16) {
+    /* lvsl/lvx/vperm move the byte at src into element 0 regardless of
+     * src alignment; vspltb replicates it across the vector.  r11 is the
+     * running store offset and is added to dest on exit.  The clobber list
+     * keeps the compiler from allocating operands to r11 or assuming
+     * CTR, v0, v2 and memory are preserved. */
+    asm volatile (
+      " mtctr %2\n"
+      " li r11, 0\n"
+      " lvsl v2, 0, %1\n"
+      " lvx v0, 0, %1\n"
+      " vperm v0, v0, v0, v2\n"
+      " vspltb v0, v0, 0\n"
+      "1:\n"
+      " stvx v0, r11, %0\n"
+      " addi r11, r11, 16\n"
+      " bdnz+ 1b\n"
+      " add %0, %0, r11\n"
+    : "+b" (dest), "+b" (src)
+    : "r" (n/16)
+    : "11", "ctr", "v0", "v2", "memory");
+  }
+
+  /* Scalar tail: whatever did not fill a whole vector. */
+  n &= 0xf;
+  while(n) {
+    *dest++ = *src;
+    n--;
+  }
+}
+OIL_DEFINE_IMPL_FULL (splat_u8_ns_altivec, splat_u8_ns, OIL_IMPL_FLAG_ALTIVEC);
+
+/*
+ * Fill dest[0..n-1] with the single byte *src; variant of
+ * splat_u8_ns_altivec whose vector loop is unrolled to store 64 bytes
+ * (four vectors) per iteration.
+ *
+ * Three phases: a scalar head until dest is 16-byte aligned, the unrolled
+ * AltiVec loop, and a scalar tail for the remaining (n & 0x3f) bytes.
+ */
+static void
+splat_u8_ns_altivec2 (uint8_t *dest, uint8_t *src, int n)
+{
+  /* stvx ignores the low four address bits, so align dest first. */
+  while(((unsigned long)dest & 0xf) && n>0) {
+    *dest++ = *src;
+    n--;
+  }
+
+  if (n/64) {
+    /* r11 is the running store offset and is added to dest on exit.  The
+     * clobber list keeps the compiler from allocating operands to r11 or
+     * assuming CTR, v0, v2 and memory are preserved. */
+    asm volatile (
+      " mtctr %2\n"
+      " li r11, 0\n"
+      " lvsl v2, 0, %1\n"
+      " lvx v0, 0, %1\n"
+      " vperm v0, v0, v0, v2\n"
+      " vspltb v0, v0, 0\n"
+      "1:\n"
+      " stvx v0, r11, %0\n"
+      " addi r11, r11, 16\n"
+      " stvx v0, r11, %0\n"
+      " addi r11, r11, 16\n"
+      " stvx v0, r11, %0\n"
+      " addi r11, r11, 16\n"
+      " stvx v0, r11, %0\n"
+      " addi r11, r11, 16\n"
+      " bdnz+ 1b\n"
+      " add %0, %0, r11\n"
+    : "+b" (dest), "+b" (src)
+    : "r" (n/64)
+    : "11", "ctr", "v0", "v2", "memory");
+  }
+
+  /* Scalar tail: whatever did not fill a whole 64-byte group. */
+  n &= 0x3f;
+  while(n) {
+    *dest++ = *src;
+    n--;
+  }
+}
+OIL_DEFINE_IMPL_FULL (splat_u8_ns_altivec2, splat_u8_ns, OIL_IMPL_FLAG_ALTIVEC);
+
+/*
+ * Fill dest[0..n-1] with the single 32-bit word *src.
+ *
+ * Three phases: a scalar head until dest is 16-byte aligned, an AltiVec
+ * loop storing four words per iteration, and a scalar tail for the
+ * remaining (n & 0x3) words.
+ *
+ * With a little love, this could work with 4-byte unaligned dest,
+ * but I'm not feeling loving today. */
+static void
+splat_u32_ns_altivec (uint32_t *dest, uint32_t *src, int n)
+{
+  /* Only address bits 2-3 are tested: the head loop advances in 4-byte
+   * steps, so a dest that is not word aligned never becomes 16-byte
+   * aligned (see the note above). */
+  while(((unsigned long)dest & 0xc) && n>0) {
+    *dest++ = *src;
+    n--;
+  }
+
+  if (n/4) {
+    /* lvsl/lvx/vperm rotate the word at src into element 0; vspltw
+     * replicates it across the vector.  r11 is the running store offset
+     * and is added to dest on exit.  The clobber list keeps the compiler
+     * from allocating operands to r11 or assuming CTR, v0, v2 and memory
+     * are preserved. */
+    asm volatile (
+      " mtctr %2\n"
+      " li r11, 0\n"
+      " lvsl v2, 0, %1\n"
+      " lvx v0, 0, %1\n"
+      " vperm v0, v0, v0, v2\n"
+      " vspltw v0, v0, 0\n"
+      "1:\n"
+      " stvx v0, r11, %0\n"
+      " addi r11, r11, 16\n"
+      " bdnz+ 1b\n"
+      " add %0, %0, r11\n"
+    : "+b" (dest), "+b" (src)
+    : "r" (n/4)
+    : "11", "ctr", "v0", "v2", "memory");
+  }
+
+  /* Scalar tail: whatever did not fill a whole vector. */
+  n &= 0x3;
+  while(n) {
+    *dest++ = *src;
+    n--;
+  }
+}
+OIL_DEFINE_IMPL_FULL (splat_u32_ns_altivec, splat_u32_ns, OIL_IMPL_FLAG_ALTIVEC);
+