summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author David Schleef <ds@schleef.org> 2005-06-18 01:07:34 +0000
committer David Schleef <ds@schleef.org> 2005-06-18 01:07:34 +0000
commit cb5c9c5b48b447b3fc3c3d31072b9fc6b52b54fd (patch)
tree ee0d804857ab91e066d557333e220257739d1f61
parent d64fd56082933579566d4bf45d3f421d3eba8392 (diff)
download liboil-cb5c9c5b48b447b3fc3c3d31072b9fc6b52b54fd.tar.gz
* liboil/copy/copy_powerpc.c: (copy_u8_altivec),
(copy_u8_altivec2): Rewrite so that they actually work.
* liboil/motovec/Makefile.am: Fix up motovec stuff
* liboil/motovec/motovec.c: (copy_u8_motovec),
(splat_u8_ns_motovec): same
-rw-r--r-- ChangeLog 8
-rw-r--r-- liboil/copy/copy_powerpc.c 144
-rw-r--r-- liboil/motovec/Makefile.am 5
-rw-r--r-- liboil/motovec/motovec.c 55
4 files changed, 128 insertions, 84 deletions
diff --git a/ChangeLog b/ChangeLog
index 76a2b64..294167e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
2005-06-17 David Schleef <ds@schleef.org>
+ * liboil/copy/copy_powerpc.c: (copy_u8_altivec),
+ (copy_u8_altivec2): Rewrite so that they actually work.
+ * liboil/motovec/Makefile.am: Fix up motovec stuff
+ * liboil/motovec/motovec.c: (copy_u8_motovec),
+ (splat_u8_ns_motovec): same
+
+2005-06-17 David Schleef <ds@schleef.org>
+
* configure.ac: snarf LIBMOTOVEC because it has a compatible
license.
* COPYING:
diff --git a/liboil/copy/copy_powerpc.c b/liboil/copy/copy_powerpc.c
index 7a7cb6b..57b60f2 100644
--- a/liboil/copy/copy_powerpc.c
+++ b/liboil/copy/copy_powerpc.c
@@ -37,30 +37,46 @@ OIL_DECLARE_CLASS(copy_u8);
static void
copy_u8_altivec (uint8_t *dest, uint8_t *src, int n)
{
- while((unsigned long)dest & 0xf) {
+ while((unsigned long)dest & 0xf && n>0) {
*dest++ = *src++;
n--;
}
if (n/16) {
- asm volatile (
- " mtctr %2\n"
- " li r11, 0\n"
- " lvsl v2, 0, %1\n"
- " lvx v0, 0, %1\n"
- " addi %1, %1, 16\n"
- "1:\n"
- " lvxl v1, r11, %1\n"
- " vperm v0, v0, v1, v2\n"
- " stvxl v0, r11, %0\n"
- " vor v0, v1, v1\n"
- " addi r11, r11, 16\n"
- " bdnz+ 1b\n"
- " add %0, %0, r11\n"
- " add %1, %1, r11\n"
- " addi %1, %1, -16\n"
- : "+b" (dest), "+b" (src)
- : "r" (n/16));
+ if ((unsigned long)src & 0xf) {
+ asm volatile (
+ " mtctr %2\n"
+ " li r11, 0\n"
+ " lvsl v2, 0, %1\n"
+ " lvx v0, 0, %1\n"
+ " addi %1, %1, 16\n"
+ "1:\n"
+ " lvx v1, r11, %1\n"
+ " vperm v0, v0, v1, v2\n"
+ " stvx v0, r11, %0\n"
+ " vor v0, v1, v1\n"
+ " addi r11, r11, 16\n"
+ " bdnz+ 1b\n"
+ " add %0, %0, r11\n"
+ " add %1, %1, r11\n"
+ " addi %1, %1, -16\n"
+ : "+b" (dest), "+b" (src)
+ : "r" (n/16));
+ } else {
+ asm volatile (
+ " mtctr %2\n"
+ " li r11, 0\n"
+ "1:\n"
+ " lvx v1, r11, %1\n"
+ " stvx v1, r11, %0\n"
+ " addi r11, r11, 16\n"
+ " bdnz+ 1b\n"
+ " add %0, %0, r11\n"
+ " add %1, %1, r11\n"
+ " addi %1, %1, -16\n"
+ : "+b" (dest), "+b" (src)
+ : "r" (n/16));
+ }
}
n &= 0xf;
@@ -71,98 +87,60 @@ copy_u8_altivec (uint8_t *dest, uint8_t *src, int n)
}
OIL_DEFINE_IMPL_FULL (copy_u8_altivec, copy_u8, OIL_IMPL_FLAG_ALTIVEC);
-
-
-
-
static void
copy_u8_altivec2 (uint8_t *dest, uint8_t *src, int n)
{
- while((unsigned long)dest & 0xf) {
+ while((unsigned long)dest & 0xf && n>0) {
*dest++ = *src++;
n--;
}
- if (n/16) {
+ if (n/64) {
asm volatile (
" mtctr %2\n"
" li r11, 0\n"
- " lvsl v2, 0, %1\n"
+ " lvsl v5, 0, %1\n"
" lvx v0, 0, %1\n"
" addi %1, %1, 16\n"
"1:\n"
-#if 0
- " lvxl v1, r11, %1\n"
- " vperm v0, v0, v1, v2\n"
- " stvxl v0, r11, %0\n"
- " vor v0, v1, v1\n"
+ " lvx v1, r11, %1\n"
" addi r11, r11, 16\n"
-#endif
- " stvx v0, r11, %0\n"
- " stvx v0, r11, %0\n"
- " stvx v0, r11, %0\n"
- " stvx v0, r11, %0\n"
- " bdnz+ 1b\n"
- " add %0, %0, r11\n"
- " add %1, %1, r11\n"
- " addi %1, %1, -16\n"
- : "+b" (dest), "+b" (src)
- : "r" (n/16));
- }
-
- n &= 0xf;
- while(n) {
- *dest++ = *src++;
- n--;
- }
-}
-OIL_DEFINE_IMPL_FULL (copy_u8_altivec2, copy_u8, OIL_IMPL_FLAG_ALTIVEC);
-
-
+ " lvx v2, r11, %1\n"
+ " addi r11, r11, 16\n"
+ " lvx v3, r11, %1\n"
+ " addi r11, r11, 16\n"
+ " lvx v4, r11, %1\n"
+ " addi r11, r11, -48\n"
+ " vperm v0, v0, v1, v5\n"
+ " vperm v1, v1, v2, v5\n"
+ " vperm v2, v2, v3, v5\n"
+ " vperm v3, v3, v4, v5\n"
-static void
-copy_u8_altivec3 (uint8_t *dest, uint8_t *src, int n)
-{
- while((unsigned long)dest & 0xf) {
- *dest++ = *src++;
- n--;
- }
+ " stvx v0, r11, %0\n"
+ " addi r11, r11, 16\n"
+ " stvx v1, r11, %0\n"
+ " addi r11, r11, 16\n"
+ " stvx v2, r11, %0\n"
+ " addi r11, r11, 16\n"
+ " stvx v3, r11, %0\n"
+ " addi r11, r11, 16\n"
+ " vor v0, v4, v4\n"
- if (n/32) {
- asm volatile (
- " mtctr %2\n"
- " li r11, 0\n"
- " lvsl v3, 0, %1\n"
- " lvx v0, 0, %1\n"
- " addi %1, %1, 16\n"
- "1:\n"
- " lvxl v1, r11, %1\n"
- " lvxl v2, r11, %1\n"
- " lvxl v3, r11, %1\n"
- " lvxl v4, r11, %1\n"
- " stvxl v1, r11, %0\n"
- " stvxl v2, r11, %0\n"
- " stvxl v3, r11, %0\n"
- " stvxl v4, r11, %0\n"
" bdnz+ 1b\n"
" add %0, %0, r11\n"
" add %1, %1, r11\n"
" addi %1, %1, -16\n"
: "+b" (dest), "+b" (src)
- : "r" (n/32));
+ : "r" (n/64));
}
- n &= 0xf;
+ n &= 0x3f;
while(n) {
*dest++ = *src++;
n--;
}
}
-OIL_DEFINE_IMPL_FULL (copy_u8_altivec3, copy_u8, OIL_IMPL_FLAG_ALTIVEC);
-
-
-
-
+OIL_DEFINE_IMPL_FULL (copy_u8_altivec2, copy_u8, OIL_IMPL_FLAG_ALTIVEC);
diff --git a/liboil/motovec/Makefile.am b/liboil/motovec/Makefile.am
index a56fb98..12bf0af 100644
--- a/liboil/motovec/Makefile.am
+++ b/liboil/motovec/Makefile.am
@@ -5,7 +5,9 @@ c_sources =
if HAVE_CPU_POWERPC
powerpc_sources = \
- vec_memcpy.S
+ motovec.c \
+ vec_memcpy.S \
+ vec_memset.S
else
powerpc_sources =
endif
@@ -14,4 +16,5 @@ libmotovec_la_SOURCES = \
$(powerpc_sources)
libmotovec_la_LIBADD =
libmotovec_la_CFLAGS = $(LIBOIL_CFLAGS)
+libmotovec_la_CCASFLAGS = $(LIBOIL_CFLAGS)
diff --git a/liboil/motovec/motovec.c b/liboil/motovec/motovec.c
new file mode 100644
index 0000000..d6b0b03
--- /dev/null
+++ b/liboil/motovec/motovec.c
@@ -0,0 +1,55 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboilfunction.h>
+
+OIL_DECLARE_CLASS(copy_u8);
+OIL_DECLARE_CLASS(splat_u8_ns);
+
+void *vec_memcpy(void *dest, void *src, int n);
+void *vec_memset(void *dest, int val, int n);
+
+static void
+copy_u8_motovec (uint8_t *dest, uint8_t *src, int n)
+{
+ vec_memcpy(dest, src, n);
+}
+OIL_DEFINE_IMPL_FULL (copy_u8_motovec, copy_u8, OIL_IMPL_FLAG_ALTIVEC);
+
+static void
+splat_u8_ns_motovec (uint8_t *dest, uint8_t *src, int n)
+{
+ vec_memset(dest, src[0], n);
+}
+OIL_DEFINE_IMPL_FULL (splat_u8_ns_motovec, splat_u8_ns, OIL_IMPL_FLAG_ALTIVEC);
+
+
+