diff options
author | David Schleef <ds@schleef.org> | 2005-06-18 01:07:34 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-06-18 01:07:34 +0000 |
commit | cb5c9c5b48b447b3fc3c3d31072b9fc6b52b54fd (patch) | |
tree | ee0d804857ab91e066d557333e220257739d1f61 | |
parent | d64fd56082933579566d4bf45d3f421d3eba8392 (diff) | |
download | liboil-cb5c9c5b48b447b3fc3c3d31072b9fc6b52b54fd.tar.gz |
* liboil/copy/copy_powerpc.c: (copy_u8_altivec),
(copy_u8_altivec2): Rewrite so that they actually work.
* liboil/motovec/Makefile.am: Fix up motovec stuff
* liboil/motovec/motovec.c: (copy_u8_motovec),
(splat_u8_ns_motovec): same
-rw-r--r-- | ChangeLog | 8 |
-rw-r--r-- | liboil/copy/copy_powerpc.c | 144 |
-rw-r--r-- | liboil/motovec/Makefile.am | 5 |
-rw-r--r-- | liboil/motovec/motovec.c | 55 |
4 files changed, 128 insertions, 84 deletions
@@ -1,5 +1,13 @@ 2005-06-17 David Schleef <ds@schleef.org> + * liboil/copy/copy_powerpc.c: (copy_u8_altivec), + (copy_u8_altivec2): Rewrite so that they actually work. + * liboil/motovec/Makefile.am: Fix up motovec stuff + * liboil/motovec/motovec.c: (copy_u8_motovec), + (splat_u8_ns_motovec): same + +2005-06-17 David Schleef <ds@schleef.org> + * configure.ac: snarf LIBMOTOVEC because it has a compatible license. * COPYING: diff --git a/liboil/copy/copy_powerpc.c b/liboil/copy/copy_powerpc.c index 7a7cb6b..57b60f2 100644 --- a/liboil/copy/copy_powerpc.c +++ b/liboil/copy/copy_powerpc.c @@ -37,30 +37,46 @@ OIL_DECLARE_CLASS(copy_u8); static void copy_u8_altivec (uint8_t *dest, uint8_t *src, int n) { - while((unsigned long)dest & 0xf) { + while((unsigned long)dest & 0xf && n>0) { *dest++ = *src++; n--; } if (n/16) { - asm volatile ( - " mtctr %2\n" - " li r11, 0\n" - " lvsl v2, 0, %1\n" - " lvx v0, 0, %1\n" - " addi %1, %1, 16\n" - "1:\n" - " lvxl v1, r11, %1\n" - " vperm v0, v0, v1, v2\n" - " stvxl v0, r11, %0\n" - " vor v0, v1, v1\n" - " addi r11, r11, 16\n" - " bdnz+ 1b\n" - " add %0, %0, r11\n" - " add %1, %1, r11\n" - " addi %1, %1, -16\n" - : "+b" (dest), "+b" (src) - : "r" (n/16)); + if ((unsigned long)src & 0xf) { + asm volatile ( + " mtctr %2\n" + " li r11, 0\n" + " lvsl v2, 0, %1\n" + " lvx v0, 0, %1\n" + " addi %1, %1, 16\n" + "1:\n" + " lvx v1, r11, %1\n" + " vperm v0, v0, v1, v2\n" + " stvx v0, r11, %0\n" + " vor v0, v1, v1\n" + " addi r11, r11, 16\n" + " bdnz+ 1b\n" + " add %0, %0, r11\n" + " add %1, %1, r11\n" + " addi %1, %1, -16\n" + : "+b" (dest), "+b" (src) + : "r" (n/16)); + } else { + asm volatile ( + " mtctr %2\n" + " li r11, 0\n" + "1:\n" + " lvx v1, r11, %1\n" + " stvx v1, r11, %0\n" + " addi r11, r11, 16\n" + " bdnz+ 1b\n" + " add %0, %0, r11\n" + " add %1, %1, r11\n" + " addi %1, %1, -16\n" + : "+b" (dest), "+b" (src) + : "r" (n/16)); + } } n &= 0xf; @@ -71,98 +87,60 @@ copy_u8_altivec (uint8_t *dest, uint8_t *src, int n) } 
OIL_DEFINE_IMPL_FULL (copy_u8_altivec, copy_u8, OIL_IMPL_FLAG_ALTIVEC); - - - - static void copy_u8_altivec2 (uint8_t *dest, uint8_t *src, int n) { - while((unsigned long)dest & 0xf) { + while((unsigned long)dest & 0xf && n>0) { *dest++ = *src++; n--; } - if (n/16) { + if (n/64) { asm volatile ( " mtctr %2\n" " li r11, 0\n" - " lvsl v2, 0, %1\n" + " lvsl v5, 0, %1\n" " lvx v0, 0, %1\n" " addi %1, %1, 16\n" "1:\n" -#if 0 - " lvxl v1, r11, %1\n" - " vperm v0, v0, v1, v2\n" - " stvxl v0, r11, %0\n" - " vor v0, v1, v1\n" + " lvx v1, r11, %1\n" " addi r11, r11, 16\n" -#endif - " stvx v0, r11, %0\n" - " stvx v0, r11, %0\n" - " stvx v0, r11, %0\n" - " stvx v0, r11, %0\n" - " bdnz+ 1b\n" - " add %0, %0, r11\n" - " add %1, %1, r11\n" - " addi %1, %1, -16\n" - : "+b" (dest), "+b" (src) - : "r" (n/16)); - } - - n &= 0xf; - while(n) { - *dest++ = *src++; - n--; - } -} -OIL_DEFINE_IMPL_FULL (copy_u8_altivec2, copy_u8, OIL_IMPL_FLAG_ALTIVEC); - - + " lvx v2, r11, %1\n" + " addi r11, r11, 16\n" + " lvx v3, r11, %1\n" + " addi r11, r11, 16\n" + " lvx v4, r11, %1\n" + " addi r11, r11, -48\n" + " vperm v0, v0, v1, v5\n" + " vperm v1, v1, v2, v5\n" + " vperm v2, v2, v3, v5\n" + " vperm v3, v3, v4, v5\n" -static void -copy_u8_altivec3 (uint8_t *dest, uint8_t *src, int n) -{ - while((unsigned long)dest & 0xf) { - *dest++ = *src++; - n--; - } + " stvx v0, r11, %0\n" + " addi r11, r11, 16\n" + " stvx v1, r11, %0\n" + " addi r11, r11, 16\n" + " stvx v2, r11, %0\n" + " addi r11, r11, 16\n" + " stvx v3, r11, %0\n" + " addi r11, r11, 16\n" + " vor v0, v4, v4\n" - if (n/32) { - asm volatile ( - " mtctr %2\n" - " li r11, 0\n" - " lvsl v3, 0, %1\n" - " lvx v0, 0, %1\n" - " addi %1, %1, 16\n" - "1:\n" - " lvxl v1, r11, %1\n" - " lvxl v2, r11, %1\n" - " lvxl v3, r11, %1\n" - " lvxl v4, r11, %1\n" - " stvxl v1, r11, %0\n" - " stvxl v2, r11, %0\n" - " stvxl v3, r11, %0\n" - " stvxl v4, r11, %0\n" " bdnz+ 1b\n" " add %0, %0, r11\n" " add %1, %1, r11\n" " addi %1, %1, -16\n" : "+b" (dest), "+b" (src) 
- : "r" (n/32)); + : "r" (n/64)); } - n &= 0xf; + n &= 0x3f; while(n) { *dest++ = *src++; n--; } } -OIL_DEFINE_IMPL_FULL (copy_u8_altivec3, copy_u8, OIL_IMPL_FLAG_ALTIVEC); - - - - +OIL_DEFINE_IMPL_FULL (copy_u8_altivec2, copy_u8, OIL_IMPL_FLAG_ALTIVEC); diff --git a/liboil/motovec/Makefile.am b/liboil/motovec/Makefile.am index a56fb98..12bf0af 100644 --- a/liboil/motovec/Makefile.am +++ b/liboil/motovec/Makefile.am @@ -5,7 +5,9 @@ c_sources = if HAVE_CPU_POWERPC powerpc_sources = \ - vec_memcpy.S + motovec.c \ + vec_memcpy.S \ + vec_memset.S else powerpc_sources = endif @@ -14,4 +16,5 @@ libmotovec_la_SOURCES = \ $(powerpc_sources) libmotovec_la_LIBADD = libmotovec_la_CFLAGS = $(LIBOIL_CFLAGS) +libmotovec_la_CCASFLAGS = $(LIBOIL_CFLAGS) diff --git a/liboil/motovec/motovec.c b/liboil/motovec/motovec.c new file mode 100644 index 0000000..d6b0b03 --- /dev/null +++ b/liboil/motovec/motovec.c @@ -0,0 +1,55 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2005 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <liboil/liboilfunction.h> + +OIL_DECLARE_CLASS(copy_u8); +OIL_DECLARE_CLASS(splat_u8_ns); + +void *vec_memcpy(void *dest, void *src, int n); +void *vec_memset(void *dest, int val, int n); + +static void +copy_u8_motovec (uint8_t *dest, uint8_t *src, int n) +{ + vec_memcpy(dest, src, n); +} +OIL_DEFINE_IMPL_FULL (copy_u8_motovec, copy_u8, OIL_IMPL_FLAG_ALTIVEC); + +static void +splat_u8_ns_motovec (uint8_t *dest, uint8_t *src, int n) +{ + vec_memset(dest, src[0], n); +} +OIL_DEFINE_IMPL_FULL (splat_u8_ns_motovec, splat_u8_ns, OIL_IMPL_FLAG_ALTIVEC); + + + |