summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Schleef <ds@schleef.org> 2005-04-30 20:15:43 +0000
committer David Schleef <ds@schleef.org> 2005-04-30 20:15:43 +0000
commit 2c6c33f31dcabcb4a9632e8088b2c7bf68566edd (patch)
tree defc73ec8f80aeddc8dc9e3b615c9a5ee7f68cc9
parent 9652109d3de3a3557c5174c914d83a6b0b8e6c41 (diff)
download liboil-2c6c33f31dcabcb4a9632e8088b2c7bf68566edd.tar.gz
* liboil/copy/Makefile.am: Some cool new implementations.
* liboil/copy/trans8x8_c.c: (trans8x8_u16_c1), (trans8x8_u16_c2), (trans8x8_u16_c3), (trans8x8_u16_c4):
* liboil/copy/trans8x8_i386.c: (trans8x8_u16_asm1), (trans8x8_u16_asm2):
-rw-r--r-- ChangeLog 8
-rw-r--r-- liboil/copy/Makefile.am 3
-rw-r--r-- liboil/copy/trans8x8_c.c 118
-rw-r--r-- liboil/copy/trans8x8_i386.c 98
4 files changed, 226 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index a48cc49..b7fc044 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
2005-04-30 David Schleef <ds@schleef.org>
+ * liboil/copy/Makefile.am: Some cool new implementations.
+ * liboil/copy/trans8x8_c.c: (trans8x8_u16_c1), (trans8x8_u16_c2),
+ (trans8x8_u16_c3), (trans8x8_u16_c4):
+ * liboil/copy/trans8x8_i386.c: (trans8x8_u16_asm1),
+ (trans8x8_u16_asm2):
+
+2005-04-30 David Schleef <ds@schleef.org>
+
* examples/uberopt/uberopt.c: Select 100 random sequences instead
of generating all possible.
* examples/uberopt/it.c: Example code
diff --git a/liboil/copy/Makefile.am b/liboil/copy/Makefile.am
index 8601ff3..92b202e 100644
--- a/liboil/copy/Makefile.am
+++ b/liboil/copy/Makefile.am
@@ -27,7 +27,8 @@ c_sources = \
permute.c \
splat_ref.c \
tablelookup_ref.c \
- trans8x8.c
+ trans8x8.c \
+ trans8x8_c.c
libcopy_la_SOURCES = \
$(c_sources) \
diff --git a/liboil/copy/trans8x8_c.c b/liboil/copy/trans8x8_c.c
new file mode 100644
index 0000000..76b90d0
--- /dev/null
+++ b/liboil/copy/trans8x8_c.c
@@ -0,0 +1,118 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboil.h>
+#include <liboil/liboilfunction.h>
+
+OIL_DECLARE_CLASS (trans8x8_u16); /* trans8x8_u16 is the second argument of every OIL_DEFINE_IMPL in this file; the class itself is presumably defined elsewhere (TODO confirm) */
+/* trans8x8_u16_c1: nested-loop variant.
+ * For i and j in 0..7 it stores element j of source row i (row start is
+ * src offset by sstr*i) at offset dstr*j from dest offset by 2*i.
+ * Read as a transpose of an 8x8 block of uint16_t, assuming OIL_OFFSET
+ * and OIL_GET apply byte offsets and dstr/sstr are byte strides -- both
+ * macros are defined outside this file, TODO confirm. */
+void
+trans8x8_u16_c1 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
+{
+ int i;
+ int j;
+ uint16_t *d, *s;
+ /* i selects the source row, j the element within that row. */
+ for(i=0;i<8;i++){
+ d = OIL_OFFSET(dest, 2*i); /* 2*i: presumably i * sizeof(uint16_t) bytes */
+ s = OIL_OFFSET(src, sstr*i);
+ for(j=0;j<8;j++){
+ OIL_GET(d,dstr*j,uint16_t) = s[j];
+ }
+ }
+}
+OIL_DEFINE_IMPL (trans8x8_u16_c1, trans8x8_u16);
+/* trans8x8_u16_c2: pointer-walking variant with the per-row work written
+ * out eight times instead of using an inner loop.  For each of 8 source
+ * rows, s steps through the row with s++ and d is moved on by dstr via
+ * OIL_OFFSET after every store.  Offset units depend on OIL_OFFSET, which
+ * is defined outside this file -- presumably bytes, TODO confirm. */
+void
+trans8x8_u16_c2 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
+{
+ int i;
+ uint16_t *d, *s;
+ /* One source row per pass; eight copy-and-advance steps per row. */
+ for(i=0;i<8;i++){
+ d = OIL_OFFSET(dest, 2*i);
+ s = OIL_OFFSET(src, sstr*i);
+ *d = *s; s++; d = OIL_OFFSET(d, dstr);
+ *d = *s; s++; d = OIL_OFFSET(d, dstr);
+ *d = *s; s++; d = OIL_OFFSET(d, dstr);
+ *d = *s; s++; d = OIL_OFFSET(d, dstr);
+ *d = *s; s++; d = OIL_OFFSET(d, dstr);
+ *d = *s; s++; d = OIL_OFFSET(d, dstr);
+ *d = *s; s++; d = OIL_OFFSET(d, dstr);
+ *d = *s; s++; d = OIL_OFFSET(d, dstr); /* this last advance is never read: s and d are reassigned on the next pass */
+ }
+}
+OIL_DEFINE_IMPL (trans8x8_u16_c2, trans8x8_u16);
+/* trans8x8_u16_c3: indexed variant with the per-row work written out
+ * eight times.  For each of 8 source rows it stores s[0]..s[7] at offsets
+ * dstr*0 .. dstr*7 from d, where d is dest offset by 2*i and s is src
+ * offset by sstr*i.  Offset units depend on OIL_OFFSET and OIL_GET, which
+ * are defined outside this file -- presumably bytes, TODO confirm. */
+void
+trans8x8_u16_c3 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
+{
+ int i;
+ uint16_t *d, *s;
+ /* d and s stay fixed within a pass; only the constant index changes. */
+ for(i=0;i<8;i++){
+ d = OIL_OFFSET(dest, 2*i);
+ s = OIL_OFFSET(src, sstr*i);
+ OIL_GET(d,dstr*0,uint16_t) = s[0];
+ OIL_GET(d,dstr*1,uint16_t) = s[1];
+ OIL_GET(d,dstr*2,uint16_t) = s[2];
+ OIL_GET(d,dstr*3,uint16_t) = s[3];
+ OIL_GET(d,dstr*4,uint16_t) = s[4];
+ OIL_GET(d,dstr*5,uint16_t) = s[5];
+ OIL_GET(d,dstr*6,uint16_t) = s[6];
+ OIL_GET(d,dstr*7,uint16_t) = s[7];
+ }
+}
+OIL_DEFINE_IMPL (trans8x8_u16_c3, trans8x8_u16);
+/* trans8x8_u16_c4: variant with the per-row work written out eight
+ * times, reading the source through a post-incremented pointer.  For each
+ * of 8 source rows it stores successive *s++ values at offsets
+ * dstr*0 .. dstr*7 from d, where d is dest offset by 2*i and s starts at
+ * src offset by sstr*i.  Offset units depend on OIL_OFFSET and OIL_GET,
+ * defined outside this file -- presumably bytes, TODO confirm. */
+void
+trans8x8_u16_c4 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
+{
+ int i;
+ uint16_t *d, *s;
+ /* d stays fixed within a pass; s advances one element per store. */
+ for(i=0;i<8;i++){
+ d = OIL_OFFSET(dest, 2*i);
+ s = OIL_OFFSET(src, sstr*i);
+ OIL_GET(d,dstr*0,uint16_t) = *s++;
+ OIL_GET(d,dstr*1,uint16_t) = *s++;
+ OIL_GET(d,dstr*2,uint16_t) = *s++;
+ OIL_GET(d,dstr*3,uint16_t) = *s++;
+ OIL_GET(d,dstr*4,uint16_t) = *s++;
+ OIL_GET(d,dstr*5,uint16_t) = *s++;
+ OIL_GET(d,dstr*6,uint16_t) = *s++;
+ OIL_GET(d,dstr*7,uint16_t) = *s++; /* the final increment of s is never read */
+ }
+}
+
+OIL_DEFINE_IMPL (trans8x8_u16_c4, trans8x8_u16);
+
+
diff --git a/liboil/copy/trans8x8_i386.c b/liboil/copy/trans8x8_i386.c
index 4ee5004..2a9a76c 100644
--- a/liboil/copy/trans8x8_i386.c
+++ b/liboil/copy/trans8x8_i386.c
@@ -34,6 +34,7 @@
OIL_DECLARE_CLASS(trans8x8_u16);
+/* this could use additional work. */
static void
trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
{
@@ -135,4 +136,101 @@ trans8x8_u16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
}
OIL_DEFINE_IMPL (trans8x8_u16_mmx, trans8x8_u16);
+/*
+ * trans8x8_u16_asm1: i386 inline-assembly variant that copies an 8x8
+ * block of 16-bit values from src to dest with rows and columns swapped.
+ * dstr and sstr are added directly to the row addresses, i.e. they are
+ * strides in bytes.
+ *
+ * Register use inside the asm:
+ *   ecx = dest column cursor (advances 2 bytes per pass)
+ *   edx = dstr (negated temporarily, see below)
+ *   esi = dest + 7*dstr (lea forms dest + 8*dstr, then dstr is subtracted)
+ *   ebx = current source row (advances by sstr per pass)
+ *   edi = pass counter, ax = element in flight
+ *
+ * x86 scaled-index addressing only offers scales 1, 2, 4 and 8, so
+ * destination rows 0, 1, 2 and 4 are addressed forward from ecx, and rows
+ * 3, 5, 6 and 7 are addressed backward from esi while edx holds -dstr.
+ *
+ * ebx is saved to and restored from a stack slot by hand instead of being
+ * listed as a clobber -- presumably because ebx is the PIC register on
+ * i386 and some compilers reject it in a clobber list (TODO confirm).
+ *
+ * Constraint notes:
+ *   - saved_ebx is written by the asm, so it is an output ("=m"), not an
+ *     input.
+ *   - "memory" is clobbered because the asm stores through dest, which the
+ *     compiler cannot see from the operand list.
+ *   - volatile keeps the statement alive: its only declared output is a
+ *     scratch slot that is never read afterwards.
+ */
+void
+trans8x8_u16_asm1 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
+{
+ int saved_ebx = 0;
+
+ asm volatile (
+ " movl %%ebx, %[saved_ebx] \n"
+ " movl %[dest], %%ecx \n"
+ " movl %[src], %%ebx \n"
+ " movl %[dstr], %%edx \n"
+ " lea (%%ecx,%%edx,8), %%esi \n"
+ " sub %%edx, %%esi \n"
+ " movl $7, %%edi \n" /* dec/jge below runs the body for 7..0: 8 passes */
+ "1: \n"
+
+ " mov (%%ebx), %%ax \n"
+ " mov %%ax,(%%ecx) \n" /* element 0 -> row 0 */
+ " mov 2(%%ebx), %%ax \n"
+ " mov %%ax,(%%ecx,%%edx,1) \n" /* element 1 -> row 1 */
+ " mov 4(%%ebx), %%ax \n"
+ " mov %%ax,(%%ecx,%%edx,2) \n" /* element 2 -> row 2 */
+ " mov 8(%%ebx), %%ax \n"
+ " mov %%ax,(%%ecx,%%edx,4) \n" /* element 4 -> row 4 */
+
+ " neg %%edx \n" /* edx = -dstr: address rows backward from row 7 */
+
+ " mov 6(%%ebx), %%ax \n"
+ " mov %%ax,(%%esi,%%edx,4) \n" /* element 3 -> row 7-4 = 3 */
+ " mov 10(%%ebx), %%ax \n"
+ " mov %%ax,(%%esi,%%edx,2) \n" /* element 5 -> row 7-2 = 5 */
+ " mov 12(%%ebx), %%ax \n"
+ " mov %%ax,(%%esi,%%edx,1) \n" /* element 6 -> row 7-1 = 6 */
+ " mov 14(%%ebx), %%ax \n"
+ " mov %%ax,(%%esi) \n" /* element 7 -> row 7 */
+
+ " neg %%edx \n" /* back to +dstr for the next pass */
+ " add %[sstr], %%ebx \n" /* next source row */
+ " add $2, %%ecx \n" /* next destination column */
+ " add $2, %%esi \n"
+
+ " dec %%edi \n"
+ " jge 1b \n"
+ " movl %[saved_ebx], %%ebx \n"
+ : [saved_ebx] "=m" (saved_ebx)
+ : [dest] "m" (dest), [dstr] "m" (dstr), [src] "m" (src), [sstr] "m" (sstr)
+ : "eax", "ecx", "edx", "esi", "edi", "memory", "cc");
+}
+OIL_DEFINE_IMPL (trans8x8_u16_asm1, trans8x8_u16);
+
+/*
+ * trans8x8_u16_asm2: i386 inline-assembly variant that copies an 8x8
+ * block of 16-bit values from src to dest with rows and columns swapped,
+ * loading the source 32 bits (two elements) at a time.  dstr and sstr are
+ * added directly to the row addresses, i.e. they are strides in bytes.
+ *
+ * Register use inside the asm:
+ *   ecx = dest column cursor (advances 2 bytes per pass)
+ *   edx = dstr, edi = -dstr
+ *   esi = dest + 7*dstr (lea forms dest + 8*dstr, then dstr is subtracted)
+ *   ebx = current source row (advances by sstr per pass)
+ *   eax = two source elements; the low half is stored first, then
+ *         shr $16 exposes the element at the higher address
+ *   i   = pass counter, kept in memory because no register is left
+ *
+ * x86 scaled-index addressing only offers scales 1, 2, 4 and 8, so
+ * destination rows 0, 1, 2 and 4 are addressed forward from ecx with edx,
+ * and rows 3, 5, 6 and 7 backward from esi with edi.
+ *
+ * ebx is saved to and restored from a stack slot by hand and therefore is
+ * not listed as a clobber -- presumably because ebx is the PIC register
+ * on i386 and some compilers reject it in a clobber list (TODO confirm).
+ *
+ * Constraint notes:
+ *   - i and saved_ebx are written by the asm, so they are outputs ("=m");
+ *     i is never read before the asm initialises it to 7.
+ *   - "memory" is clobbered because the asm stores through dest, which the
+ *     compiler cannot see from the operand list.
+ *   - volatile keeps the statement alive: its declared outputs are scratch
+ *     slots that are never read afterwards.
+ */
+void
+trans8x8_u16_asm2 (uint16_t *dest, int dstr, uint16_t *src, int sstr)
+{
+ int i;
+ int saved_ebx = 0;
+
+ asm volatile (
+ " movl %%ebx, %[saved_ebx] \n"
+ " movl %[dest], %%ecx \n"
+ " movl %[src], %%ebx \n"
+ " movl %[dstr], %%edx \n"
+ " lea (%%ecx,%%edx,8), %%esi \n"
+ " sub %%edx, %%esi \n"
+ " movl $7, %[count] \n" /* decl/jge below runs the body for 7..0: 8 passes */
+ " movl %%edx, %%edi \n"
+ " negl %%edi \n"
+ "1: \n"
+
+ " movl (%%ebx), %%eax \n" /* elements 0 and 1 */
+ " mov %%ax,(%%ecx) \n" /* -> row 0 */
+ " shr $16, %%eax \n"
+ " mov %%ax,(%%ecx,%%edx,1) \n" /* -> row 1 */
+
+ " movl 4(%%ebx), %%eax \n" /* elements 2 and 3 */
+ " mov %%ax,(%%ecx,%%edx,2) \n" /* -> row 2 */
+ " shr $16, %%eax \n"
+ " mov %%ax,(%%esi,%%edi,4) \n" /* -> row 7-4 = 3 */
+
+ " movl 8(%%ebx), %%eax \n" /* elements 4 and 5 */
+ " mov %%ax,(%%ecx,%%edx,4) \n" /* -> row 4 */
+ " shr $16, %%eax \n"
+ " mov %%ax,(%%esi,%%edi,2) \n" /* -> row 7-2 = 5 */
+
+ " movl 12(%%ebx), %%eax \n" /* elements 6 and 7 */
+ " mov %%ax,(%%esi,%%edi,1) \n" /* -> row 7-1 = 6 */
+ " shr $16, %%eax \n"
+ " mov %%ax,(%%esi) \n" /* -> row 7 */
+
+ " add %[sstr], %%ebx \n" /* next source row */
+ " add $2, %%ecx \n" /* next destination column */
+ " add $2, %%esi \n"
+
+ " decl %[count] \n"
+ " jge 1b \n"
+ " movl %[saved_ebx], %%ebx \n"
+ : [count] "=m" (i), [saved_ebx] "=m" (saved_ebx)
+ : [dest] "m" (dest), [dstr] "m" (dstr), [src] "m" (src), [sstr] "m" (sstr)
+ : "eax", "ecx", "edx", "esi", "edi", "memory", "cc");
+}
+OIL_DEFINE_IMPL (trans8x8_u16_asm2, trans8x8_u16);