* .cvsignore:

* BUGS: * doc/.cvsignore: * liboil/mmx/Makefile.am: * liboil/mmx/fbmmx.c: * patches/divide.c: * patches/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S: * patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S: * patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S: * patches/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S: * patches/patch-small-lib-2: * testsuite/Makefile.am: * testsuite/list_impls.c: Clean up local source tree. Put spare files in the place where spare files go.
author: David Schleef <ds@schleef.org> 2005-12-22 22:04:54 +0000
committer: David Schleef <ds@schleef.org> 2005-12-22 22:04:54 +0000
commit: 4e381b38abceb843eb6dfd88f734a5c8a38b0b71 (patch)
tree: dcd40e81158778ce3eab664375fa85dadf1e7e80 /patches
parent: a5ebcb17227fa00f782d9e8a00264f90e1116906 (diff)
download: liboil-4e381b38abceb843eb6dfd88f734a5c8a38b0b71.tar.gz
6 files changed, 1252 insertions, 0 deletions
diff --git a/patches/divide.c b/patches/divide.c
new file mode 100644
index 0000000..bb4be16
--- /dev/null
+++ b/patches/divide.c
@@ -0,0 +1,177 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2005 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <math.h>
+
+#include <liboil/liboil.h>
+#include <liboil/liboilfunction.h>
+#include <liboil/liboiltest.h>
+#include <liboil/liboilrandom.h>
+#include <stdio.h>
+
+/* Test setup: random non-zero divisors in src2; each src1 is a random 32-bit value times its divisor. */
+static void
+divide_u32_u64_u32_test (OilTest *test)
+{
+  int i;
+  int n;
+  uint64_t *src1;
+  uint32_t *src2;
+
+  src1 = (uint64_t *)oil_test_get_source_data(test, OIL_ARG_SRC1);
+  src2 = (uint32_t *)oil_test_get_source_data(test, OIL_ARG_SRC2);
+  n = test->params[OIL_ARG_SRC1].post_n;  /* was "p1->post_n", but no p1 is declared here */
+  for(i=0;i<n;i++){
+    do src2[i] = oil_rand_u32(); while (src2[i] == 0);  /* never hand out a zero divisor */
+    src1[i] = oil_rand_u32() * (uint64_t)src2[i];  /* widen before multiplying */
+  }
+}
+
+OIL_DEFINE_CLASS_FULL (divide_u32_u64_u32, "uint32_t *dest, uint64_t *src1, uint32_t *src2, int n",
+    divide_u32_u64_u32_test);
+
+
+/* Reference implementation: dest[k] = src1[k] / src2[k], stored as 32 bits. */
+static void
+divide_u32_u64_u32_ref (uint32_t *dest, uint64_t *src1, uint32_t *src2, int n)
+{
+  int k = 0;
+  while (k < n) {
+    dest[k] = src1[k] / src2[k];
+    k++;
+  }
+}
+OIL_DEFINE_IMPL_REF (divide_u32_u64_u32_ref, divide_u32_u64_u32);
+
+/* Shift-and-subtract long division; only quotient bits 31..0 are produced. */
+static void
+divide_u32_u64_u32_long (uint32_t *dest, uint64_t *src1, uint32_t *src2, int n)
+{
+  int i;
+  int bit;
+  uint64_t a;
+  uint32_t b;
+  uint32_t c;
+
+  for(i=0;i<n;i++){
+    a = src1[i];  /* running remainder */
+    b = src2[i];
+    c = 0;  /* quotient, filled in from the top bit down */
+    for (bit = 31; bit >= 0; bit--) {
+      if (a >= ((uint64_t)b)<<bit) {
+        c |= (uint32_t)1<<bit;  /* unsigned shift: 1<<31 on a signed int is undefined */
+        a -= ((uint64_t)b)<<bit;
+      }
+    }
+    dest[i] = c;
+  }
+}
+OIL_DEFINE_IMPL (divide_u32_u64_u32_long, divide_u32_u64_u32);
+/* floor(log2(x)) for x > 0, and 0 for x == 0; x is narrowed at each step so any value works, not only powers of two. */
+static int
+binlog(uint32_t x)
+{
+  int y = 0;
+  if (x & 0xffff0000) { x >>= 16; y += 16; }
+  if (x & 0xff00) { x >>= 8; y += 8; }
+  if (x & 0xf0) { x >>= 4; y += 4; }
+  if (x & 0xc) { x >>= 2; y += 2; }
+  if (x & 0x2) y += 1;
+  return y;
+}
+
+/* Long division with a head start: estimate the quotient from a shifted dividend, then finish bit by bit. */
+static void
+divide_u32_u64_u32_long2 (uint32_t *dest, uint64_t *src1, uint32_t *src2, int n)
+{
+  int i;
+  int bit;
+  uint64_t a;
+  uint32_t b;
+  uint32_t c;
+  uint32_t d;
+  int shift;
+
+  for(i=0;i<n;i++){
+    a = src1[i];
+    b = src2[i];
+
+    /* Estimate: c*b <= (d<<shift) <= a, so c never overshoots and the remainder stays non-negative. */
+    shift = binlog(b);
+    d = a>>shift;
+    c = (d/b) << shift;
+    a -= (uint64_t)c * b;
+
+    /* Shift-and-subtract on what is left; stops early once the remainder reaches zero. */
+    if (a > 0) {
+      for (bit = 31; bit >= 0; bit--) {
+        if (a >= ((uint64_t)b)<<bit) {
+          c += (uint32_t)1<<bit;  /* unsigned shift: 1<<31 on a signed int is undefined */
+          a -= ((uint64_t)b)<<bit;
+        }
+        if (a==0) break;
+      }
+    }
+    dest[i] = c;
+  }
+}
+OIL_DEFINE_IMPL (divide_u32_u64_u32_long2, divide_u32_u64_u32);
+/* x86 only: one DIV instruction per element divides EDX:EAX by src2[i]; the quotient comes back in EAX. */
+static void
+divide_u32_u64_u32_asm (uint32_t *dest, uint64_t *src1, uint32_t *src2, int n)
+{
+  int i;
+  uint32_t a;
+  uint32_t d;
+
+  for(i=0;i<n;i++){
+    a = src1[i];  /* low 32 bits of the dividend (implicit truncation) -> EAX */
+    d = (src1[i]>>32);  /* high 32 bits of the dividend -> EDX */
+
+    __asm__ __volatile__ ("\n"  /* NOTE(review): DIV faults on a zero divisor or a quotient wider than 32 bits */
+        "  div %2\n"
+        : "+a" (a), "+d" (d)  /* in/out: EAX becomes the quotient, EDX the remainder */
+        : "r" (src2[i]));
+
+    dest[i] = a;
+  }
+
+}
+OIL_DEFINE_IMPL (divide_u32_u64_u32_asm, divide_u32_u64_u32);
+
+
+
+
+
+
+
+
+
diff --git a/patches/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S b/patches/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S
new file mode 100644
index 0000000..db2cbec
--- /dev/null
+++ b/patches/nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP.S
@@ -0,0 +1,125 @@
+	.file	"nr-compose.c"
+	
+# Ensure Inkscape is execshield protected
+	.section .note.GNU-stack
+	.previous
+	
+	.text
+	.align 2
+.globl nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP
+	.type	nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP,@function
+
+/*
+ * This code is in public domain
+ *
+ * c	 32(%ebp)
+ * srs	 28(%ebp)
+ * spx	 24(%ebp)
+ * rs	 20(%ebp)
+ * h	 16(%ebp)
+ * w	 12(%ebp)
+ * px	 8(%ebp)
+ * r	-8(%ebp)
+ * g	-12(%ebp)
+ * b	-16(%ebp)
+ * a	-20(%ebp)
+ * s	-24(%ebp) -> %esi
+ * d	-28(%ebp) -> %edi
+ * x	-32(%ebp) -> %ebx
+ * y	-36(%ebp)
+ * ca	-40(%ebp)
+ *
+ * mm0 Fg
+ * mm1 mask byte m, copied into all four words
+ * mm2 Fg * m / 255
+ * mm3
+ * mm4
+ * mm5
+ * mm6 128
+ * mm7 0
+ *
+*/
+
+nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP:	/* px[x] = c * mask[x] / 255 wherever the mask byte is non-zero */
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	subl	$36, %esp	/* frame space; no local slot is referenced below */
+	pushl	%edi
+	pushl	%esi
+
+/* Load %mm7 with [0 0 0 0] (zero source for the byte -> word unpacks) */
+	movl	$0, %eax
+	movd	%eax, %mm7
+
+/* Load %mm6 with [128 128 128 128] (rounding bias for the /255 step) */
+	movl	$0x80808080, %eax
+	movd	%eax, %mm6
+	punpcklbw %mm7, %mm6
+
+/* FgC -> %mm0: the four bytes at c, widened to 16-bit words */
+	movl	32(%ebp), %eax
+	movd	(%eax), %mm0
+	punpcklbw %mm7, %mm0
+
+/* for (y = ...): %ecx counts the h rows down to zero */
+	movl    16(%ebp), %ecx
+.fory:
+
+/* d = px */
+/* s = spx */
+	movl	8(%ebp), %edi
+	movl	24(%ebp), %esi
+
+/* for (x = ...): %ebx counts the w pixels down to zero */
+	movl	12(%ebp), %ebx
+.forx:
+
+/* [m m m m] -> %mm1; a zero mask byte leaves the destination pixel untouched */
+	movzbl	(%esi), %eax
+	testb	$0xff, %al
+	jz	.clip
+	movd	%eax, %mm1
+	punpcklwd %mm1, %mm1
+	punpckldq %mm1, %mm1
+
+/* Fg * m / 255 -> mm2, rounded: t = Fg * m + 128, result = (t + (t >> 8)) >> 8 */
+	movq	%mm0, %mm2
+	pmullw	%mm1, %mm2
+	paddw	%mm6, %mm2
+	movq	%mm2, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm3, %mm2
+	psrlw	$8, %mm2
+
+/* Store pixel (words packed back into four bytes) */
+	packuswb %mm2, %mm2
+	movd	%mm2, (%edi)
+
+.clip:
+	addl	$4, %edi	/* next 4-byte destination pixel */
+	incl	%esi	/* next mask byte */
+
+	decl	%ebx
+	jnz	.forx
+
+	movl	20(%ebp), %eax
+	addl	%eax, 8(%ebp)	/* px += rs, updated in the argument slot */
+	movl	28(%ebp), %eax
+	addl	%eax, 24(%ebp)	/* spx += srs */
+
+	decl	%ecx
+	jnz	.fory
+
+.exit:
+	emms
+	popl	%esi
+	popl	%edi
+	addl	$36, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+
+.Lfe1:
+	.size	nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_EMPTY_A8_RGBAP
+	.ident	"GCC: (GNU) 3.2"
diff --git a/patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S b/patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S
new file mode 100644
index 0000000..fe1d9be
--- /dev/null
+++ b/patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP.S
@@ -0,0 +1,231 @@
+	.file	"nr-compose.c"
+
+# Ensure Inkscape is execshield protected
+	.section .note.GNU-stack
+	.previous
+	
+	.text
+	.align 2
+.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
+	.type	nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,@function
+
+/*
+ * This code is in public domain
+ *
+ * c	 32(%ebp)
+ * srs	 28(%ebp)
+ * spx	 24(%ebp)
+ * rs	 20(%ebp)
+ * h	 16(%ebp)
+ * w	 12(%ebp)
+ * px	 8(%ebp)
+ * r	-8(%ebp)
+ * g	-12(%ebp)
+ * b	-16(%ebp)
+ * a	-20(%ebp)
+ * s	-24(%ebp) -> %esi
+ * d	-28(%ebp) -> %edi
+ * x	-32(%ebp) -> %ebx
+ * y	-36(%ebp)
+ * ca	-40(%ebp)
+ *
+ * mm0 Fg
+ * mm1 MMMM
+ * mm2 FgM
+ * mm3
+ * mm4
+ * mm5 255
+ * mm6 128
+ * mm7 0
+ *
+*/
+
+nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP:	/* composite colour c, scaled by the 8-bit mask spx, over px */
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	subl	$36, %esp	/* frame space; no local slot is referenced below */
+	pushl	%edi
+	pushl	%esi
+
+/* Load %mm7 with [0 0 0 0] (zero source for the byte -> word unpacks) */
+	movl	$0, %eax
+	movd	%eax, %mm7
+
+/* Load %mm6 with [128 128 128 128] (rounding bias for the /255 steps) */
+	movl	$0x80808080, %eax
+	movd	%eax, %mm6
+	punpcklbw %mm7, %mm6
+
+/* Load %mm5 with [255 255 255 255] (x ^ 255 == 255 - x for byte values) */
+	movl	$0xffffffff, %eax
+	movd	%eax, %mm5
+	punpcklbw %mm7, %mm5
+
+/* FgC -> %mm0: the four bytes at c, widened to 16-bit words */
+	movl	32(%ebp), %eax
+	movd	(%eax), %mm0
+	punpcklbw %mm7, %mm0
+
+/* Check full opacity: test the alpha byte c[3]; %al would only be the low byte of the pointer */
+	cmpb	$0xff, 3(%eax)
+	jz	.opaque
+
+/* for (y = ...): %ecx counts the h rows down to zero */
+	movl    16(%ebp), %ecx
+.fory:
+
+/* d = px */
+/* s = spx */
+	movl	8(%ebp), %edi
+	movl	24(%ebp), %esi
+
+/* for (x = ...): %ebx counts the w pixels down to zero */
+	movl	12(%ebp), %ebx
+.forx:
+
+/* [m m m m] -> %mm1; a zero mask byte leaves the destination pixel untouched */
+	movzbl	(%esi), %eax
+	testb	$0xff, %al
+	jz	.clip
+	movd	%eax, %mm1
+	punpcklwd %mm1, %mm1
+	punpckldq %mm1, %mm1
+
+/* Fg * m / 255 -> mm2, rounded: t = Fg * m + 128, result = (t + (t >> 8)) >> 8 */
+	movq	%mm0, %mm2
+	pmullw	%mm1, %mm2
+	paddw	%mm6, %mm2
+	movq	%mm2, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm3, %mm2
+	psrlw	$8, %mm2
+
+/* [255 - FgA] -> mm1: replicate the alpha word (word 3) and invert it */
+	movq	%mm2, %mm1
+	punpckhwd %mm1, %mm1
+	punpckhdq %mm1, %mm1
+	pxor	%mm5, %mm1
+
+/* Bg -> mm3 */
+	movd	(%edi), %mm3
+	punpcklbw %mm7, %mm3
+
+/* Fg + ((255 - FgA) * Bg) / 255 */
+	pmullw	%mm1, %mm3
+	paddw	%mm6, %mm3
+	movq	%mm3, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm2, %mm3
+
+/* Store pixel (words packed back into four bytes, with saturation) */
+	packuswb %mm3, %mm3
+	movd	%mm3, (%edi)
+
+.clip:
+	addl	$4, %edi	/* next 4-byte destination pixel */
+	incl	%esi	/* next mask byte */
+
+	decl	%ebx
+	jnz	.forx
+
+	movl	20(%ebp), %eax
+	addl	%eax, 8(%ebp)	/* px += rs, updated in the argument slot */
+	movl	28(%ebp), %eax
+	addl	%eax, 24(%ebp)	/* spx += srs */
+
+	decl	%ecx
+	jnz	.fory
+
+.exit:
+	emms
+	popl	%esi
+	popl	%edi
+	addl	$36, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+
+.opaque:	/* same loops for an opaque colour: a full mask byte stores c unchanged */
+/* for (y = ...) */
+	movl    16(%ebp), %ecx
+.o_fory:
+
+/* d = px */
+/* s = spx */
+	movl	8(%ebp), %edi
+	movl	24(%ebp), %esi
+
+/* for (x = ...) */
+	movl	12(%ebp), %ebx
+.o_forx:
+
+/* [m m m m] -> %mm1; m == 0 skips the pixel, m == 255 takes the copy shortcut */
+	movzbl	(%esi), %eax
+	testb	$0xff, %al
+	jz	.o_clip
+	cmpb	$0xff, %al
+	jz	.o_full
+	movd	%eax, %mm1
+	punpcklwd %mm1, %mm1
+	punpckldq %mm1, %mm1
+
+/* Fg * m / 255 -> mm2, rounded as in the general loop */
+	movq	%mm0, %mm2
+	pmullw	%mm1, %mm2
+	paddw	%mm6, %mm2
+	movq	%mm2, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm3, %mm2
+	psrlw	$8, %mm2
+
+/* [255 - FgA] -> mm1 */
+	movq	%mm2, %mm1
+	punpckhwd %mm1, %mm1
+	punpckhdq %mm1, %mm1
+	pxor	%mm5, %mm1
+
+/* Bg -> mm3 */
+	movd	(%edi), %mm3
+	punpcklbw %mm7, %mm3
+
+/* Fg + ((255 - FgA) * Bg) / 255 */
+	pmullw	%mm1, %mm3
+	paddw	%mm6, %mm3
+	movq	%mm3, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm2, %mm3
+
+	jmp	.o_store
+
+.o_full:
+	movq	%mm0, %mm3	/* opaque colour under a full mask: the result is c itself */
+
+.o_store:
+/* Store pixel */
+	packuswb %mm3, %mm3
+	movd	%mm3, (%edi)
+
+.o_clip:
+	addl	$4, %edi
+	incl	%esi
+
+	decl	%ebx
+	jnz	.o_forx
+
+	movl	20(%ebp), %eax
+	addl	%eax, 8(%ebp)	/* px += rs */
+	movl	28(%ebp), %eax
+	addl	%eax, 24(%ebp)	/* spx += srs */
+
+	decl	%ecx
+	jnz	.o_fory
+	jmp	.exit
+
+.Lfe1:
+	.size	nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP,.Lfe1-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_A8_RGBAP
+	.ident	"GCC: (GNU) 3.2"
diff --git a/patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S b/patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S
new file mode 100644
index 0000000..e30056a
--- /dev/null
+++ b/patches/nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM.S
@@ -0,0 +1,414 @@
+	.file	"nr-compose-transform.c"
+
+# Ensure Inkscape is execshield protected
+	.section .note.GNU-stack
+	.previous
+	
+	.text
+	.align 2
+.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0
+	.type	nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0,@function
+
+/*
+ * This code is in public domain
+ *
+ */
+
+nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0:	/* one source sample per destination pixel */
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	subl	$48, %esp
+	pushl	%edi
+	pushl	%esi
+
+/* Load %mm7 with [0 0 0 0] */
+	movl	$0, %eax
+	movd	%eax, %mm7
+
+/* Load %mm6 with [128 128 128 128] */
+	movl	$0x80808080, %eax
+	movd	%eax, %mm6
+	punpcklbw %mm7, %mm6
+
+/* Load %mm5 with [255 255 255 255] */
+	movl	$0xffffffff, %eax
+	movd	%eax, %mm5
+	punpcklbw %mm7, %mm5
+
+/* Load %mm0 with [a a a a]: the byte argument at 44(%ebp) in all four words */
+	movzbl	44(%ebp), %eax
+	movd	%eax, %mm0
+	punpcklwd %mm0, %mm0
+	punpckldq %mm0, %mm0
+
+	movl	8(%ebp), %eax
+	movl	%eax, -8(%ebp)	/* -8: destination row pointer */
+	movl	40(%ebp), %eax
+	addl	$16, %eax
+	movl	(%eax), %eax
+	movl	%eax, -12(%ebp)	/* -12: source x at row start, from offset 16 of the table at 40(%ebp) */
+	movl	40(%ebp), %eax
+	addl	$20, %eax
+	movl	(%eax), %eax
+	movl	%eax, -16(%ebp)	/* -16: source y at row start, from offset 20 of the table */
+	movl	$0, -24(%ebp)	/* -24: row counter */
+.L29:
+	movl	-24(%ebp), %eax
+	cmpl	16(%ebp), %eax
+	jl	.L32
+	jmp	.L28
+.L32:
+	movl	-8(%ebp), %edi
+
+	movl	-12(%ebp), %eax
+	movl	%eax, %esi	/* %esi: running source x */
+	movl	-16(%ebp), %eax
+	movl	%eax, -36(%ebp)	/* -36: running source y */
+
+	movl	12(%ebp), %ebx
+.for_x_0:
+
+	movl	%esi, %ecx
+	cmpl	$0, %ecx
+	js	.clip_0
+	sarl	$12, %ecx	/* coordinates carry 12 fractional bits */
+	cmpl	28(%ebp), %ecx
+	jge	.clip_0	/* x outside [0, 28(%ebp)): skip the pixel */
+	shll	$2, %ecx	/* 4 bytes per source pixel */
+
+	movl	-36(%ebp), %eax
+	cmpl	$0, %eax
+	js	.clip_0
+	sarl	$12, %eax
+	cmpl	32(%ebp), %eax
+	jge	.clip_0	/* y outside [0, 32(%ebp)): skip the pixel */
+	imull	36(%ebp), %eax	/* y * source row stride */
+
+	addl	%ecx, %eax
+	addl	24(%ebp), %eax	/* + source base pointer */
+
+/* Fg -> %mm1; a source pixel with zero alpha is skipped */
+	movl	(%eax), %eax
+	testl	$0xff000000, %eax
+	jz	.clip_0
+	movd	%eax, %mm1
+	punpcklbw %mm7, %mm1
+
+/* [a a a 255] -> %mm3: a * 0x10101 puts a in the three low bytes (mull clobbers %edx) */
+	shrl	$24, %eax
+	movl	$0x10101, %edx
+	mull	%edx
+	orl	$0xff000000, %eax
+	movd	%eax, %mm3
+	punpcklbw %mm7, %mm3
+
+/* [Fg * a] -> mm1: colour words scaled by the pixel alpha, alpha word kept (x * 255 / 255) */
+	pmullw	%mm3, %mm1
+	paddw	%mm6, %mm1
+	movq	%mm1, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm1
+	psrlw	$8, %mm1
+
+/* Multiply by alpha (the global value in %mm0), rounded /255 as above */
+	pmullw	%mm0, %mm1
+	paddw	%mm6, %mm1
+	movq	%mm1, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm1
+	psrlw	$8, %mm1
+
+/* [255 - FgA] -> mm2 */
+	movq	%mm1, %mm2
+	punpckhwd %mm2, %mm2
+	punpckhdq %mm2, %mm2
+	pxor	%mm5, %mm2
+
+/* Bg -> mm3 */
+	movd	(%edi), %mm3
+	punpcklbw %mm7, %mm3
+
+/* Fg + ((255 - FgA) * Bg) / 255 */
+
+	pmullw	%mm2, %mm3
+	paddw	%mm6, %mm3
+	movq	%mm3, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm1, %mm3
+
+/* Store pixel */
+	packuswb %mm3, %mm3
+	movd	%mm3, (%edi)
+
+.clip_0:
+.L37:
+	movl	40(%ebp), %ecx
+	movl	(%ecx), %edx
+	addl	%edx, %esi	/* source x += table[offset 0] per destination pixel */
+	movl	4(%ecx), %edx
+	addl	%edx, -36(%ebp)	/* source y += table[offset 4] per destination pixel */
+
+	addl	$4, %edi
+
+	decl	%ebx
+	jnz	.for_x_0
+
+.L34:
+	movl	8(%ecx), %edx	/* %ecx still holds the table pointer loaded at .L37 */
+	addl	%edx, -12(%ebp)	/* row-start x += table[offset 8] */
+	movl	12(%ecx), %edx
+	addl	%edx, -16(%ebp)	/* row-start y += table[offset 12] */
+
+	movl	20(%ebp), %edx
+	leal	-8(%ebp), %eax
+	addl	%edx, (%eax)	/* destination row pointer += 20(%ebp) */
+	leal	-24(%ebp), %eax
+	incl	(%eax)
+	jmp	.L29
+.L28:
+	emms
+	popl	%esi
+	popl	%edi
+	addl	$48, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+.Lfe2:
+	.size	nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0,.Lfe2-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_0
+
+/*
+ *
+ *	dbits	52(%ebp)
+ *	alpha	48(%ebp)
+ *	FF_S	44(%ebp)
+ *
+ *	d	-32(%ebp) -> %edi
+ *	i	-60(%ebp) -> %esi
+ *	sx	-64(%ebp) -> %ebx
+ *	sy	-68(%ebp)
+ *	s	-72(%ebp)
+ *
+ *	%mm0	a a a a
+ *	%mm1	FgA
+ *	%mm2	SumFgA
+ *	%mm3	a a a 255
+ *	%mm4
+*/
+
+	.align 2
+.globl nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n
+	.type	nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n,@function
+nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n:	/* averages 1 << dbits source samples per destination pixel */
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	subl	$72, %esp
+	pushl	%edi
+	pushl	%esi
+
+/* Load %mm7 with [0 0 0 0] */
+	movl	$0, %eax
+	movd	%eax, %mm7
+
+/* Load %mm6 with [128 128 128 128] */
+	movl	$0x80808080, %eax
+	movd	%eax, %mm6
+	punpcklbw %mm7, %mm6
+
+/* Load %mm5 with [255 255 255 255] */
+	movl	$0xffffffff, %eax
+	movd	%eax, %mm5
+	punpcklbw %mm7, %mm5
+
+/* Load %mm0 with [a a a a]: the byte argument at 48(%ebp) in all four words */
+	movzbl	48(%ebp), %eax
+	movd	%eax, %mm0
+	punpcklwd %mm0, %mm0
+	punpckldq %mm0, %mm0
+
+	movl	$1, %eax
+	movzbl	52(%ebp), %ecx
+	sall	%cl, %eax
+	movl	%eax, -8(%ebp)	/* -8: sample count = 1 << dbits */
+	movl	8(%ebp), %eax
+	movl	%eax, -12(%ebp)	/* -12: destination row pointer */
+	movl	40(%ebp), %eax
+	addl	$16, %eax
+	movl	(%eax), %eax
+	movl	%eax, -16(%ebp)	/* -16: source x at row start, from offset 16 of the table at 40(%ebp) */
+	movl	40(%ebp), %eax
+	addl	$20, %eax
+	movl	(%eax), %eax
+	movl	%eax, -20(%ebp)	/* -20: source y at row start, from offset 20 of the table */
+	movl	$0, -28(%ebp)	/* -28: row counter */
+.L44:
+	movl	-28(%ebp), %eax
+	cmpl	16(%ebp), %eax
+	jl	.L47
+	jmp	.exit_n
+.L47:
+	movl	-12(%ebp), %eax
+	movl	%eax, -32(%ebp)	/* -32: current destination pixel */
+	movl	-16(%ebp), %eax
+	movl	%eax, -36(%ebp)	/* -36: running source x */
+	movl	-20(%ebp), %eax
+	movl	%eax, -40(%ebp)	/* -40: running source y */
+	movl	$0, -24(%ebp)	/* -24: column counter */
+.L48:
+	movl	-24(%ebp), %eax
+	cmpl	12(%ebp), %eax
+	jl	.L51
+	jmp	.L49
+.L51:
+
+/* Zero accumulator */
+	movq	%mm7, %mm2
+
+/* Set i to dptr (size - 1): byte offset of the last 8-byte entry in the table at 44(%ebp) */
+	movl	-8(%ebp), %esi
+	sub	$1, %esi
+	shll	$3, %esi
+
+	movl	44(%ebp), %edi
+	movl	-36(%ebp), %ecx
+
+.for_i_n:
+	movl	(%edi,%esi), %ebx
+	addl	%ecx, %ebx	/* sample x = running x + entry's first dword */
+/* Test negative before shift */
+	cmpl	$0, %ebx
+	js	.next_i_n
+	sarl	$12, %ebx
+	cmpl	28(%ebp), %ebx
+	jge	.next_i_n
+/* We multiply sx by 4 here */
+	shll	$2, %ebx
+
+	movl	4(%edi,%esi), %eax
+	addl	-40(%ebp), %eax	/* sample y = running y + entry's second dword */
+/* Test negative before shift */
+	cmpl	$0, %eax
+	js	.next_i_n
+	sarl	$12, %eax
+	cmpl	32(%ebp), %eax
+	jge	.next_i_n
+/* We multiply sy by srs here */
+	imull	36(%ebp), %eax
+
+	addl	%ebx, %eax
+	addl	24(%ebp), %eax
+
+/* Fg -> %mm1; a sample with zero alpha adds nothing to the sum */
+	movl	(%eax), %eax
+	testl	$0xff000000, %eax
+	jz	.next_i_n
+	movd	%eax, %mm1
+	punpcklbw %mm7, %mm1
+
+/* [a a a 255] -> %mm3: a * 0x10101 puts a in the three low bytes (mull clobbers %edx) */
+	shrl	$24, %eax
+	movl	$0x10101, %edx
+	mull	%edx
+	orl	$0xff000000, %eax
+	movd	%eax, %mm3
+	punpcklbw %mm7, %mm3
+
+/* [Fg * a] -> mm1, rounded: t = Fg * a + 128, result = (t + (t >> 8)) >> 8 */
+	pmullw	%mm3, %mm1
+	paddw	%mm6, %mm1
+	movq	%mm1, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm1
+	psrlw	$8, %mm1
+
+/* Add to accumulator (16-bit lanes; paddw wraps rather than saturates) */
+	paddw	%mm1, %mm2
+
+.next_i_n:
+	subl	$8, %esi
+	jnb	.for_i_n	/* loop until the offset borrows below zero */
+
+/* Divide components by sample size: shift right by dbits; skipped samples still count in the divisor */
+	movd	52(%ebp), %mm3
+	psrlw	%mm3, %mm2
+
+/* Multiply by alpha (the global value in %mm0) */
+	pmullw	%mm0, %mm2
+	paddw	%mm6, %mm2
+	movq	%mm2, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm2
+	psrlw	$8, %mm2
+
+/* [255 - FgA] -> mm1 */
+	movq	%mm2, %mm1
+	punpckhwd %mm1, %mm1
+	punpckhdq %mm1, %mm1
+	pxor	%mm5, %mm1
+
+	movl	-32(%ebp), %edi
+/* Bg -> mm3 */
+	movd	(%edi), %mm3
+	punpcklbw %mm7, %mm3
+
+/* Fg + ((255 - FgA) * Bg) / 255 */
+
+	pmullw	%mm1, %mm3
+	paddw	%mm6, %mm3
+	movq	%mm3, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm2, %mm3
+
+/* Store pixel */
+	packuswb %mm3, %mm3
+	movd	%mm3, (%edi)
+
+.L58:
+	movl	40(%ebp), %eax
+	movl	(%eax), %edx
+	leal	-36(%ebp), %eax
+	addl	%edx, (%eax)	/* running x += table[offset 0] */
+	movl	40(%ebp), %eax
+	addl	$4, %eax
+	movl	(%eax), %edx
+	leal	-40(%ebp), %eax
+	addl	%edx, (%eax)	/* running y += table[offset 4] */
+	leal	-32(%ebp), %eax
+	addl	$4, (%eax)	/* next 4-byte destination pixel */
+	leal	-24(%ebp), %eax
+	incl	(%eax)
+	jmp	.L48
+.L49:
+	movl	40(%ebp), %eax
+	addl	$8, %eax
+	movl	(%eax), %edx
+	leal	-16(%ebp), %eax
+	addl	%edx, (%eax)	/* row-start x += table[offset 8] */
+	movl	40(%ebp), %eax
+	addl	$12, %eax
+	movl	(%eax), %edx
+	leal	-20(%ebp), %eax
+	addl	%edx, (%eax)	/* row-start y += table[offset 12] */
+	movl	20(%ebp), %edx
+	leal	-12(%ebp), %eax
+	addl	%edx, (%eax)	/* destination row pointer += 20(%ebp) */
+	leal	-28(%ebp), %eax
+	incl	(%eax)
+	jmp	.L44
+
+.exit_n:
+	emms
+	popl	%esi
+	popl	%edi
+	addl	$72, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+.Lfe3:
+	.size	nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n,.Lfe3-nr_mmx_R8G8B8A8_P_R8G8B8A8_P_R8G8B8A8_N_TRANSFORM_n
+	.ident	"GCC: (GNU) 3.2"
diff --git a/patches/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S b/patches/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S
new file mode 100644
index 0000000..37261e5
--- /dev/null
+++ b/patches/nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P.S
@@ -0,0 +1,227 @@
+	.file	"nr-compose.c"
+
+# Ensure Inkscape is execshield protected
+	.section .note.GNU-stack
+	.previous
+	
+	.text
+	.align 2
+.globl nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
+	.type	nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,@function
+
+/*
+ * This code is in public domain
+ *
+ * alpha 32(%ebp)
+ * srs	 28(%ebp)
+ * spx	 24(%ebp)
+ * rs	 20(%ebp)
+ * h	 16(%ebp)
+ * w	 12(%ebp)
+ * px	 8(%ebp)
+ * r	-8(%ebp)
+ * g	-12(%ebp)
+ * b	-16(%ebp)
+ * a	-20(%ebp)
+ * s	-24(%ebp) -> %esi
+ * d	-28(%ebp) -> %edi
+ * x	-32(%ebp) -> %ebx
+ * y	-36(%ebp)
+ * ca	-40(%ebp)
+ *
+ * mm0 A
+ * mm1 FgA
+ * mm2 FgPre
+ * mm3
+ * mm4
+ * mm5 255
+ * mm6 128
+ * mm7 0
+ *
+*/
+
+nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P:	/* composite 4-byte source pixels, scaled by alpha, over 3-byte destination pixels */
+	pushl	%ebp
+	movl	%esp, %ebp
+	pushl	%ebx
+	subl	$36, %esp	/* frame space; no local slot is referenced below */
+	pushl	%edi
+	pushl	%esi
+
+/* Load %mm7 with [0 0 0 0] (zero source for the byte -> word unpacks) */
+	movl	$0, %eax
+	movd	%eax, %mm7
+
+/* Load %mm6 with [128 128 128 128] (rounding bias for the /255 steps) */
+	movl	$0x80808080, %eax
+	movd	%eax, %mm6
+	punpcklbw %mm7, %mm6
+
+/* Load %mm5 with [255 255 255 255] (x ^ 255 == 255 - x for byte values) */
+	movl	$0xffffffff, %eax
+	movd	%eax, %mm5
+	punpcklbw %mm7, %mm5
+
+/* Load %mm0 with [a a a a] */
+/* Check full opacity: alpha == 255 takes the loop without the per-pixel alpha multiply */
+	movzbl	32(%ebp), %eax
+	cmpb	$0xff, %al
+	jz	.opaque
+	movd	%eax, %mm0
+	punpcklwd %mm0, %mm0
+	punpckldq %mm0, %mm0
+
+/* for (y = ...): %ecx counts the h rows down to zero */
+	movl    16(%ebp), %ecx
+.fory:
+
+/* d = px */
+/* s = spx */
+	movl	8(%ebp), %edi
+	movl	24(%ebp), %esi
+
+/* for (x = ...): %ebx counts the w pixels down to zero */
+	movl	12(%ebp), %ebx
+.forx:
+
+/* Fg -> %mm1; a source pixel with zero alpha is skipped */
+/* fixme: Do we have to bother about alignment here? (Lauris) */
+	movl	(%esi), %eax
+	testl	$0xff000000, %eax
+	jz	.clip
+	movd	%eax, %mm1
+	punpcklbw %mm7, %mm1
+
+/* [Fg * a] -> mm1, rounded: t = Fg * a + 128, result = (t + (t >> 8)) >> 8 */
+	pmullw	%mm0, %mm1
+	paddw	%mm6, %mm1
+	movq	%mm1, %mm2
+	psrlw	$8, %mm2
+	paddw	%mm2, %mm1
+	psrlw	$8, %mm1
+
+/* [255 - FgA] -> mm2: replicate the alpha word (word 3) and invert it */
+	movq	%mm1, %mm2
+	punpckhwd %mm2, %mm2
+	punpckhdq %mm2, %mm2
+	pxor	%mm5, %mm2
+
+/* Bg -> mm3; NOTE(review): loads 4 bytes although a destination pixel is 3 bytes wide */
+	movd	(%edi), %mm3
+	punpcklbw %mm7, %mm3
+
+/* Fg + ((255 - FgA) * Bg) / 255 */
+	pmullw	%mm2, %mm3
+	paddw	%mm6, %mm3
+	movq	%mm3, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm1, %mm3
+
+/* Store pixel: only the three low bytes are written */
+	packuswb %mm3, %mm3
+	movd	%mm3, %eax
+	movb	%al, 0(%edi)
+	shrl	$8, %eax
+	movb	%al, 1(%edi)
+	shrl	$8, %eax
+	movb	%al, 2(%edi)
+
+.clip:
+	addl	$3, %edi	/* next 3-byte destination pixel */
+	addl	$4, %esi	/* next 4-byte source pixel */
+
+	decl	%ebx
+	jnz	.forx
+
+	movl	20(%ebp), %eax
+	addl	%eax, 8(%ebp)	/* px += rs, updated in the argument slot */
+	movl	28(%ebp), %eax
+	addl	%eax, 24(%ebp)	/* spx += srs */
+
+	decl	%ecx
+	jnz	.fory
+
+.exit:
+	emms
+	popl	%esi
+	popl	%edi
+	addl	$36, %esp
+	popl	%ebx
+	popl	%ebp
+	ret
+
+.opaque:	/* alpha == 255: source pixels are used as they are */
+/* for (y = ...) */
+	movl    16(%ebp), %ecx
+.o_fory:
+
+/* d = px */
+/* s = spx */
+	movl	8(%ebp), %edi
+	movl	24(%ebp), %esi
+
+/* for (x = ...) */
+	movl	12(%ebp), %ebx
+.o_forx:
+
+/* Fg -> %mm1; alpha 0 skips the pixel, alpha 255 jumps straight to the store */
+/* fixme: Do we have to bother about alignment here? (Lauris) */
+	movl	(%esi), %eax
+	testl	$0xff000000, %eax
+	jz	.o_clip
+	cmpl	$0xff000000, %eax
+	jnb	.o_store	/* unsigned >= 0xff000000 means the alpha byte is 0xff */
+	movd	%eax, %mm1
+	punpcklbw %mm7, %mm1
+
+/* [255 - FgA] -> mm2 */
+	movq	%mm1, %mm2
+	punpckhwd %mm2, %mm2
+	punpckhdq %mm2, %mm2
+	pxor	%mm5, %mm2
+
+/* Bg -> mm3; NOTE(review): loads 4 bytes although a destination pixel is 3 bytes wide */
+	movd	(%edi), %mm3
+	punpcklbw %mm7, %mm3
+
+/* Fg + ((255 - FgA) * Bg) / 255 */
+	pmullw	%mm2, %mm3
+	paddw	%mm6, %mm3
+	movq	%mm3, %mm4
+	psrlw	$8, %mm4
+	paddw	%mm4, %mm3
+	psrlw	$8, %mm3
+	paddw	%mm1, %mm3
+
+/* Store pixel: only the three low bytes of %eax are written */
+	packuswb %mm3, %mm3
+	movd	%mm3, %eax
+.o_store:
+	movb	%al, 0(%edi)
+	shrl	$8, %eax
+	movb	%al, 1(%edi)
+	shrl	$8, %eax
+	movb	%al, 2(%edi)
+
+.o_clip:
+	addl	$3, %edi
+	addl	$4, %esi
+
+	decl	%ebx
+	jnz	.o_forx
+
+	movl	20(%ebp), %eax
+	addl	%eax, 8(%ebp)	/* px += rs */
+	movl	28(%ebp), %eax
+	addl	%eax, 24(%ebp)	/* spx += srs */
+
+	decl	%ecx
+	jnz	.o_fory
+
+	jmp .exit
+
+.Lfe1:
+	.size	nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P,.Lfe1-nr_mmx_R8G8B8_R8G8B8_R8G8B8A8_P
+	.ident	"GCC: (GNU) 3.2"
diff --git a/patches/patch-small-lib-2 b/patches/patch-small-lib-2
new file mode 100644
index 0000000..938e36a
--- /dev/null
+++ b/patches/patch-small-lib-2
@@ -0,0 +1,78 @@
+Index: autogen.sh
+===================================================================
+RCS file: /cvs/liboil/liboil/autogen.sh,v
+retrieving revision 1.8
+diff -u -r1.8 autogen.sh
+--- autogen.sh	26 Jul 2005 20:32:36 -0000	1.8
++++ autogen.sh	3 Aug 2005 21:16:18 -0000
+@@ -1,4 +1,4 @@
+ #!/bin/sh
+ 
+ autoreconf -i -f &&
+-./configure --enable-maintainer-mode --disable-static $@
++./configure --enable-maintainer-mode --disable-static --enable-library-peeling $@
+Index: configure.ac
+===================================================================
+RCS file: /cvs/liboil/liboil/configure.ac,v
+retrieving revision 1.56
+diff -u -r1.56 configure.ac
+--- configure.ac	3 Aug 2005 03:33:47 -0000	1.56
++++ configure.ac	3 Aug 2005 21:16:18 -0000
+@@ -60,6 +60,14 @@
+   AC_DEFINE(ENABLE_BROKEN_IMPLS, 1, [Define if compiling broken implementations])
+ fi
+ 
++AC_ARG_ENABLE(library-peeling,
++  AC_HELP_STRING([--enable-library-peeling],[peel unused functions]),
++  enable_library_peeling=$enableval,enable_library_peeling=no)
++if test "x$enable_library_peeling" = xyes ; then
++  AC_DEFINE(ENABLE_PEELING, 1, [Define if peeling library])
++  LIBOIL_CFLAGS="$LIBOIL_CFLAGS -ffunction-sections -fdata-sections"
++fi
++
+ ##################################################
+ # Check for gtk-doc.
+ ##################################################
+Index: liboil/Makefile.am
+===================================================================
+RCS file: /cvs/liboil/liboil/liboil/Makefile.am,v
+retrieving revision 1.41
+diff -u -r1.41 Makefile.am
+--- liboil/Makefile.am	3 Aug 2005 03:33:47 -0000	1.41
++++ liboil/Makefile.am	3 Aug 2005 21:16:18 -0000
+@@ -46,7 +46,6 @@
+ 	-no-undefined \
+ 	-export-symbols-regex 'oil_'
+ liboiltmp1_la_LIBADD = \
+-	liboilfunctions.la \
+ 	$(LIBM)
+ 
+ liboil_@LIBOIL_MAJORMINOR@_la_SOURCES = \
+@@ -67,12 +66,12 @@
+ 	liboiltest.c \
+ 	liboilmarshal.c
+ liboil_@LIBOIL_MAJORMINOR@_la_LIBADD = \
+-	liboilfunctions.la \
+ 	$(LIBM)
+ liboil_@LIBOIL_MAJORMINOR@_la_CFLAGS = $(LIBOIL_CFLAGS)
+ liboil_@LIBOIL_MAJORMINOR@_la_LDFLAGS = \
+ 	-no-undefined \
+ 	-version-info $(LIBOIL_LIBVERSION) \
++	.libs/liboilfunctions.a \
+ 	-export-symbols-regex '^oil_'
+ 
+ # This is required to use 'make -j2'. Automake doesn't seem to notice
+@@ -111,11 +110,11 @@
+ 	echo '  NULL' >>liboilarray.c.tmp
+ 	echo '};' >>liboilarray.c.tmp
+ 	echo >>liboilarray.c.tmp
+-	grep '^_oil_function_impl_' .libs/liboiltmp1.exp | \
++	grep '^_oil_function_impl_.*ref' .libs/liboiltmp1.exp | \
+ 	  sed 's/.*/extern OilFunctionImpl &;/' >>liboilarray.c.tmp
+ 	echo >>liboilarray.c.tmp
+ 	echo 'OilFunctionImpl *_oil_function_impl_array[] = {' >>liboilarray.c.tmp
+-	grep '^_oil_function_impl_' .libs/liboiltmp1.exp | \
++	grep '^_oil_function_impl_.*ref' .libs/liboiltmp1.exp | \
+ 	  sed 's/.*/  \&&,/' >>liboilarray.c.tmp
+ 	echo '  NULL' >>liboilarray.c.tmp
+ 	echo '};' >>liboilarray.c.tmp
author	David Schleef <ds@schleef.org>	2005-12-22 22:04:54 +0000
committer	David Schleef <ds@schleef.org>	2005-12-22 22:04:54 +0000
commit	4e381b38abceb843eb6dfd88f734a5c8a38b0b71 (patch)
tree	dcd40e81158778ce3eab664375fa85dadf1e7e80 /patches
parent	a5ebcb17227fa00f782d9e8a00264f90e1116906 (diff)
download	liboil-4e381b38abceb843eb6dfd88f734a5c8a38b0b71.tar.gz