summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Schleef <ds@schleef.org>2004-12-30 05:47:41 +0000
committerDavid Schleef <ds@schleef.org>2004-12-30 05:47:41 +0000
commitee6786e10ddaafc2566f182595e4005db1b7a612 (patch)
treed508d3b5bae7790b1f1ad460f7cb4553226117e7
parente692a57731086358b0f36fd2c0c3483c430fcc8c (diff)
downloadliboil-ee6786e10ddaafc2566f182595e4005db1b7a612.tar.gz
* liboil/colorspace/Makefile.am: some new classes
* liboil/colorspace/argb_paint.c: (argb_paint_u8_ref), (argb_paint_u8_fast): * liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx): * liboil/colorspace/ayuv2argb.c: (ayuv2argb_u8_ref), (ayuv2argb_u8_int): * liboil/colorspace/ayuv2argb_i386.c: (ayuv2argb_u8_mmx), (ayuv2argb_u8_mmx2), (ayuv2argb_u8_mmx3):
-rw-r--r--ChangeLog11
-rw-r--r--liboil/colorspace/Makefile.am15
-rw-r--r--liboil/colorspace/argb_paint.c82
-rw-r--r--liboil/colorspace/argb_paint_i386.c87
-rw-r--r--liboil/colorspace/ayuv2argb.c84
-rw-r--r--liboil/colorspace/ayuv2argb_i386.c174
6 files changed, 451 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 2195b13..58c9abf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2004-12-29 David Schleef <ds@schleef.org>
+
+ * liboil/colorspace/Makefile.am: some new classes
+ * liboil/colorspace/argb_paint.c: (argb_paint_u8_ref),
+ (argb_paint_u8_fast):
+ * liboil/colorspace/argb_paint_i386.c: (argb_paint_u8_mmx):
+ * liboil/colorspace/ayuv2argb.c: (ayuv2argb_u8_ref),
+ (ayuv2argb_u8_int):
+ * liboil/colorspace/ayuv2argb_i386.c: (ayuv2argb_u8_mmx),
+ (ayuv2argb_u8_mmx2), (ayuv2argb_u8_mmx3):
+
2004-12-27 David Schleef <ds@schleef.org>
* liboil/copy/copy_i386.c: (copy_u8_mmx), (copy_u8_mmx2):
diff --git a/liboil/colorspace/Makefile.am b/liboil/colorspace/Makefile.am
index 2ffcfc4..8aa5929 100644
--- a/liboil/colorspace/Makefile.am
+++ b/liboil/colorspace/Makefile.am
@@ -8,7 +8,9 @@ noinst_LTLIBRARIES = libcolorspace.la $(opt_libs)
c_sources = \
rgb2bgr.c \
- rgb2rgba.c
+ rgb2rgba.c \
+ ayuv2argb.c \
+ argb_paint.c
if HAVE_CPU_POWERPC
powerpc_sources = \
@@ -18,9 +20,18 @@ else
powerpc_sources =
endif
+if HAVE_CPU_I386
+i386_sources = \
+ ayuv2argb_i386.c \
+ argb_paint_i386.c
+else
+i386_sources =
+endif
+
libcolorspace_la_SOURCES = \
$(c_sources) \
- $(powerpc_sources)
+ $(powerpc_sources) \
+ $(i386_sources)
libcolorspace_la_LIBADD = $(opt_libs)
libcolorspace_la_CFLAGS = $(LIBOIL_CFLAGS)
diff --git a/liboil/colorspace/argb_paint.c b/liboil/colorspace/argb_paint.c
new file mode 100644
index 0000000..6c7598a
--- /dev/null
+++ b/liboil/colorspace/argb_paint.c
@@ -0,0 +1,82 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboil.h>
+#include <liboil/liboilfunction.h>
+
+OIL_DEFINE_CLASS (argb_paint_u8, "uint8_t *i_4xn, uint8_t *s1_4, uint8_t *s2_n, int n");
+
+
+/*
+ * imult(a,b): a*b/255 rounded to nearest, for a and b in [0,255].  The +128
+ * bias is what makes imult(x,255) == x and imult(255,255) == 255; without
+ * it the quotient is truncated and imult(255,255) comes out as 254.
+ * apply(a,b,c): blend of a and b with weight c in [0,255]; c == 0 yields a,
+ * c == 255 yields b.
+ * Both macros evaluate their arguments more than once, so arguments must
+ * not have side effects.
+ */
+#define imult(a,b) (((a)*(b) + 128 + (((a)*(b) + 128) >> 8)) >> 8)
+#define apply(a,b,c) (imult(a,255-(c)) + imult(b,c))
+
+/*
+ * Reference implementation of the argb_paint_u8 class: blends one 4-byte
+ * colour into n destination pixels, with one weight byte per pixel.
+ *
+ * dest:  n pixels of 4 bytes each, updated in place
+ * color: the 4 bytes to paint with
+ * alpha: n weights; alpha[k] is handed to apply() for every channel of
+ *        pixel k
+ * n:     number of pixels; nothing is done when n <= 0
+ */
+static void
+argb_paint_u8_ref (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
+{
+  uint8_t *pixel = dest;
+  int k;
+
+  for (k = 0; k < n; k++, pixel += 4) {
+    int channel;
+
+    /* Channels are processed in memory order, 0 through 3. */
+    for (channel = 0; channel < 4; channel++) {
+      pixel[channel] = apply(pixel[channel], color[channel], alpha[k]);
+    }
+  }
+}
+OIL_DEFINE_IMPL_REF (argb_paint_u8_ref, argb_paint_u8);
+
+/*
+ * Same operation as argb_paint_u8_ref, with two shortcuts per pixel:
+ * a weight of 0 leaves the destination pixel untouched and a weight of
+ * 255 copies the colour bytes verbatim.  Every other weight goes through
+ * apply() channel by channel.
+ */
+static void
+argb_paint_u8_fast (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
+{
+  uint8_t *px;
+  const uint8_t *cov;
+  int left;
+
+  for (px = dest, cov = alpha, left = n; left > 0; left--, px += 4, cov++) {
+    if (cov[0] == 0) {
+      /* fully transparent: keep the destination as it is */
+      continue;
+    }
+
+    if (cov[0] == 255) {
+      /* fully opaque: plain copy of the colour */
+      px[0] = color[0];
+      px[1] = color[1];
+      px[2] = color[2];
+      px[3] = color[3];
+      continue;
+    }
+
+    px[0] = apply(px[0], color[0], cov[0]);
+    px[1] = apply(px[1], color[1], cov[0]);
+    px[2] = apply(px[2], color[2], cov[0]);
+    px[3] = apply(px[3], color[3], cov[0]);
+  }
+}
+OIL_DEFINE_IMPL (argb_paint_u8_fast, argb_paint_u8);
+
diff --git a/liboil/colorspace/argb_paint_i386.c b/liboil/colorspace/argb_paint_i386.c
new file mode 100644
index 0000000..b342a7a
--- /dev/null
+++ b/liboil/colorspace/argb_paint_i386.c
@@ -0,0 +1,87 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboil.h>
+#include <liboil/liboilfunction.h>
+
+OIL_DECLARE_CLASS (argb_paint_u8);
+
+
+/* Scalar blend helpers: imult(a,b) approximates a*b/255 and apply(a,b,c)
+ * mixes a and b with weight c.  NOTE(review): nothing in the visible part
+ * of this file references them; the asm does its own arithmetic. */
+#define imult(a,b) (((a)*(b) + (((a)*(b)) >> 8))>>8)
+#define apply(a,b,c) (imult(a,255-c) + imult(b,c))
+
+/*
+ * 16-bit lane constants read by the asm in this file.  Row 0 holds 255 in
+ * each of the four lanes; the weight is subtracted from it to obtain the
+ * complementary weight for the destination pixel.  The table is only ever
+ * read, hence const.
+ */
+static const short constants[][4] = {
+  { 255, 255, 255, 255 }
+};
+
+/*
+ * MMX implementation of argb_paint_u8 (i386 only: 32-bit register forms
+ * are used on the operand that holds the colour pointer).
+ *
+ * dest:  n pixels of 4 bytes each, updated in place
+ * color: 4 bytes; loaded once before the loop
+ * alpha: n weight bytes, one per pixel
+ * n:     pixel count; the loop is a do/while, so n < 1 returns early
+ *
+ * Per pixel: weight 0 skips the pixel, weight 255 stores the colour
+ * unchanged, anything else computes
+ *   x = dest*(255 - w) + color*w;  result = (x + (x >> 8)) >> 8
+ * in four 16-bit lanes.
+ *
+ * Register use: mm0 = zero, mm3 = colour unpacked to words, mm5 = colour
+ * as packed bytes, eax = scratch for the current weight (the colour
+ * pointer is dead once the colour has been loaded).
+ *
+ * NOTE(review): pshufw is not part of baseline MMX (it came with the
+ * SSE / extended-MMX instruction sets); if liboil has an implementation
+ * flag for that, it should be added to OIL_IMPL_FLAG_MMX below.
+ */
+static void
+argb_paint_u8_mmx (uint8_t *dest, uint8_t *color, uint8_t *alpha, int n)
+{
+  if (n<1)return;
+  asm volatile (
+      "  pxor %%mm0, %%mm0\n"
+      /* movd, not movq: color is only 4 bytes long */
+      "  movd (%1), %%mm3\n"
+      /* keep a packed copy for the weight == 255 store */
+      "  movq %%mm3, %%mm5\n"
+      "  punpcklbw %%mm0, %%mm3\n"
+      "  movl $0, %1\n"
+      "1:\n"
+      "  movb (%2), %%al\n"
+      /* movb does not set flags; test explicitly before branching */
+      "  testb %%al, %%al\n"
+      "  je 3f\n"
+      "  cmpl $255, %1\n"
+      "  jne 2f\n"
+      "  movd %%mm5, (%0)\n"
+      "  jmp 3f\n"
+      "2:\n"
+      /* movd, not movq: do not read past the current 4-byte pixel */
+      "  movd (%0), %%mm1\n"
+      "  punpcklbw %%mm0, %%mm1\n"
+      "  movd %1, %%mm2\n"
+      "  pshufw $0x00, %%mm2, %%mm2\n"
+      "  movq 0(%4), %%mm4\n"
+      "  psubw %%mm2, %%mm4\n"
+      "  pmullw %%mm1, %%mm4\n"
+      "  pmullw %%mm3, %%mm2\n"
+      "  paddw %%mm4, %%mm2\n"
+      "  movq %%mm2, %%mm1\n"
+      "  psrlw $8, %%mm1\n"
+      "  paddw %%mm1, %%mm2\n"
+      "  psrlw $8, %%mm2\n"
+      "  packuswb %%mm0, %%mm2\n"
+      "  movd %%mm2, (%0)\n"
+      "3:\n"
+      "  add $4, %0\n"
+      "  add $1, %2\n"
+      "  decl %3\n"
+      "  jne 1b\n"
+      "  emms\n"
+      /* "a" pins operand 1 to eax, which the code addresses as %%al */
+      : "+r" (dest), "+a" (color), "+r" (alpha), "+r" (n)
+      : "r" (&constants)
+      : "memory", "cc");
+}
+OIL_DEFINE_IMPL_FULL (argb_paint_u8_mmx, argb_paint_u8, OIL_IMPL_FLAG_MMX);
+
diff --git a/liboil/colorspace/ayuv2argb.c b/liboil/colorspace/ayuv2argb.c
new file mode 100644
index 0000000..6b8b8e8
--- /dev/null
+++ b/liboil/colorspace/ayuv2argb.c
@@ -0,0 +1,84 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboil.h>
+#include <liboil/liboilfunction.h>
+
+OIL_DEFINE_CLASS (ayuv2argb_u8, "uint8_t *d_4xn, uint8_t *s_4xn, int n");
+
+/*
+ * clamp(x,a,b) limits x to the range [a,b].  Arguments are parenthesized
+ * so that any expression can be passed; they are evaluated more than once
+ * and must therefore be free of side effects.
+ */
+#define clamp(x,a,b) clamp_lower(clamp_upper(x,b),a)
+#define clamp_lower(x,a) (((x)<(a))?(a):(x))
+#define clamp_upper(x,b) (((x)>(b))?(b):(x))
+
+/* from the JFIF spec */
+#define YUV_TO_R(y,u,v) clamp((y) + 1.402*((v)-128.0),0,255)
+#define YUV_TO_G(y,u,v) clamp((y) - 0.34414*((u)-128.0) - 0.71414*((v)-128.0),0,255)
+#define YUV_TO_B(y,u,v) clamp((y) + 1.772*((u)-128.0),0,255)
+
+/* Integer variants: the coefficients above scaled by 256, result shifted
+ * back down by 8 bits. */
+#define YUV_TO_R_INT(y,u,v) clamp(((y)*256 + 358*((v)-128))>>8,0,255)
+#define YUV_TO_G_INT(y,u,v) clamp(((y)*256 - 88*((u)-128) - 183*((v)-128))>>8,0,255)
+#define YUV_TO_B_INT(y,u,v) clamp(((y)*256 + 454*((u)-128))>>8,0,255)
+
+
+/*
+ * Reference implementation of the ayuv2argb_u8 class, using the
+ * floating-point YUV_TO_R/G/B macros.
+ *
+ * argb: output, n pixels of 4 bytes
+ * ayuv: input, n pixels of 4 bytes; byte 0 is copied through unchanged,
+ *       bytes 1..3 are passed to the macros as y, u and v
+ * n:    number of pixels; nothing is done when n <= 0
+ */
+static void
+ayuv2argb_u8_ref (uint8_t *argb, uint8_t *ayuv, int n)
+{
+  uint8_t *out = argb;
+  uint8_t *in = ayuv;
+  int remaining;
+
+  for (remaining = n; remaining > 0; remaining--) {
+    out[0] = in[0];
+    out[1] = YUV_TO_R(in[1], in[2], in[3]);
+    out[2] = YUV_TO_G(in[1], in[2], in[3]);
+    out[3] = YUV_TO_B(in[1], in[2], in[3]);
+    out += 4;
+    in += 4;
+  }
+}
+OIL_DEFINE_IMPL_REF (ayuv2argb_u8_ref, ayuv2argb_u8);
+
+/*
+ * Integer-only implementation of ayuv2argb_u8: same per-pixel layout as
+ * the reference, but built on the fixed-point YUV_TO_*_INT macros.
+ */
+static void
+ayuv2argb_u8_int (uint8_t *argb, uint8_t *ayuv, int n)
+{
+  uint8_t *dst = argb;
+  uint8_t *src = ayuv;
+  int count = n;
+
+  while (count > 0) {
+    dst[0] = src[0];
+    dst[1] = YUV_TO_R_INT(src[1], src[2], src[3]);
+    dst[2] = YUV_TO_G_INT(src[1], src[2], src[3]);
+    dst[3] = YUV_TO_B_INT(src[1], src[2], src[3]);
+
+    dst += 4;
+    src += 4;
+    count--;
+  }
+}
+OIL_DEFINE_IMPL (ayuv2argb_u8_int, ayuv2argb_u8);
+
diff --git a/liboil/colorspace/ayuv2argb_i386.c b/liboil/colorspace/ayuv2argb_i386.c
new file mode 100644
index 0000000..2cbdf06
--- /dev/null
+++ b/liboil/colorspace/ayuv2argb_i386.c
@@ -0,0 +1,174 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2003,2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboil.h>
+#include <liboil/liboilfunction.h>
+
+OIL_DECLARE_CLASS (ayuv2argb_u8);
+
+/*
+ * clamp(x,a,b) limits x to the range [a,b].  Arguments are parenthesized
+ * so that any expression can be passed; they are evaluated more than once
+ * and must therefore be free of side effects.
+ */
+#define clamp(x,a,b) clamp_lower(clamp_upper(x,b),a)
+#define clamp_lower(x,a) (((x)<(a))?(a):(x))
+#define clamp_upper(x,b) (((x)>(b))?(b):(x))
+
+/* from the JFIF spec */
+#define YUV_TO_R(y,u,v) clamp((y) + 1.402*((v)-128),0,255)
+#define YUV_TO_G(y,u,v) clamp((y) - 0.34414*((u)-128) - 0.71414*((v)-128),0,255)
+#define YUV_TO_B(y,u,v) clamp((y) + 1.772*((u)-128),0,255)
+
+/* Integer variants: the coefficients above scaled by 256, result shifted
+ * back down by 8 bits. */
+#define YUV_TO_R_INT(y,u,v) clamp(((y)*256 + 358*((v)-128))>>8,0,255)
+#define YUV_TO_G_INT(y,u,v) clamp(((y)*256 - 88*((u)-128) - 183*((v)-128))>>8,0,255)
+#define YUV_TO_B_INT(y,u,v) clamp(((y)*256 + 454*((u)-128))>>8,0,255)
+
+/*
+ * Coefficient rows for ayuv2argb_u8_mmx, four 16-bit lanes per row; the
+ * asm addresses them as 0(%3), 8(%3), ... 32(%3).  Lane 0 corresponds to
+ * the first byte of a pixel, lanes 1..3 to the following three.
+ *   row 0: subtracted from the unpacked pixel (128 bias on lanes 2 and 3)
+ *   row 1: multiplier for the broadcast lane-0 value
+ *   row 2: multiplier for the broadcast lane-1 value
+ *   row 3: multiplier for the broadcast, bias-corrected lane-2 value
+ *   row 4: multiplier for the broadcast, bias-corrected lane-3 value
+ * The products are summed and shifted right by 7, so the scale is 128:
+ * 179, 44, 91 and 227 stand for the 1.402, 0.34414, 0.71414 and 1.772
+ * factors of the scalar macros.  The table is only read, hence const.
+ */
+static const short constants[][4] = {
+  { 0, 0, 128, 128 },
+  { 128, 0, 0, 0 },
+  { 0, 128, 128, 128 },
+  { 0, 0, -44, 227 },
+  { 0, 179, -91, 0 }
+};
+
+/*
+ * MMX implementation of ayuv2argb_u8, one 4-byte pixel per iteration.
+ *
+ * Each pixel is unpacked to four 16-bit lanes, the 128 bias is removed
+ * from lanes 2 and 3, every lane is broadcast with pshufw and multiplied
+ * by its coefficient row, the four products are added with signed
+ * saturation, shifted right by 7 and packed back to bytes with unsigned
+ * saturation.
+ *
+ * argb: output, n pixels of 4 bytes
+ * ayuv: input, n pixels of 4 bytes
+ * n:    pixel count; the loop is a do/while, so n < 1 returns early
+ *
+ * NOTE(review): pshufw is not part of baseline MMX (it came with the
+ * SSE / extended-MMX instruction sets); if liboil has an implementation
+ * flag for that, it should be added to OIL_IMPL_FLAG_MMX below.
+ */
+static void
+ayuv2argb_u8_mmx (uint8_t *argb, uint8_t *ayuv, int n)
+{
+  if (n<1) return;
+  asm volatile (
+      "  pxor %%mm0, %%mm0\n"
+      "1:\n"
+      /* movd, not movq: do not read past the current 4-byte pixel */
+      "  movd (%1), %%mm1\n"
+      "  add $4, %1\n"
+      "  punpcklbw %%mm0, %%mm1\n"
+      "  psubw 0(%3), %%mm1\n"
+      "  pshufw $0x55, %%mm1, %%mm2\n"
+      "  pshufw $0xaa, %%mm1, %%mm3\n"
+      "  pshufw $0xff, %%mm1, %%mm4\n"
+      "  pshufw $0x00, %%mm1, %%mm1\n"
+      "  pmullw 8(%3), %%mm1\n"
+      "  pmullw 16(%3), %%mm2\n"
+      "  pmullw 24(%3), %%mm3\n"
+      "  pmullw 32(%3), %%mm4\n"
+      "  paddsw %%mm2, %%mm1\n"
+      "  paddsw %%mm4, %%mm3\n"
+      "  paddsw %%mm3, %%mm1\n"
+      "  psraw $7, %%mm1\n"
+      "  packuswb %%mm0, %%mm1\n"
+      "  movd %%mm1, (%0)\n"
+      "  add $4, %0\n"
+      "  decl %2\n"
+      "  jne 1b\n"
+      "  emms\n"
+      : "+r" (argb), "+r" (ayuv), "+r" (n)
+      : "r" (&constants)
+      : "memory", "cc");
+
+}
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx, ayuv2argb_u8, OIL_IMPL_FLAG_MMX);
+
+
+/*
+ * Variant of the coefficient table used by ayuv2argb_u8_mmx2 and
+ * ayuv2argb_u8_mmx3.  Rows 1 and 2 are lane masks (-1 is all bits set)
+ * applied with pand instead of multipliers: row 1 keeps lane 0 only,
+ * row 2 keeps lanes 1..3.  Rows 0, 3 and 4 are the 128 bias and the two
+ * multiplier rows, scaled by 128.  The table is only read, hence const.
+ */
+static const short constants2[][4] = {
+  { 0, 0, 128, 128 },
+  { -1, 0, 0, 0 },
+  { 0, -1, -1, -1 },
+  { 0, 0, -44, 227 },
+  { 0, 179, -91, 0 }
+};
+
+
+/*
+ * Second MMX implementation of ayuv2argb_u8.  The two lanes that need no
+ * scaling are selected with pand masks instead of being multiplied; only
+ * the two bias-corrected lanes are multiplied, summed and shifted right
+ * by 7 before being added in.
+ *
+ * argb: output, n pixels of 4 bytes
+ * ayuv: input, n pixels of 4 bytes
+ * n:    pixel count; the loop is a do/while, so n < 1 returns early
+ *
+ * NOTE(review): pshufw is not part of baseline MMX (it came with the
+ * SSE / extended-MMX instruction sets); if liboil has an implementation
+ * flag for that, it should be added to OIL_IMPL_FLAG_MMX below.
+ */
+static void
+ayuv2argb_u8_mmx2 (uint8_t *argb, uint8_t *ayuv, int n)
+{
+  if (n<1) return;
+  asm volatile (
+      "  pxor %%mm0, %%mm0\n"
+      "1:\n"
+      /* movd, not movq: do not read past the current 4-byte pixel */
+      "  movd (%1), %%mm1\n"
+      "  add $4, %1\n"
+      "  punpcklbw %%mm0, %%mm1\n"
+      "  psubw 0(%3), %%mm1\n"
+      "  pshufw $0x55, %%mm1, %%mm2\n"
+      "  pshufw $0xaa, %%mm1, %%mm3\n"
+      "  pshufw $0xff, %%mm1, %%mm4\n"
+      "  pshufw $0x00, %%mm1, %%mm1\n"
+      "  pand 8(%3), %%mm1\n"
+      "  pand 16(%3), %%mm2\n"
+      "  pmullw 24(%3), %%mm3\n"
+      "  pmullw 32(%3), %%mm4\n"
+      "  paddsw %%mm4, %%mm3\n"
+      "  psraw $7, %%mm3\n"
+      "  paddsw %%mm2, %%mm1\n"
+      "  paddsw %%mm3, %%mm1\n"
+      "  packuswb %%mm0, %%mm1\n"
+      "  movd %%mm1, (%0)\n"
+      "  add $4, %0\n"
+      "  decl %2\n"
+      "  jne 1b\n"
+      "  emms\n"
+      : "+r" (argb), "+r" (ayuv), "+r" (n)
+      : "r" (&constants2)
+      : "memory", "cc");
+
+}
+
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx2, ayuv2argb_u8, OIL_IMPL_FLAG_MMX);
+
+/*
+ * Third MMX implementation of ayuv2argb_u8: same arithmetic as
+ * ayuv2argb_u8_mmx2, with table rows 1..3 preloaded into mm5..mm7 before
+ * the loop.  Row 4 is still read from memory on every iteration because
+ * mm0..mm7 are all in use and MMX has no further registers.
+ *
+ * argb: output, n pixels of 4 bytes
+ * ayuv: input, n pixels of 4 bytes
+ * n:    pixel count; the loop is a do/while, so n < 1 returns early
+ *
+ * NOTE(review): pshufw is not part of baseline MMX (it came with the
+ * SSE / extended-MMX instruction sets); if liboil has an implementation
+ * flag for that, it should be added to OIL_IMPL_FLAG_MMX below.
+ */
+static void
+ayuv2argb_u8_mmx3 (uint8_t *argb, uint8_t *ayuv, int n)
+{
+  if (n<1) return;
+  asm volatile (
+      "  pxor %%mm0, %%mm0\n"
+      "  movq 8(%3), %%mm5\n"
+      "  movq 16(%3), %%mm6\n"
+      "  movq 24(%3), %%mm7\n"
+      "1:\n"
+      /* movd, not movq: do not read past the current 4-byte pixel */
+      "  movd (%1), %%mm1\n"
+      "  add $4, %1\n"
+      "  punpcklbw %%mm0, %%mm1\n"
+      "  psubw 0(%3), %%mm1\n"
+      "  pshufw $0x55, %%mm1, %%mm2\n"
+      "  pshufw $0xaa, %%mm1, %%mm3\n"
+      "  pshufw $0xff, %%mm1, %%mm4\n"
+      "  pshufw $0x00, %%mm1, %%mm1\n"
+      "  pand %%mm5, %%mm1\n"
+      "  pand %%mm6, %%mm2\n"
+      "  pmullw %%mm7, %%mm3\n"
+      "  pmullw 32(%3), %%mm4\n"
+      "  paddsw %%mm4, %%mm3\n"
+      "  psraw $7, %%mm3\n"
+      "  paddsw %%mm2, %%mm1\n"
+      "  paddsw %%mm3, %%mm1\n"
+      "  packuswb %%mm0, %%mm1\n"
+      "  movd %%mm1, (%0)\n"
+      "  add $4, %0\n"
+      "  decl %2\n"
+      "  jne 1b\n"
+      "  emms\n"
+      : "+r" (argb), "+r" (ayuv), "+r" (n)
+      : "r" (&constants2)
+      : "memory", "cc");
+
+}
+OIL_DEFINE_IMPL_FULL (ayuv2argb_u8_mmx3, ayuv2argb_u8, OIL_IMPL_FLAG_MMX);
+