summary | refs | log | tree | commit | diff
path: root/liboil/dct
diff options
context:
space:
mode:
author: David Schleef <ds@schleef.org> 2005-04-30 06:00:56 +0000
committer: David Schleef <ds@schleef.org> 2005-04-30 06:00:56 +0000
commit: 81e196203fe61e8dedc5f10e03b9ec7c9d2a7eff (patch)
tree: ed85b753dc0cc8ef5c1083c2f704e150cd52b504 /liboil/dct
parent: eb6ae36041277cde75b484d3e0fc35277f83a52a (diff)
download: liboil-81e196203fe61e8dedc5f10e03b9ec7c9d2a7eff.tar.gz
* examples/Makefile.am: add oil-test
* examples/oil-test.c: A copy of work.c modified for displaying test results for any class.
* liboil/dct/Makefile.am:
* liboil/dct/idct8x8_i386.c: (idct8x8_s16_mmx), (fdct8x8s_s16_mmx): Add mmx code for idct
* liboil/dct/idct8x8theora_ref.c: Add some classes for idct8x8 to the theora spec.
* liboil/liboilfuncs.h: update
Diffstat (limited to 'liboil/dct')
-rw-r--r-- liboil/dct/Makefile.am 3
-rw-r--r-- liboil/dct/idct8x8_i386.c 356
-rw-r--r-- liboil/dct/idct8x8theora_ref.c 200
3 files changed, 545 insertions, 14 deletions
diff --git a/liboil/dct/Makefile.am b/liboil/dct/Makefile.am
index 9744e53..a407a45 100644
--- a/liboil/dct/Makefile.am
+++ b/liboil/dct/Makefile.am
@@ -25,7 +25,8 @@ c_sources = \
fdct8x8s_s16.c \
idct8_f64.c \
idct8x8_c.c \
- imdct32_f32.c
+ imdct32_f32.c \
+ idct8x8theora_ref.c
libdct_la_SOURCES = \
$(c_sources) \
diff --git a/liboil/dct/idct8x8_i386.c b/liboil/dct/idct8x8_i386.c
index 0f68c72..c1d79e9 100644
--- a/liboil/dct/idct8x8_i386.c
+++ b/liboil/dct/idct8x8_i386.c
@@ -37,20 +37,357 @@
OIL_DECLARE_CLASS (idct8x8_s16);
OIL_DECLARE_CLASS (dct8x8_s16);
-#if 0
+/* Scale a fractional coefficient by 2^15 and add 0.5 so that the
+ * conversion to int16_t in the table below rounds to nearest. */
+#define CONST(x) (32768.0*(x) + 0.5)
+
+/* Cosine coefficients cos(k*pi/16), k = 1..7, in the fixed-point form
+ * produced by CONST().  1.0 itself is clamped to 32767 because 32768
+ * does not fit in an int16_t. */
+#define C1_0000 (32767)
+#define C0_9808 CONST(0.980785280)
+#define C0_9239 CONST(0.923879532)
+#define C0_8315 CONST(0.831469612)
+#define C0_7071 CONST(0.707106781)
+#define C0_5556 CONST(0.555570233)
+#define C0_3827 CONST(0.382683432)
+#define C0_1951 CONST(0.195090322)
+
+/* FOUR(x) replicates one value across the four 16-bit lanes of a row. */
+#define FOUR(x) { x, x, x, x }
+/* MMX_CONST is not referenced in the visible part of this file. */
+#define MMX_CONST(x) {32768.0*(x) + 0.5,32768.0*(x) + 0.5,32768.0*(x) + 0.5,32768.0*(x) + 0.5}
+
+/*
+ * Constant table for the MMX DCT code.  Each row is four int16_t values
+ * (8 bytes, one MMX quadword).  The inline asm addresses rows by literal
+ * byte offset from the table base (e.g. "32(%5)" for the 0.7071 row), so
+ * rows must not be inserted, removed or reordered.  The trailing numeric
+ * comments are those byte offsets.
+ */
+static const int16_t
+dct_mmx_constants [][4] = {
+ FOUR(0),
+ FOUR(C0_9808),
+ FOUR(C0_9239),
+ FOUR(C0_8315),
+ FOUR(C0_7071),
+ FOUR(C0_5556),
+ FOUR(C0_3827),
+ FOUR(C0_1951),
+ { 1, 1, -1, -1 }, // 64
+ { 1, -1, 1, -1 },
+ { C1_0000, C0_9239, C0_7071, C0_3827 }, // 80
+ { C1_0000, C0_3827, C0_7071, C0_9239 }, // 88
+ { C0_9808, C0_8315, C0_5556, C0_1951 }, // 96
+ { C0_8315, C0_1951, C0_9808, C0_5556 }, // 104
+ { 1, -1, -1, -1 },
+ { C0_5556, C0_9808, C0_1951, C0_8315 }, // 120
+ { 1, -1, 1, 1 },
+ { C0_1951, C0_5556, C0_8315, C0_9808 }, // 136
+ { 1, -1, 1, -1 },
+ FOUR(CONST(0.5)), //152
+ { C0_7071, C0_9239, C0_7071, C0_3827 }, // 160
+ { C0_7071, C0_3827, C0_7071, C0_9239 }, // 168
+};
+
static void
idct8x8_s16_mmx (int16_t *dest, int dstr, int16_t *src, int sstr)
{
+ int32_t tmp[32];
+
asm volatile (
- ""
+ /* left half */
+ " movl %1, %%eax \n" // src
+ " movl %3, %%ebx \n" // sstr
+ " leal (%%eax,%%ebx,4),%%ecx \n" // src + sstr * 4
+
+ " movq (%%eax), %%mm0 \n"
+ " movq (%%eax), %%mm1 \n"
+ " paddsw (%%ecx), %%mm0 \n" // ss07s34
+ " psubsw (%%ecx), %%mm1 \n" // ss16s25
+ " pmulhw 32(%5), %%mm0 \n" // .7071
+ " pmulhw 32(%5), %%mm1 \n" // .7071
+
+ " movq (%%eax,%%ebx,2), %%mm2 \n"
+ " movq (%%eax,%%ebx,2), %%mm3 \n"
+ " movq (%%ecx,%%ebx,2), %%mm4 \n"
+ " movq (%%ecx,%%ebx,2), %%mm5 \n"
+ " pmulhw 16(%5), %%mm2 \n" // .9239
+ " pmulhw 48(%5), %%mm3 \n" // .3827
+ " pmulhw 48(%5), %%mm4 \n" // .3827
+ " pmulhw 16(%5), %%mm5 \n" // .9239
+ " paddsw %%mm4, %%mm2 \n" // ds07s34
+ " psubsw %%mm5, %%mm3 \n" // ds16s25
+
+ " movq %%mm0, %%mm4 \n"
+ " movq %%mm1, %%mm5 \n"
+ " paddsw %%mm2, %%mm0 \n" // s07
+ " psubsw %%mm2, %%mm4 \n" // s34
+ " paddsw %%mm3, %%mm1 \n" // s16
+ " psubsw %%mm3, %%mm5 \n" // s25
+
+ " movq %%mm0, 0(%4) \n"
+ " movq %%mm1, 8(%4) \n"
+ " movq %%mm5, 16(%4) \n"
+ " movq %%mm4, 24(%4) \n"
+
+ " addl %3, %%eax \n"
+ " addl %3, %%ecx \n"
+
+ " movq (%%eax), %%mm0 \n"
+ " pmulhw 8(%5), %%mm0 \n"
+ " movq (%%eax,%%ebx,2), %%mm1 \n"
+ " pmulhw 24(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm0 \n"
+ " movq (%%ecx), %%mm1 \n"
+ " pmulhw 40(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm0 \n"
+ " movq (%%ecx,%%ebx,2), %%mm1 \n"
+ " pmulhw 56(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm0 \n" // d07
+
+ " movq (%%eax), %%mm2 \n"
+ " pmulhw 24(%5), %%mm2 \n"
+ " movq (%%eax,%%ebx,2), %%mm1 \n"
+ " pmulhw 56(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm2 \n"
+ " movq (%%ecx), %%mm1 \n"
+ " pmulhw 8(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm2 \n"
+ " movq (%%ecx,%%ebx,2), %%mm1 \n"
+ " pmulhw 40(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm2 \n" // d16
+
+ " movq (%%eax), %%mm3 \n"
+ " pmulhw 40(%5), %%mm3 \n"
+ " movq (%%eax,%%ebx,2), %%mm1 \n"
+ " pmulhw 8(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm3 \n"
+ " movq (%%ecx), %%mm1 \n"
+ " pmulhw 56(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm3 \n"
+ " movq (%%ecx,%%ebx,2), %%mm1 \n"
+ " pmulhw 24(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm3 \n" // d25
+
+ " movq (%%eax), %%mm4 \n"
+ " pmulhw 56(%5), %%mm4 \n"
+ " movq (%%eax,%%ebx,2), %%mm1 \n"
+ " pmulhw 40(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm4 \n"
+ " movq (%%ecx), %%mm1 \n"
+ " pmulhw 24(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm4 \n"
+ " movq (%%ecx,%%ebx,2), %%mm1 \n"
+ " pmulhw 8(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm4 \n" // d34
+
+ " movl %0, %%eax \n" // dest
+ " movl %2, %%ebx \n" // dstr
+ " leal (%%ebx, %%ebx, 2), %%edx \n" // dstr*3
+
+ " movq %%mm0, %%mm1 \n"
+ " paddsw 0(%4), %%mm1 \n"
+ " movq %%mm1, (%%eax) \n"
+
+ " movq %%mm2, %%mm1 \n"
+ " paddsw 8(%4), %%mm1 \n"
+ " movq %%mm1, (%%eax, %%ebx, 1) \n"
+
+ " movq %%mm3, %%mm1 \n"
+ " paddsw 16(%4), %%mm1 \n"
+ " movq %%mm1, (%%eax, %%ebx, 2) \n" // s25 + d25
+
+ " movq %%mm4, %%mm1 \n"
+ " paddsw 24(%4), %%mm1 \n"
+ " movq %%mm1, (%%eax, %%edx, 1) \n"
- : "+r" (dest), "+r" (src), "+r" (dstr), "+r" (sstr)
+ " leal (%%eax, %%ebx, 4), %%eax \n"
+ " movq 24(%4), %%mm1 \n"
+ " psubsw %%mm4, %%mm1 \n"
+ " movq %%mm1, (%%eax) \n"
+
+ " movq 16(%4), %%mm1 \n"
+ " psubsw %%mm3, %%mm1 \n"
+ " movq %%mm1, (%%eax, %%ebx, 1) \n"
+
+ " movq 8(%4), %%mm1 \n"
+ " psubsw %%mm2, %%mm1 \n"
+ " movq %%mm1, (%%eax, %%ebx, 2) \n"
+
+ " movq 0(%4), %%mm1 \n"
+ " psubsw %%mm0, %%mm1 \n"
+ " movq %%mm1, (%%eax, %%edx, 1) \n"
+
+ /* right half */
+ " movl %1, %%eax \n" // src
+ " movl %3, %%ebx \n" // sstr
+ " leal (%%eax,%%ebx,4),%%ecx \n" // src + sstr * 4
+
+ " movq 8(%%eax), %%mm0 \n"
+ " movq 8(%%eax), %%mm1 \n"
+ " paddsw 8(%%ecx), %%mm0 \n" // ss07s34
+ " psubsw 8(%%ecx), %%mm1 \n" // ss16s25
+ " pmulhw 32(%5), %%mm0 \n" // .7071
+ " pmulhw 32(%5), %%mm1 \n" // .7071
+
+ " movq 8(%%eax,%%ebx,2), %%mm2 \n"
+ " movq 8(%%eax,%%ebx,2), %%mm3 \n"
+ " movq 8(%%ecx,%%ebx,2), %%mm4 \n"
+ " movq 8(%%ecx,%%ebx,2), %%mm5 \n"
+ " pmulhw 16(%5), %%mm2 \n" // .9239
+ " pmulhw 48(%5), %%mm3 \n" // .3827
+ " pmulhw 48(%5), %%mm4 \n" // .3827
+ " pmulhw 16(%5), %%mm5 \n" // .9239
+ " paddsw %%mm4, %%mm2 \n" // ds07s34
+ " psubsw %%mm5, %%mm3 \n" // ds16s25
+
+ " movq %%mm0, %%mm4 \n"
+ " movq %%mm1, %%mm5 \n"
+ " paddsw %%mm2, %%mm0 \n" // s07
+ " psubsw %%mm2, %%mm4 \n" // s34
+ " paddsw %%mm3, %%mm1 \n" // s16
+ " psubsw %%mm3, %%mm5 \n" // s25
+
+ " movq %%mm0, 0(%4) \n"
+ " movq %%mm1, 8(%4) \n"
+ " movq %%mm5, 16(%4) \n"
+ " movq %%mm4, 24(%4) \n"
+
+ " addl %3, %%eax \n"
+ " addl %3, %%ecx \n"
+
+ " movq 8(%%eax), %%mm0 \n"
+ " pmulhw 8(%5), %%mm0 \n"
+ " movq 8(%%eax,%%ebx,2), %%mm1 \n"
+ " pmulhw 24(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm0 \n"
+ " movq 8(%%ecx), %%mm1 \n"
+ " pmulhw 40(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm0 \n"
+ " movq 8(%%ecx,%%ebx,2), %%mm1 \n"
+ " pmulhw 56(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm0 \n" // d07
+
+ " movq 8(%%eax), %%mm2 \n"
+ " pmulhw 24(%5), %%mm2 \n"
+ " movq 8(%%eax,%%ebx,2), %%mm1 \n"
+ " pmulhw 56(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm2 \n"
+ " movq 8(%%ecx), %%mm1 \n"
+ " pmulhw 8(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm2 \n"
+ " movq 8(%%ecx,%%ebx,2), %%mm1 \n"
+ " pmulhw 40(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm2 \n" // d16
+
+ " movq 8(%%eax), %%mm3 \n"
+ " pmulhw 40(%5), %%mm3 \n"
+ " movq 8(%%eax,%%ebx,2), %%mm1 \n"
+ " pmulhw 8(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm3 \n"
+ " movq 8(%%ecx), %%mm1 \n"
+ " pmulhw 56(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm3 \n"
+ " movq 8(%%ecx,%%ebx,2), %%mm1 \n"
+ " pmulhw 24(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm3 \n" // d25
+
+ " movq 8(%%eax), %%mm4 \n"
+ " pmulhw 56(%5), %%mm4 \n"
+ " movq 8(%%eax,%%ebx,2), %%mm1 \n"
+ " pmulhw 40(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm4 \n"
+ " movq 8(%%ecx), %%mm1 \n"
+ " pmulhw 24(%5), %%mm1 \n"
+ " paddsw %%mm1, %%mm4 \n"
+ " movq 8(%%ecx,%%ebx,2), %%mm1 \n"
+ " pmulhw 8(%5), %%mm1 \n"
+ " psubsw %%mm1, %%mm4 \n" // d34
+
+ " movl %0, %%eax \n" // dest
+ " movl %2, %%ebx \n" // dstr
+ " leal (%%ebx, %%ebx, 2), %%edx \n" // dstr*3
+
+ " movq %%mm0, %%mm1 \n"
+ " paddsw 0(%4), %%mm1 \n"
+ " movq %%mm1, 8(%%eax) \n"
+
+ " movq %%mm2, %%mm1 \n"
+ " paddsw 8(%4), %%mm1 \n"
+ " movq %%mm1, 8(%%eax, %%ebx, 1) \n"
+
+ " movq %%mm3, %%mm1 \n"
+ " paddsw 16(%4), %%mm1 \n"
+ " movq %%mm1, 8(%%eax, %%ebx, 2) \n" // s25 + d25
+
+ " movq %%mm4, %%mm1 \n"
+ " paddsw 24(%4), %%mm1 \n"
+ " movq %%mm1, 8(%%eax, %%edx, 1) \n"
+
+ " leal (%%eax, %%ebx, 4), %%eax \n"
+ " movq 24(%4), %%mm1 \n"
+ " psubsw %%mm4, %%mm1 \n"
+ " movq %%mm1, 8(%%eax) \n"
+
+ " movq 16(%4), %%mm1 \n"
+ " psubsw %%mm3, %%mm1 \n"
+ " movq %%mm1, 8(%%eax, %%ebx, 1) \n"
+
+ " movq 8(%4), %%mm1 \n"
+ " psubsw %%mm2, %%mm1 \n"
+ " movq %%mm1, 8(%%eax, %%ebx, 2) \n"
+
+ " movq 0(%4), %%mm1 \n"
+ " psubsw %%mm0, %%mm1 \n"
+ " movq %%mm1, 8(%%eax, %%edx, 1) \n"
+
+
+ /* rows */
+ " movl %0, %%eax \n" /* dest */
+#define LOOP \
+ " pshufw $0x88, 0(%%eax), %%mm0 \n" /* x0 x2 x0 x2 */ \
+ " pshufw $0x88, 8(%%eax), %%mm1 \n" /* x4 x6 x4 x6 */ \
+ " pmulhw 160(%5), %%mm0 \n" /* 0.707 0.9239 0.707 0.3827 */ \
+ " pmulhw 168(%5), %%mm1 \n" /* 0.707 0.3827 0.707 0.9239 */ \
+ " pmullw 64(%5), %%mm1 \n" /* 1 1 -1 -1 */ \
+ " paddsw %%mm1, %%mm0 \n" /* ss07s34 ds07s34 ss16s25 ds16s25 */ \
+ \
+ " pshufw $0xa0, %%mm0, %%mm1 \n" /* ss07s34 ss07s34 ss16s25 ss16s25 */ \
+ " pshufw $0xf5, %%mm0, %%mm2 \n" /* ds07s34 ds07s34 ds16s25 ds16s25 */ \
+ " pmullw 72(%5), %%mm2 \n" /* 1 -1 1 -1 */ \
+ " paddsw %%mm2, %%mm1 \n" /* s07 s34 s16 s25 */ \
+ " pshufw $0x78, %%mm1, %%mm2 \n" /* s07 s16 s25 s34 */ \
+ \
+ " pshufw $0x55, 0(%%eax), %%mm0 \n" \
+ " pmulhw 96(%5), %%mm0 \n" \
+ " pshufw $0xff, 0(%%eax), %%mm1 \n" \
+ " pmulhw 104(%5), %%mm1 \n" \
+ " pmullw 112(%5), %%mm1 \n" \
+ " paddsw %%mm1, %%mm0 \n" \
+ " pshufw $0x55, 8(%%eax), %%mm1 \n" \
+ " pmulhw 120(%5), %%mm1 \n" \
+ " pmullw 128(%5), %%mm1 \n" \
+ " paddsw %%mm1, %%mm0 \n" \
+ " pshufw $0xff, 8(%%eax), %%mm1 \n" \
+ " pmulhw 136(%5), %%mm1 \n" \
+ " pmullw 144(%5), %%mm1 \n" \
+ " paddsw %%mm1, %%mm0 \n" \
+ \
+ " movq %%mm2, %%mm1 \n" \
+ " paddsw %%mm0, %%mm1 \n" \
+ " psubsw %%mm0, %%mm2 \n" \
+ " pshufw $0x1b, %%mm2, %%mm2 \n" \
+ \
+ " movq %%mm1, 0(%%eax) \n" \
+ " movq %%mm2, 8(%%eax) \n" \
+ " addl %3, %%eax \n"
+
+ LOOP
+ LOOP
+ LOOP
+ LOOP
+ LOOP
+ LOOP
+ LOOP
+ LOOP
+#undef LOOP
+
+ " emms \n"
:
- : "ebx");
+ : "m" (dest), "m" (src), "m" (dstr), "m" (sstr), "r" (tmp), "r" (dct_mmx_constants)
+ : "eax", "ebx", "ecx", "edx");
}
OIL_DEFINE_IMPL_FULL (idct8x8_s16_mmx, idct8x8_s16, OIL_IMPL_FLAG_MMX);
-#endif
+#if 0
#define CONST(x) (32768.0*(x) + 0.5)
#define C1_0000 (32767)
@@ -87,6 +424,7 @@ dct_mmx_constants [][4] = {
{ C0_1951, C0_5556, C0_8315, C0_9808 }, // 136
{ 1, -1, 1, -1 },
};
+#endif
/* a 3dnow version can use pmulhrw instead of pmulhw for increased
* accuracy */
@@ -98,7 +436,6 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
asm volatile (
/* Note: this asm is unclean with %ebx, but it's not an issue
* in this particular case. */
-#if 1
/* first half */
" movl %1, %%eax \n" // src
" movl %3, %%ebx \n" // sstr
@@ -333,12 +670,8 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
" pmulhw 8(%5), %%mm1 \n"
" psubsw %%mm1, %%mm0 \n"
" movq %%mm0, (%%eax,%%edx) \n"
-#endif
-// " movl %1, %%eax \n" // src
" movl %0, %%ecx \n" // dest
-// " movl $8, %%edx \n"
-// "1: \n"
#define LOOP \
" movq (%%ecx), %%mm0 \n" \
@@ -397,9 +730,6 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr)
LOOP
LOOP
-// " decl %%edx \n"
-// " jne 1b\n"
-
" emms \n"
:
: "m" (dest), "m" (src), "m" (dstr), "m" (sstr), "r" (tmp), "r" (dct_mmx_constants)
diff --git a/liboil/dct/idct8x8theora_ref.c b/liboil/dct/idct8x8theora_ref.c
new file mode 100644
index 0000000..8b0c50b
--- /dev/null
+++ b/liboil/dct/idct8x8theora_ref.c
@@ -0,0 +1,200 @@
+/*
+ * LIBOIL - Library of Optimized Inner Loops
+ * Copyright (c) 2001,2002,2003,2004 David A. Schleef <ds@schleef.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <liboil/liboil.h>
+#include <liboil/liboiltest.h>
+#include <liboil/liboilrandom.h>
+#include <liboil/dct/dct.h>
+#include <math.h>
+
+/*
+ * Test-setup hook for the idct8theora_s16 class: overwrites the eight
+ * source samples (spaced by the test's source stride) with random values
+ * from oil_rand_s16() shifted right by 3.
+ * NOTE(review): OIL_GET presumably addresses by byte offset -- confirm
+ * against its definition in the liboil headers.
+ */
+static void
+idct8theora_s16_test (OilTest *test)
+{
+  const int sstr = test->params[OIL_ARG_SSTR1].value;
+  uint16_t *base = (uint16_t *)(test->params[OIL_ARG_SRC1].src_data +
+      OIL_TEST_HEADER);
+  int k = 0;
+
+  while (k < 8) {
+    OIL_GET(base, k*sstr, int16_t) = oil_rand_s16() >> 3;
+    k++;
+  }
+}
+
+/*
+ * Test-setup hook for the idct8x8theora_s16 class: fills the 8x8 source
+ * block with random values from oil_rand_s16() shifted right by 3.
+ * Rows are spaced by the test's source stride and samples within a row
+ * by 2 (the offset arithmetic below).
+ */
+static void
+idct8x8theora_s16_test (OilTest *test)
+{
+  const int sstr = test->params[OIL_ARG_SSTR1].value;
+  uint16_t *base = (uint16_t *)(test->params[OIL_ARG_SRC1].src_data +
+      OIL_TEST_HEADER);
+  int row;
+  int col;
+
+  for (row = 0; row < 8; row++) {
+    const int row_off = row*sstr;
+
+    for (col = 0; col < 8; col++) {
+      OIL_GET(base, row_off + col*2, int16_t) = oil_rand_s16() >> 3;
+    }
+  }
+}
+
+/* Function classes for the Theora-style inverse DCT: a single 8-point
+ * transform and the full 8x8 block.  Each class is given its prototype
+ * string and the test-setup hook defined above. */
+OIL_DEFINE_CLASS_FULL (idct8theora_s16, "int16_t *d_8, int dstr, int16_t *s_8, int sstr", idct8theora_s16_test);
+OIL_DEFINE_CLASS_FULL (idct8x8theora_s16, "int16_t *d_8x8, int dstr, int16_t *s_8x8, int sstr", idct8x8theora_s16_test);
+
+
+
+/*
+ * 16.16 fixed-point trigonometric constants:
+ *   Ck = round(65536 * cos(k*pi/16)),  Sk = round(65536 * sin(k*pi/16)).
+ * Since sin(k*pi/16) == cos((8-k)*pi/16), Sk has the same value as C(8-k).
+ */
+#define C1 64277
+#define C2 60547
+#define C3 54491
+#define C4 46341
+#define C5 36410
+#define C6 25080
+#define C7 12785
+
+#define S7 64277
+#define S6 60547
+#define S5 54491
+#define S4 46341
+#define S3 36410
+#define S2 25080
+#define S1 12785
+
+/* TRUNC narrows an intermediate to 16 bits.  The argument is parenthesized
+ * so that an expression such as TRUNC(a + b) casts the whole sum.  The
+ * narrowing of out-of-range values is implementation-defined in C. */
+#define TRUNC(x) ((int16_t)(x))
+/* MULT multiplies by a 16.16 constant and drops the 16 fractional bits. */
+#define MULT(a,b) (((a)*(b))>>16)
+
+/*
+ * idct8theora_s16_ref: reference 8-point inverse DCT in integer arithmetic.
+ *
+ *   dest, dstr - output samples and the stride between them
+ *   src,  sstr - input coefficients and the stride between them
+ *
+ * Strides are passed straight to OIL_GET as offsets.  Output elements are
+ * addressed with dstr and input elements with sstr.
+ */
+static void
+idct8theora_s16_ref (int16_t *dest, int dstr, int16_t *src, int sstr)
+{
+  int32_t t[8];
+  int32_t r;
+
+#define Y(i) OIL_GET(src,sstr*(i),int16_t)
+#define X(i) OIL_GET(dest,dstr*(i),int16_t)
+
+  /* the ordering here corresponds closely to the theora spec */
+  t[0] = MULT(C4, Y(0) + Y(4));
+  t[0] = TRUNC(t[0]);
+  t[1] = MULT(C4, Y(0) - Y(4));
+  t[1] = TRUNC(t[1]);
+  t[2] = MULT(C6, Y(2)) - MULT(S6, Y(6));
+  t[3] = MULT(S6, Y(2)) + MULT(C6, Y(6));
+  t[4] = MULT(C7, Y(1)) - MULT(S7, Y(7));
+  t[5] = MULT(C3, Y(5)) - MULT(S3, Y(3));
+  t[6] = MULT(S3, Y(5)) + MULT(C3, Y(3));
+  t[7] = MULT(S7, Y(1)) + MULT(C7, Y(7));
+  r = t[4] + t[5];
+  t[5] = MULT(C4, t[4] - t[5]);
+  t[5] = TRUNC(t[5]);
+  t[4] = r;
+  r = t[7] + t[6];
+  t[6] = MULT(C4, t[7] - t[6]);
+  t[6] = TRUNC(t[6]);
+  t[7] = r;
+  r = t[0] + t[3];
+  t[3] = t[0] - t[3];
+  t[0] = r;
+  r = t[1] + t[2];
+  t[2] = t[1] - t[2];
+  t[1] = r;
+  r = t[6] + t[5];
+  t[5] = t[6] - t[5];
+  t[6] = r;
+  /* final butterflies: outputs k and 7-k share the same pair of terms */
+  r = t[0] + t[7];
+  r = TRUNC(r);
+  X(0) = r;
+  r = t[1] + t[6];
+  r = TRUNC(r);
+  X(1) = r;
+  r = t[2] + t[5];
+  r = TRUNC(r);
+  X(2) = r;
+  r = t[3] + t[4];
+  r = TRUNC(r);
+  X(3) = r;
+  r = t[3] - t[4];
+  r = TRUNC(r);
+  X(4) = r;
+  r = t[2] - t[5];
+  r = TRUNC(r);
+  X(5) = r;
+  r = t[1] - t[6];
+  r = TRUNC(r);
+  X(6) = r;
+  r = t[0] - t[7];
+  r = TRUNC(r);
+  X(7) = r;
+
+#undef X
+#undef Y
+}
+OIL_DEFINE_IMPL_REF (idct8theora_s16_ref, idct8theora_s16);
+
+
+#if defined(oil_idct8theora_s16)
+/*
+ * idct8x8theora_s16_ref: reference 8x8 inverse DCT built from two passes
+ * of the 8-point transform oil_idct8theora_s16.
+ *
+ *   dest, dstr - output block and its row stride
+ *   src,  sstr - input block and its row stride
+ *
+ * Compiled only when the oil_idct8theora_s16 macro is defined.
+ */
+static void
+idct8x8theora_s16_ref (int16_t *dest, int dstr, int16_t *src, int sstr)
+{
+  int i;
+  int16_t tmp[64];
+
+  /* Pass 1: transform each source row into row i of the packed 8x8
+   * scratch block (elements contiguous, rows 8 elements apart). */
+  for(i=0;i<8;i++){
+    oil_idct8theora_s16(
+        OIL_OFFSET(tmp, 8*sizeof(int16_t) * i), sizeof(int16_t),
+        OIL_OFFSET(src, sstr * i), sizeof(int16_t));
+  }
+  /* Pass 2: transform each column of the scratch block into the matching
+   * column of dest.  Consecutive elements of a scratch column are one
+   * full row (8 elements) apart, independent of the column index. */
+  for(i=0;i<8;i++){
+    oil_idct8theora_s16(
+        OIL_OFFSET(dest, sizeof(int16_t) * i), dstr,
+        OIL_OFFSET(tmp, sizeof(int16_t) * i), 8*sizeof(int16_t));
+  }
+}
+OIL_DEFINE_IMPL_REF (idct8x8theora_s16_ref, idct8x8theora_s16);
+#endif
+
+
+#if defined(oil_idct8_f64)
+/*
+ * idct8theora_s16_float: 8-point inverse DCT computed in double precision
+ * via oil_idct8_f64, with the result scaled by 2.0 before conversion back
+ * to int16_t.
+ *
+ *   dest, dstr - output samples and the stride between them
+ *   src,  sstr - input coefficients and the stride between them
+ *
+ * The caller's strides are forwarded to the conversion routines so that
+ * non-contiguous input and output are handled.  The intermediate double
+ * buffers are packed.
+ * Compiled only when the oil_idct8_f64 macro is defined.
+ */
+static void
+idct8theora_s16_float (int16_t *dest, int dstr, int16_t *src, int sstr)
+{
+  int i;
+  double tmp1[8];
+  double tmp2[8];
+
+  oil_conv_f64_s16 (tmp1, sizeof(double), src, sstr, 8);
+  oil_idct8_f64 (tmp2, sizeof(double), tmp1, sizeof(double));
+  for(i=0;i<8;i++){
+    tmp2[i] *= 2.0;
+  }
+  oil_conv_s16_f64 (dest, dstr, tmp2, sizeof(double), 8);
+}
+OIL_DEFINE_IMPL_REF (idct8theora_s16_float, idct8theora_s16);
+#endif
+
+