diff options
author | David Schleef <ds@schleef.org> | 2005-04-30 06:00:56 +0000 |
---|---|---|
committer | David Schleef <ds@schleef.org> | 2005-04-30 06:00:56 +0000 |
commit | 81e196203fe61e8dedc5f10e03b9ec7c9d2a7eff (patch) | |
tree | ed85b753dc0cc8ef5c1083c2f704e150cd52b504 /liboil/dct | |
parent | eb6ae36041277cde75b484d3e0fc35277f83a52a (diff) | |
download | liboil-81e196203fe61e8dedc5f10e03b9ec7c9d2a7eff.tar.gz |
* examples/Makefile.am: add oil-test
* examples/oil-test.c: A copy of work.c modified for displaying
test results for any class.
* liboil/dct/Makefile.am:
* liboil/dct/idct8x8_i386.c: (idct8x8_s16_mmx), (fdct8x8s_s16_mmx):
Add MMX code for the idct; remove the "#if 1" guard and commented-out lines from the fdct.
* liboil/dct/idct8x8theora_ref.c: Add classes (idct8theora_s16, idct8x8theora_s16)
for the 8-point and 8x8 inverse DCT as described by the Theora spec.
* liboil/liboilfuncs.h: update
Diffstat (limited to 'liboil/dct')
-rw-r--r-- | liboil/dct/Makefile.am | 3 | ||||
-rw-r--r-- | liboil/dct/idct8x8_i386.c | 356 | ||||
-rw-r--r-- | liboil/dct/idct8x8theora_ref.c | 200 |
3 files changed, 545 insertions, 14 deletions
diff --git a/liboil/dct/Makefile.am b/liboil/dct/Makefile.am index 9744e53..a407a45 100644 --- a/liboil/dct/Makefile.am +++ b/liboil/dct/Makefile.am @@ -25,7 +25,8 @@ c_sources = \ fdct8x8s_s16.c \ idct8_f64.c \ idct8x8_c.c \ - imdct32_f32.c + imdct32_f32.c \ + idct8x8theora_ref.c libdct_la_SOURCES = \ $(c_sources) \ diff --git a/liboil/dct/idct8x8_i386.c b/liboil/dct/idct8x8_i386.c index 0f68c72..c1d79e9 100644 --- a/liboil/dct/idct8x8_i386.c +++ b/liboil/dct/idct8x8_i386.c @@ -37,20 +37,357 @@ OIL_DECLARE_CLASS (idct8x8_s16); OIL_DECLARE_CLASS (dct8x8_s16); -#if 0 +#define CONST(x) (32768.0*(x) + 0.5) + +#define C1_0000 (32767) +#define C0_9808 CONST(0.980785280) +#define C0_9239 CONST(0.923879532) +#define C0_8315 CONST(0.831469612) +#define C0_7071 CONST(0.707106781) +#define C0_5556 CONST(0.555570233) +#define C0_3827 CONST(0.382683432) +#define C0_1951 CONST(0.195090322) + +#define FOUR(x) { x, x, x, x } +#define MMX_CONST(x) {32768.0*(x) + 0.5,32768.0*(x) + 0.5,32768.0*(x) + 0.5,32768.0*(x) + 0.5} + +static const int16_t +dct_mmx_constants [][4] = { + FOUR(0), + FOUR(C0_9808), + FOUR(C0_9239), + FOUR(C0_8315), + FOUR(C0_7071), + FOUR(C0_5556), + FOUR(C0_3827), + FOUR(C0_1951), + { 1, 1, -1, -1 }, // 64 + { 1, -1, 1, -1 }, + { C1_0000, C0_9239, C0_7071, C0_3827 }, // 80 + { C1_0000, C0_3827, C0_7071, C0_9239 }, // 88 + { C0_9808, C0_8315, C0_5556, C0_1951 }, // 96 + { C0_8315, C0_1951, C0_9808, C0_5556 }, // 104 + { 1, -1, -1, -1 }, + { C0_5556, C0_9808, C0_1951, C0_8315 }, // 120 + { 1, -1, 1, 1 }, + { C0_1951, C0_5556, C0_8315, C0_9808 }, // 136 + { 1, -1, 1, -1 }, + FOUR(CONST(0.5)), //152 + { C0_7071, C0_9239, C0_7071, C0_3827 }, // 160 + { C0_7071, C0_3827, C0_7071, C0_9239 }, // 168 +}; + static void idct8x8_s16_mmx (int16_t *dest, int dstr, int16_t *src, int sstr) { + int32_t tmp[32]; + asm volatile ( - "" + /* left half */ + " movl %1, %%eax \n" // src + " movl %3, %%ebx \n" // sstr + " leal (%%eax,%%ebx,4),%%ecx \n" // src + sstr * 4 + + " movq 
(%%eax), %%mm0 \n" + " movq (%%eax), %%mm1 \n" + " paddsw (%%ecx), %%mm0 \n" // ss07s34 + " psubsw (%%ecx), %%mm1 \n" // ss16s25 + " pmulhw 32(%5), %%mm0 \n" // .7071 + " pmulhw 32(%5), %%mm1 \n" // .7071 + + " movq (%%eax,%%ebx,2), %%mm2 \n" + " movq (%%eax,%%ebx,2), %%mm3 \n" + " movq (%%ecx,%%ebx,2), %%mm4 \n" + " movq (%%ecx,%%ebx,2), %%mm5 \n" + " pmulhw 16(%5), %%mm2 \n" // .9239 + " pmulhw 48(%5), %%mm3 \n" // .3827 + " pmulhw 48(%5), %%mm4 \n" // .3827 + " pmulhw 16(%5), %%mm5 \n" // .9239 + " paddsw %%mm4, %%mm2 \n" // ds07s34 + " psubsw %%mm5, %%mm3 \n" // ds16s25 + + " movq %%mm0, %%mm4 \n" + " movq %%mm1, %%mm5 \n" + " paddsw %%mm2, %%mm0 \n" // s07 + " psubsw %%mm2, %%mm4 \n" // s34 + " paddsw %%mm3, %%mm1 \n" // s16 + " psubsw %%mm3, %%mm5 \n" // s25 + + " movq %%mm0, 0(%4) \n" + " movq %%mm1, 8(%4) \n" + " movq %%mm5, 16(%4) \n" + " movq %%mm4, 24(%4) \n" + + " addl %3, %%eax \n" + " addl %3, %%ecx \n" + + " movq (%%eax), %%mm0 \n" + " pmulhw 8(%5), %%mm0 \n" + " movq (%%eax,%%ebx,2), %%mm1 \n" + " pmulhw 24(%5), %%mm1 \n" + " paddsw %%mm1, %%mm0 \n" + " movq (%%ecx), %%mm1 \n" + " pmulhw 40(%5), %%mm1 \n" + " paddsw %%mm1, %%mm0 \n" + " movq (%%ecx,%%ebx,2), %%mm1 \n" + " pmulhw 56(%5), %%mm1 \n" + " paddsw %%mm1, %%mm0 \n" // d07 + + " movq (%%eax), %%mm2 \n" + " pmulhw 24(%5), %%mm2 \n" + " movq (%%eax,%%ebx,2), %%mm1 \n" + " pmulhw 56(%5), %%mm1 \n" + " psubsw %%mm1, %%mm2 \n" + " movq (%%ecx), %%mm1 \n" + " pmulhw 8(%5), %%mm1 \n" + " psubsw %%mm1, %%mm2 \n" + " movq (%%ecx,%%ebx,2), %%mm1 \n" + " pmulhw 40(%5), %%mm1 \n" + " psubsw %%mm1, %%mm2 \n" // d16 + + " movq (%%eax), %%mm3 \n" + " pmulhw 40(%5), %%mm3 \n" + " movq (%%eax,%%ebx,2), %%mm1 \n" + " pmulhw 8(%5), %%mm1 \n" + " psubsw %%mm1, %%mm3 \n" + " movq (%%ecx), %%mm1 \n" + " pmulhw 56(%5), %%mm1 \n" + " paddsw %%mm1, %%mm3 \n" + " movq (%%ecx,%%ebx,2), %%mm1 \n" + " pmulhw 24(%5), %%mm1 \n" + " paddsw %%mm1, %%mm3 \n" // d25 + + " movq (%%eax), %%mm4 \n" + " pmulhw 56(%5), %%mm4 \n" + 
" movq (%%eax,%%ebx,2), %%mm1 \n" + " pmulhw 40(%5), %%mm1 \n" + " psubsw %%mm1, %%mm4 \n" + " movq (%%ecx), %%mm1 \n" + " pmulhw 24(%5), %%mm1 \n" + " paddsw %%mm1, %%mm4 \n" + " movq (%%ecx,%%ebx,2), %%mm1 \n" + " pmulhw 8(%5), %%mm1 \n" + " psubsw %%mm1, %%mm4 \n" // d34 + + " movl %0, %%eax \n" // dest + " movl %2, %%ebx \n" // dstr + " leal (%%ebx, %%ebx, 2), %%edx \n" // dstr*3 + + " movq %%mm0, %%mm1 \n" + " paddsw 0(%4), %%mm1 \n" + " movq %%mm1, (%%eax) \n" + + " movq %%mm2, %%mm1 \n" + " paddsw 8(%4), %%mm1 \n" + " movq %%mm1, (%%eax, %%ebx, 1) \n" + + " movq %%mm3, %%mm1 \n" + " paddsw 16(%4), %%mm1 \n" + " movq %%mm1, (%%eax, %%ebx, 2) \n" // s25 + d25 + + " movq %%mm4, %%mm1 \n" + " paddsw 24(%4), %%mm1 \n" + " movq %%mm1, (%%eax, %%edx, 1) \n" - : "+r" (dest), "+r" (src), "+r" (dstr), "+r" (sstr) + " leal (%%eax, %%ebx, 4), %%eax \n" + " movq 24(%4), %%mm1 \n" + " psubsw %%mm4, %%mm1 \n" + " movq %%mm1, (%%eax) \n" + + " movq 16(%4), %%mm1 \n" + " psubsw %%mm3, %%mm1 \n" + " movq %%mm1, (%%eax, %%ebx, 1) \n" + + " movq 8(%4), %%mm1 \n" + " psubsw %%mm2, %%mm1 \n" + " movq %%mm1, (%%eax, %%ebx, 2) \n" + + " movq 0(%4), %%mm1 \n" + " psubsw %%mm0, %%mm1 \n" + " movq %%mm1, (%%eax, %%edx, 1) \n" + + /* right half */ + " movl %1, %%eax \n" // src + " movl %3, %%ebx \n" // sstr + " leal (%%eax,%%ebx,4),%%ecx \n" // src + sstr * 4 + + " movq 8(%%eax), %%mm0 \n" + " movq 8(%%eax), %%mm1 \n" + " paddsw 8(%%ecx), %%mm0 \n" // ss07s34 + " psubsw 8(%%ecx), %%mm1 \n" // ss16s25 + " pmulhw 32(%5), %%mm0 \n" // .7071 + " pmulhw 32(%5), %%mm1 \n" // .7071 + + " movq 8(%%eax,%%ebx,2), %%mm2 \n" + " movq 8(%%eax,%%ebx,2), %%mm3 \n" + " movq 8(%%ecx,%%ebx,2), %%mm4 \n" + " movq 8(%%ecx,%%ebx,2), %%mm5 \n" + " pmulhw 16(%5), %%mm2 \n" // .9239 + " pmulhw 48(%5), %%mm3 \n" // .3827 + " pmulhw 48(%5), %%mm4 \n" // .3827 + " pmulhw 16(%5), %%mm5 \n" // .9239 + " paddsw %%mm4, %%mm2 \n" // ds07s34 + " psubsw %%mm5, %%mm3 \n" // ds16s25 + + " movq %%mm0, %%mm4 \n" + " movq 
%%mm1, %%mm5 \n" + " paddsw %%mm2, %%mm0 \n" // s07 + " psubsw %%mm2, %%mm4 \n" // s34 + " paddsw %%mm3, %%mm1 \n" // s16 + " psubsw %%mm3, %%mm5 \n" // s25 + + " movq %%mm0, 0(%4) \n" + " movq %%mm1, 8(%4) \n" + " movq %%mm5, 16(%4) \n" + " movq %%mm4, 24(%4) \n" + + " addl %3, %%eax \n" + " addl %3, %%ecx \n" + + " movq 8(%%eax), %%mm0 \n" + " pmulhw 8(%5), %%mm0 \n" + " movq 8(%%eax,%%ebx,2), %%mm1 \n" + " pmulhw 24(%5), %%mm1 \n" + " paddsw %%mm1, %%mm0 \n" + " movq 8(%%ecx), %%mm1 \n" + " pmulhw 40(%5), %%mm1 \n" + " paddsw %%mm1, %%mm0 \n" + " movq 8(%%ecx,%%ebx,2), %%mm1 \n" + " pmulhw 56(%5), %%mm1 \n" + " paddsw %%mm1, %%mm0 \n" // d07 + + " movq 8(%%eax), %%mm2 \n" + " pmulhw 24(%5), %%mm2 \n" + " movq 8(%%eax,%%ebx,2), %%mm1 \n" + " pmulhw 56(%5), %%mm1 \n" + " psubsw %%mm1, %%mm2 \n" + " movq 8(%%ecx), %%mm1 \n" + " pmulhw 8(%5), %%mm1 \n" + " psubsw %%mm1, %%mm2 \n" + " movq 8(%%ecx,%%ebx,2), %%mm1 \n" + " pmulhw 40(%5), %%mm1 \n" + " psubsw %%mm1, %%mm2 \n" // d16 + + " movq 8(%%eax), %%mm3 \n" + " pmulhw 40(%5), %%mm3 \n" + " movq 8(%%eax,%%ebx,2), %%mm1 \n" + " pmulhw 8(%5), %%mm1 \n" + " psubsw %%mm1, %%mm3 \n" + " movq 8(%%ecx), %%mm1 \n" + " pmulhw 56(%5), %%mm1 \n" + " paddsw %%mm1, %%mm3 \n" + " movq 8(%%ecx,%%ebx,2), %%mm1 \n" + " pmulhw 24(%5), %%mm1 \n" + " paddsw %%mm1, %%mm3 \n" // d25 + + " movq 8(%%eax), %%mm4 \n" + " pmulhw 56(%5), %%mm4 \n" + " movq 8(%%eax,%%ebx,2), %%mm1 \n" + " pmulhw 40(%5), %%mm1 \n" + " psubsw %%mm1, %%mm4 \n" + " movq 8(%%ecx), %%mm1 \n" + " pmulhw 24(%5), %%mm1 \n" + " paddsw %%mm1, %%mm4 \n" + " movq 8(%%ecx,%%ebx,2), %%mm1 \n" + " pmulhw 8(%5), %%mm1 \n" + " psubsw %%mm1, %%mm4 \n" // d34 + + " movl %0, %%eax \n" // dest + " movl %2, %%ebx \n" // dstr + " leal (%%ebx, %%ebx, 2), %%edx \n" // dstr*3 + + " movq %%mm0, %%mm1 \n" + " paddsw 0(%4), %%mm1 \n" + " movq %%mm1, 8(%%eax) \n" + + " movq %%mm2, %%mm1 \n" + " paddsw 8(%4), %%mm1 \n" + " movq %%mm1, 8(%%eax, %%ebx, 1) \n" + + " movq %%mm3, %%mm1 \n" + " 
paddsw 16(%4), %%mm1 \n" + " movq %%mm1, 8(%%eax, %%ebx, 2) \n" // s25 + d25 + + " movq %%mm4, %%mm1 \n" + " paddsw 24(%4), %%mm1 \n" + " movq %%mm1, 8(%%eax, %%edx, 1) \n" + + " leal (%%eax, %%ebx, 4), %%eax \n" + " movq 24(%4), %%mm1 \n" + " psubsw %%mm4, %%mm1 \n" + " movq %%mm1, 8(%%eax) \n" + + " movq 16(%4), %%mm1 \n" + " psubsw %%mm3, %%mm1 \n" + " movq %%mm1, 8(%%eax, %%ebx, 1) \n" + + " movq 8(%4), %%mm1 \n" + " psubsw %%mm2, %%mm1 \n" + " movq %%mm1, 8(%%eax, %%ebx, 2) \n" + + " movq 0(%4), %%mm1 \n" + " psubsw %%mm0, %%mm1 \n" + " movq %%mm1, 8(%%eax, %%edx, 1) \n" + + + /* rows */ + " movl %0, %%eax \n" /* dest */ +#define LOOP \ + " pshufw $0x88, 0(%%eax), %%mm0 \n" /* x0 x2 x0 x2 */ \ + " pshufw $0x88, 8(%%eax), %%mm1 \n" /* x4 x6 x4 x6 */ \ + " pmulhw 160(%5), %%mm0 \n" /* 0.707 0.9239 0.707 0.3827 */ \ + " pmulhw 168(%5), %%mm1 \n" /* 0.707 0.3827 0.707 0.9239 */ \ + " pmullw 64(%5), %%mm1 \n" /* 1 1 -1 -1 */ \ + " paddsw %%mm1, %%mm0 \n" /* ss07s34 ds07s34 ss16s25 ds16s25 */ \ + \ + " pshufw $0xa0, %%mm0, %%mm1 \n" /* ss07s34 ss07s34 ss16s25 ss16s25 */ \ + " pshufw $0xf5, %%mm0, %%mm2 \n" /* ds07s34 ds07s34 ds16s25 ds16s25 */ \ + " pmullw 72(%5), %%mm2 \n" /* 1 -1 1 -1 */ \ + " paddsw %%mm2, %%mm1 \n" /* s07 s34 s16 s25 */ \ + " pshufw $0x78, %%mm1, %%mm2 \n" /* s07 s16 s25 s34 */ \ + \ + " pshufw $0x55, 0(%%eax), %%mm0 \n" \ + " pmulhw 96(%5), %%mm0 \n" \ + " pshufw $0xff, 0(%%eax), %%mm1 \n" \ + " pmulhw 104(%5), %%mm1 \n" \ + " pmullw 112(%5), %%mm1 \n" \ + " paddsw %%mm1, %%mm0 \n" \ + " pshufw $0x55, 8(%%eax), %%mm1 \n" \ + " pmulhw 120(%5), %%mm1 \n" \ + " pmullw 128(%5), %%mm1 \n" \ + " paddsw %%mm1, %%mm0 \n" \ + " pshufw $0xff, 8(%%eax), %%mm1 \n" \ + " pmulhw 136(%5), %%mm1 \n" \ + " pmullw 144(%5), %%mm1 \n" \ + " paddsw %%mm1, %%mm0 \n" \ + \ + " movq %%mm2, %%mm1 \n" \ + " paddsw %%mm0, %%mm1 \n" \ + " psubsw %%mm0, %%mm2 \n" \ + " pshufw $0x1b, %%mm2, %%mm2 \n" \ + \ + " movq %%mm1, 0(%%eax) \n" \ + " movq %%mm2, 8(%%eax) \n" \ + " 
addl %3, %%eax \n" + + LOOP + LOOP + LOOP + LOOP + LOOP + LOOP + LOOP + LOOP +#undef LOOP + + " emms \n" : - : "ebx"); + : "m" (dest), "m" (src), "m" (dstr), "m" (sstr), "r" (tmp), "r" (dct_mmx_constants) + : "eax", "ebx", "ecx", "edx"); } OIL_DEFINE_IMPL_FULL (idct8x8_s16_mmx, idct8x8_s16, OIL_IMPL_FLAG_MMX); -#endif +#if 0 #define CONST(x) (32768.0*(x) + 0.5) #define C1_0000 (32767) @@ -87,6 +424,7 @@ dct_mmx_constants [][4] = { { C0_1951, C0_5556, C0_8315, C0_9808 }, // 136 { 1, -1, 1, -1 }, }; +#endif /* a 3dnow version can use pmulhrw instead of pmulhw for increased * accuracy */ @@ -98,7 +436,6 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr) asm volatile ( /* Note: this asm is unclean with %ebx, but it's not an issue * in this particular case. */ -#if 1 /* first half */ " movl %1, %%eax \n" // src " movl %3, %%ebx \n" // sstr @@ -333,12 +670,8 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr) " pmulhw 8(%5), %%mm1 \n" " psubsw %%mm1, %%mm0 \n" " movq %%mm0, (%%eax,%%edx) \n" -#endif -// " movl %1, %%eax \n" // src " movl %0, %%ecx \n" // dest -// " movl $8, %%edx \n" -// "1: \n" #define LOOP \ " movq (%%ecx), %%mm0 \n" \ @@ -397,9 +730,6 @@ fdct8x8s_s16_mmx (uint16_t *dest, int dstr, uint16_t *src, int sstr) LOOP LOOP -// " decl %%edx \n" -// " jne 1b\n" - " emms \n" : : "m" (dest), "m" (src), "m" (dstr), "m" (sstr), "r" (tmp), "r" (dct_mmx_constants) diff --git a/liboil/dct/idct8x8theora_ref.c b/liboil/dct/idct8x8theora_ref.c new file mode 100644 index 0000000..8b0c50b --- /dev/null +++ b/liboil/dct/idct8x8theora_ref.c @@ -0,0 +1,200 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2001,2002,2003,2004 David A. Schleef <ds@schleef.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <liboil/liboil.h> +#include <liboil/liboiltest.h> +#include <liboil/liboilrandom.h> +#include <liboil/dct/dct.h> +#include <math.h> + +static void +idct8theora_s16_test (OilTest *test) +{ + int i; + int stride = test->params[OIL_ARG_SSTR1].value; + uint16_t *ptr = (uint16_t *)(test->params[OIL_ARG_SRC1].src_data + + OIL_TEST_HEADER); + + for(i=0;i<8;i++){ + OIL_GET(ptr, i*stride, int16_t) = oil_rand_s16() >> 3; + //OIL_GET(ptr, i*stride, int16_t) = 0; + } + //OIL_GET(ptr, 0*stride, int16_t) = 100; + +} + +static void +idct8x8theora_s16_test (OilTest *test) +{ + int i; + int j; + int stride = test->params[OIL_ARG_SSTR1].value; + uint16_t *ptr = (uint16_t *)(test->params[OIL_ARG_SRC1].src_data + + OIL_TEST_HEADER); + + for(i=0;i<8;i++){ + for(j=0;j<8;j++){ + OIL_GET(ptr, i*stride + j*2, int16_t) = oil_rand_s16() >> 3; + } + } + +} + +OIL_DEFINE_CLASS_FULL (idct8theora_s16, "int16_t *d_8, int dstr, int16_t *s_8, int sstr", idct8theora_s16_test); +OIL_DEFINE_CLASS_FULL (idct8x8theora_s16, "int16_t *d_8x8, int dstr, int16_t *s_8x8, int sstr", idct8x8theora_s16_test); + + + +#define C1 64277 +#define C2 60547 +#define C3 54491 +#define C4 46341 +#define C5 36410 +#define C6 25080 +#define C7 12785 + +#define S7 64277 +#define S6 60547 +#define S5 54491 +#define S4 46341 +#define S3 36410 +#define S2 25080 +#define S1 12785 + +#define TRUNC(x) ((int16_t)x) +#define MULT(a,b) (((a)*(b))>>16) + +static void +idct8theora_s16_ref (int16_t *dest, int dstr, int16_t *src, int sstr) +{ + int32_t t[10]; + int32_t r; + +#define Y(i) OIL_GET(src,sstr*(i),int16_t) +#define X(i) OIL_GET(dest,sstr*(i),int16_t) + + /* the ordering here corresponds closely to the theora spec */ + t[0] = MULT(C4, Y(0) + Y(4)); + t[0] = TRUNC(t[0]); + t[1] = MULT(C4, Y(0) - Y(4)); + t[1] = TRUNC(t[1]); + t[2] = MULT(C6, Y(2)) - MULT(S6, Y(6)); + t[3] = MULT(S6, Y(2)) + MULT(C6, Y(6)); + t[4] = MULT(C7, Y(1)) - MULT(S7, Y(7)); + t[5] = 
MULT(C3, Y(5)) - MULT(S3, Y(3)); + t[6] = MULT(S3, Y(5)) + MULT(C3, Y(3)); + t[7] = MULT(S7, Y(1)) + MULT(C7, Y(7)); + r = t[4] + t[5]; + t[5] = MULT(C4, t[4] - t[5]); + t[5] = TRUNC(t[5]); + t[4] = r; + r = t[7] + t[6]; + t[6] = MULT(C4, t[7] - t[6]); + t[6] = TRUNC(t[6]); + t[7] = r; + r = t[0] + t[3]; + t[3] = t[0] - t[3]; + t[0] = r; + r = t[1] + t[2]; + t[2] = t[1] - t[2]; + t[1] = r; + r = t[6] + t[5]; + t[5] = t[6] - t[5]; + t[6] = r; + r = t[0] + t[7]; + r = TRUNC(r); + X(0) = r; + r = t[1] + t[6]; + r = TRUNC(r); + X(1) = r; + r = t[2] + t[5]; + r = TRUNC(r); + X(2) = r; + r = t[3] + t[4]; + r = TRUNC(r); + X(3) = r; + r = t[3] - t[4]; + r = TRUNC(r); + X(4) = r; + r = t[2] - t[5]; + r = TRUNC(r); + X(5) = r; + r = t[1] - t[6]; + r = TRUNC(r); + X(6) = r; + r = t[0] - t[7]; + r = TRUNC(r); + X(7) = r; +} +OIL_DEFINE_IMPL_REF (idct8theora_s16_ref, idct8theora_s16); + + +#if defined(oil_idct8theora_s16) +static void +idct8x8theora_s16_ref (int16_t *dest, int dstr, int16_t *src, int sstr) +{ + int i; + int16_t tmp[64]; + + for(i=0;i<8;i++){ + oil_idct8theora_s16( + OIL_OFFSET(tmp, 8*sizeof(int16_t) * i), sizeof(int16_t), + OIL_OFFSET(src, sstr * i), sizeof(int16_t)); + } + for(i=0;i<8;i++){ + oil_idct8theora_s16( + OIL_OFFSET(dest, sizeof(int16_t) * i), dstr, + OIL_OFFSET(tmp, sizeof(int16_t) * i), sizeof(int16_t) * i); + } +} +OIL_DEFINE_IMPL_REF (idct8x8theora_s16_ref, idct8x8theora_s16); +#endif + + +#if defined(oil_idct8_f64) +static void +idct8theora_s16_float (int16_t *dest, int dstr, int16_t *src, int sstr) +{ + int i; + double tmp1[8]; + double tmp2[8]; + + oil_conv_f64_s16 (tmp1, sizeof(double), src, sizeof(int16_t), 8); + oil_idct8_f64 (tmp2, sizeof(double), tmp1, sizeof(double)); + for(i=0;i<8;i++){ + tmp2[i] *= 2.0; + } + oil_conv_s16_f64 (dest, sizeof(int16_t), tmp2, sizeof(double), 8); +} +OIL_DEFINE_IMPL_REF (idct8theora_s16_float, idct8theora_s16); +#endif + + |