diff options
author | Fabrice Bellard <fabrice@bellard.org> | 2001-07-22 14:18:56 +0000 |
---|---|---|
committer | Fabrice Bellard <fabrice@bellard.org> | 2001-07-22 14:18:56 +0000 |
commit | de6d9b6404bfd1c589799142da5a95428f146edd (patch) | |
tree | 75ae0cbb74bdfafb6f1a40922db111a103db3bcf /libavcodec | |
parent | 1b58d58ddaf8a8c766a0353885ff504babed0453 (diff) | |
download | ffmpeg-de6d9b6404bfd1c589799142da5a95428f146edd.tar.gz |
Initial revision
Originally committed as revision 5 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
52 files changed, 23294 insertions, 0 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile new file mode 100644 index 0000000000..a418516b16 --- /dev/null +++ b/libavcodec/Makefile @@ -0,0 +1,59 @@ +include ../config.mk +CFLAGS= -O2 -Wall -g +LDFLAGS= -g + +OBJS= common.o utils.o mpegvideo.o h263.o jrevdct.o jfdctfst.o \ + mpegaudio.o ac3enc.o mjpegenc.o resample.o dsputil.o \ + motion_est.o imgconvert.o imgresample.o msmpeg4.o \ + mpeg12.o h263dec.o rv10.o + +# currently using libac3 for ac3 decoding +OBJS+= ac3dec.o \ + libac3/bit_allocate.o libac3/bitstream.o libac3/downmix.o \ + libac3/imdct.o libac3/parse.o + +# currently using mpglib for mpeg audio decoding +OBJS+= mpegaudiodec.o \ + mpglib/layer1.o mpglib/layer2.o mpglib/layer3.o \ + mpglib/dct64_i386.o mpglib/decode_i386.o mpglib/tabinit.o + +# i386 mmx specific stuff +ifdef CONFIG_MMX +OBJS += i386/fdct_mmx.o i386/fdctdata.o i386/sad_mmx.o i386/cputest.o \ + i386/dsputil_mmx.o +endif + +LIB= libavcodec.a +TESTS= imgresample-test dct-test + +all: $(LIB) apiexample + +$(LIB): $(OBJS) + rm -f $@ + $(AR) rcs $@ $(OBJS) + +dsputil.o: dsputil.c dsputil.h + +%.o: %.c + $(CC) $(CFLAGS) -c -o $@ $< + +%.o: %.s + nasm -f elf -o $@ $< + +clean: + rm -f *.o *~ *.a i386/*.o i386/*~ \ + libac3/*.o libac3/*~ \ + mpglib/*.o mpglib/*~ \ + apiexample $(TESTS) + +# api example program +apiexample: apiexample.c $(LIB) + $(CC) $(CFLAGS) -o $@ $< $(LIB) -lm + +# testing progs + +imgresample-test: imgresample.c + $(CC) $(CFLAGS) -DTEST -o $@ $^ + +dct-test: dct-test.o jfdctfst.o i386/fdct_mmx.o i386/fdctdata.o fdctref.o + $(CC) -o $@ $^ diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c new file mode 100644 index 0000000000..7332a11d8e --- /dev/null +++ b/libavcodec/ac3dec.c @@ -0,0 +1,189 @@ +/* + * AC3 decoder + * Copyright (c) 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "avcodec.h" + +#include <inttypes.h> +#include "libac3/ac3.h" + +/* currently, I use libac3 which is Copyright (C) Aaron Holtzman and + released under the GPL license. I may reimplement it someday... */ +typedef struct AC3DecodeState { + UINT8 inbuf[4096]; /* input buffer */ + UINT8 *inbuf_ptr; + int frame_size; + int flags; + ac3_state_t state; +} AC3DecodeState; + +static int ac3_decode_init(AVCodecContext *avctx) +{ + AC3DecodeState *s = avctx->priv_data; + + ac3_init (); + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + return 0; +} + +stream_samples_t samples; + +/**** the following two functions comes from ac3dec */ +static inline int blah (int32_t i) +{ + if (i > 0x43c07fff) + return 32767; + else if (i < 0x43bf8000) + return -32768; + else + return i - 0x43c00000; +} + +static inline void float_to_int (float * _f, INT16 * s16) +{ + int i; + int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format + + for (i = 0; i < 256; i++) { + s16[2*i] = blah (f[i]); + s16[2*i+1] = blah (f[i+256]); + } +} + +static inline void float_to_int_mono (float * _f, INT16 * s16) +{ + int i; + int32_t * f = (int32_t *) _f; // XXX assumes IEEE float format + + for (i = 0; i < 256; i++) { + s16[i] = blah (f[i]); + } +} + +/**** end */ + +#define HEADER_SIZE 7 + +static int ac3_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size) +{ + AC3DecodeState *s = avctx->priv_data; + UINT8 *buf_ptr; + int flags, i, len; + int sample_rate, bit_rate; + short *out_samples = data; + float level; + + *data_size = 0; + buf_ptr = buf; + while (buf_size > 0) { + len = s->inbuf_ptr - s->inbuf; + if (s->frame_size == 0) { + /* no header seen : find one. We need at least 7 bytes to parse it */ + len = HEADER_SIZE - len; + if (len > buf_size) + len = buf_size; + memcpy(s->inbuf_ptr, buf_ptr, len); + buf_ptr += len; + s->inbuf_ptr += len; + buf_size -= len; + if ((s->inbuf_ptr - s->inbuf) == HEADER_SIZE) { + len = ac3_syncinfo (s->inbuf, &s->flags, &sample_rate, &bit_rate); + if (len == 0) { + /* no sync found : move by one byte (inefficient, but simple!) */ + memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); + s->inbuf_ptr--; + } else { + s->frame_size = len; + /* update codec info */ + avctx->sample_rate = sample_rate; + if ((s->flags & AC3_CHANNEL_MASK) == AC3_MONO) + avctx->channels = 1; + else + avctx->channels = 2; + avctx->bit_rate = bit_rate; + } + } + } else if (len < s->frame_size) { + len = s->frame_size - len; + if (len > buf_size) + len = buf_size; + + memcpy(s->inbuf_ptr, buf_ptr, len); + buf_ptr += len; + s->inbuf_ptr += len; + buf_size -= len; + } else { + if (avctx->channels == 1) + flags = AC3_MONO; + else + flags = AC3_STEREO; + + flags |= AC3_ADJUST_LEVEL; + level = 1; + if (ac3_frame (&s->state, s->inbuf, &flags, &level, 384)) { + fail: + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + continue; + } + for (i = 0; i < 6; i++) { + if (ac3_block (&s->state)) + goto fail; + if (avctx->channels == 1) + float_to_int_mono (*samples, out_samples + i * 256); + else + float_to_int (*samples, out_samples + i * 512); + } + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + *data_size = 6 * avctx->channels * 256 * sizeof(INT16); + break; + } + } + return buf_ptr - buf; +} + +static int ac3_decode_end(AVCodecContext *s) +{ + return 0; +} + +AVCodec ac3_decoder = { + "ac3", + CODEC_TYPE_AUDIO, + CODEC_ID_AC3, + sizeof(AC3DecodeState), + ac3_decode_init, + NULL, + ac3_decode_end, + ac3_decode_frame, +}; + +/* register codecs which could clash with mplayer symbols */ +/* XXX: rename all symbols to avoid clashed */ +void avcodec_register_more(void) +{ + register_avcodec(&mp3_decoder); + register_avcodec(&ac3_decoder); +} + diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c new file mode 100644 index 0000000000..2bd4179aab --- /dev/null +++ b/libavcodec/ac3enc.c @@ -0,0 +1,1460 @@ +/* + * The simplest AC3 encoder + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include "avcodec.h" + +#include "ac3enc.h" +#include "ac3tab.h" + +//#define DEBUG +//#define DEBUG_BITALLOC +#define NDEBUG +#include <assert.h> + +#define MDCT_NBITS 9 +#define N (1 << MDCT_NBITS) +#define NB_BLOCKS 6 /* number of PCM blocks inside an AC3 frame */ + +/* new exponents are sent if their Norm 1 exceed this number */ +#define EXP_DIFF_THRESHOLD 1000 + +/* exponent encoding strategy */ +#define EXP_REUSE 0 +#define EXP_NEW 1 + +#define EXP_D15 1 +#define EXP_D25 2 +#define EXP_D45 3 + +static void fft_init(int ln); +static void ac3_crc_init(void); + +static inline INT16 fix15(float a) +{ + int v; + v = (int)(a * (float)(1 << 15)); + if (v < -32767) + v = -32767; + else if (v > 32767) + v = 32767; + return v; +} + +static inline int calc_lowcomp1(int a, int b0, int b1) +{ + if ((b0 + 256) == b1) { + a = 384 ; + } else if (b0 > b1) { + a = a - 64; + if (a < 0) a=0; + } + return a; +} + +static inline int calc_lowcomp(int a, int b0, int b1, int bin) +{ + if (bin < 7) { + if ((b0 + 256) == b1) { + a = 384 ; + } else if (b0 > b1) { + a = a - 64; + if (a < 0) a=0; + } + } else if (bin < 20) { + if ((b0 + 256) == b1) { + a = 320 ; + } else if (b0 > b1) { + a= a - 64; + if (a < 0) a=0; + } + } else { + a = a - 128; + if (a < 0) a=0; + } + return a; +} + +/* AC3 bit allocation. The algorithm is the one described in the AC3 + spec with some optimizations because of our simplified encoding + assumptions. */ +void parametric_bit_allocation(AC3EncodeContext *s, UINT8 *bap, + INT8 *exp, int start, int end, + int snroffset, int fgain) +{ + int bin,i,j,k,end1,v,v1,bndstrt,bndend,lowcomp,begin; + int fastleak,slowleak,address,tmp; + INT16 psd[256]; /* scaled exponents */ + INT16 bndpsd[50]; /* interpolated exponents */ + INT16 excite[50]; /* excitation */ + INT16 mask[50]; /* masking value */ + + /* exponent mapping to PSD */ + for(bin=start;bin<end;bin++) { + psd[bin]=(3072 - (exp[bin] << 7)); + } + + /* PSD integration */ + j=start; + k=masktab[start]; + do { + v=psd[j]; + j++; + end1=bndtab[k+1]; + if (end1 > end) end1=end; + for(i=j;i<end1;i++) { + int c,adr; + /* logadd */ + v1=psd[j]; + c=v-v1; + if (c >= 0) { + adr=c >> 1; + if (adr > 255) adr=255; + v=v + latab[adr]; + } else { + adr=(-c) >> 1; + if (adr > 255) adr=255; + v=v1 + latab[adr]; + } + j++; + } + bndpsd[k]=v; + k++; + } while (end > bndtab[k]); + + /* excitation function */ + bndstrt = masktab[start]; + bndend = masktab[end-1] + 1; + + lowcomp = 0; + lowcomp = calc_lowcomp1(lowcomp, bndpsd[0], bndpsd[1]) ; + excite[0] = bndpsd[0] - fgain - lowcomp ; + lowcomp = calc_lowcomp1(lowcomp, bndpsd[1], bndpsd[2]) ; + excite[1] = bndpsd[1] - fgain - lowcomp ; + begin = 7 ; + for (bin = 2; bin < 7; bin++) { + lowcomp = calc_lowcomp1(lowcomp, bndpsd[bin], bndpsd[bin+1]) ; + fastleak = bndpsd[bin] - fgain ; + slowleak = bndpsd[bin] - s->sgain ; + excite[bin] = fastleak - lowcomp ; + if (bndpsd[bin] <= bndpsd[bin+1]) { + begin = bin + 1 ; + break ; + } + } + + end1=bndend; + if (end1 > 22) end1=22; + + for (bin = begin; bin < end1; bin++) { + lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin) ; + + fastleak -= s->fdecay ; + v = bndpsd[bin] - fgain; + if (fastleak < v) fastleak = v; + + slowleak -= s->sdecay ; + v = bndpsd[bin] - s->sgain; + if (slowleak < v) slowleak = v; + + v=fastleak - lowcomp; + if (slowleak > v) v=slowleak; + + excite[bin] = v; + } + + for (bin = 22; bin < bndend; bin++) { + fastleak -= s->fdecay ; + v = bndpsd[bin] - fgain; + if (fastleak < v) fastleak = v; + slowleak -= s->sdecay ; + v = bndpsd[bin] - s->sgain; + if (slowleak < v) slowleak = v; + + v=fastleak; + if (slowleak > v) v = slowleak; + excite[bin] = v; + } + + /* compute masking curve */ + + for (bin = bndstrt; bin < bndend; bin++) { + v1 = excite[bin]; + tmp = s->dbknee - bndpsd[bin]; + if (tmp > 0) { + v1 += tmp >> 2; + } + v=hth[bin >> s->halfratecod][s->fscod]; + if (v1 > v) v=v1; + mask[bin] = v; + } + + /* compute bit allocation */ + + i = start ; + j = masktab[start] ; + do { + v=mask[j]; + v -= snroffset ; + v -= s->floor ; + if (v < 0) v = 0; + v &= 0x1fe0 ; + v += s->floor ; + + end1=bndtab[j] + bndsz[j]; + if (end1 > end) end1=end; + + for (k = i; k < end1; k++) { + address = (psd[i] - v) >> 5 ; + if (address < 0) address=0; + else if (address > 63) address=63; + bap[i] = baptab[address]; + i++; + } + } while (end > bndtab[j++]) ; +} + +typedef struct IComplex { + short re,im; +} IComplex; + +static void fft_init(int ln) +{ + int i, j, m, n; + float alpha; + + n = 1 << ln; + + for(i=0;i<(n/2);i++) { + alpha = 2 * M_PI * (float)i / (float)n; + costab[i] = fix15(cos(alpha)); + sintab[i] = fix15(sin(alpha)); + } + + for(i=0;i<n;i++) { + m=0; + for(j=0;j<ln;j++) { + m |= ((i >> j) & 1) << (ln-j-1); + } + fft_rev[i]=m; + } +} + +/* butter fly op */ +#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ +{\ + int ax, ay, bx, by;\ + bx=pre1;\ + by=pim1;\ + ax=qre1;\ + ay=qim1;\ + pre = (bx + ax) >> 1;\ + pim = (by + ay) >> 1;\ + qre = (bx - ax) >> 1;\ + qim = (by - ay) >> 1;\ +} + +#define MUL16(a,b) ((a) * (b)) + +#define CMUL(pre, pim, are, aim, bre, bim) \ +{\ + pre = (MUL16(are, bre) - MUL16(aim, bim)) >> 15;\ + pim = (MUL16(are, bim) + MUL16(bre, aim)) >> 15;\ +} + + +/* do a 2^n point complex fft on 2^ln points. */ +static void fft(IComplex *z, int ln) +{ + int j, l, np, np2; + int nblocks, nloops; + register IComplex *p,*q; + int tmp_re, tmp_im; + + np = 1 << ln; + + /* reverse */ + for(j=0;j<np;j++) { + int k; + IComplex tmp; + k = fft_rev[j]; + if (k < j) { + tmp = z[k]; + z[k] = z[j]; + z[j] = tmp; + } + } + + /* pass 0 */ + + p=&z[0]; + j=(np >> 1); + do { + BF(p[0].re, p[0].im, p[1].re, p[1].im, + p[0].re, p[0].im, p[1].re, p[1].im); + p+=2; + } while (--j != 0); + + /* pass 1 */ + + p=&z[0]; + j=np >> 2; + do { + BF(p[0].re, p[0].im, p[2].re, p[2].im, + p[0].re, p[0].im, p[2].re, p[2].im); + BF(p[1].re, p[1].im, p[3].re, p[3].im, + p[1].re, p[1].im, p[3].im, -p[3].re); + p+=4; + } while (--j != 0); + + /* pass 2 .. ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + do { + p = z; + q = z + nloops; + for (j = 0; j < nblocks; ++j) { + + BF(p->re, p->im, q->re, q->im, + p->re, p->im, q->re, q->im); + + p++; + q++; + for(l = nblocks; l < np2; l += nblocks) { + CMUL(tmp_re, tmp_im, costab[l], -sintab[l], q->re, q->im); + BF(p->re, p->im, q->re, q->im, + p->re, p->im, tmp_re, tmp_im); + p++; + q++; + } + p += nloops; + q += nloops; + } + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} + +/* do a 512 point mdct */ +static void mdct512(INT32 *out, INT16 *in) +{ + int i, re, im, re1, im1; + INT16 rot[N]; + IComplex x[N/4]; + + /* shift to simplify computations */ + for(i=0;i<N/4;i++) + rot[i] = -in[i + 3*N/4]; + for(i=N/4;i<N;i++) + rot[i] = in[i - N/4]; + + /* pre rotation */ + for(i=0;i<N/4;i++) { + re = ((int)rot[2*i] - (int)rot[N-1-2*i]) >> 1; + im = -((int)rot[N/2+2*i] - (int)rot[N/2-1-2*i]) >> 1; + CMUL(x[i].re, x[i].im, re, im, -xcos1[i], xsin1[i]); + } + + fft(x, MDCT_NBITS - 2); + + /* post rotation */ + for(i=0;i<N/4;i++) { + re = x[i].re; + im = x[i].im; + CMUL(re1, im1, re, im, xsin1[i], xcos1[i]); + out[2*i] = im1; + out[N/2-1-2*i] = re1; + } +} + +/* XXX: use another norm ? */ +static int calc_exp_diff(UINT8 *exp1, UINT8 *exp2, int n) +{ + int sum, i; + sum = 0; + for(i=0;i<n;i++) { + sum += abs(exp1[i] - exp2[i]); + } + return sum; +} + +static void compute_exp_strategy(UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], + UINT8 exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + int ch) +{ + int i, j; + int exp_diff; + + /* estimate if the exponent variation & decide if they should be + reused in the next frame */ + exp_strategy[0][ch] = EXP_NEW; + for(i=1;i<NB_BLOCKS;i++) { + exp_diff = calc_exp_diff(exp[i][ch], exp[i-1][ch], N/2); +#ifdef DEBUG + printf("exp_diff=%d\n", exp_diff); +#endif + if (exp_diff > EXP_DIFF_THRESHOLD) + exp_strategy[i][ch] = EXP_NEW; + else + exp_strategy[i][ch] = EXP_REUSE; + } + /* now select the encoding strategy type : if exponents are often + recoded, we use a coarse encoding */ + i = 0; + while (i < NB_BLOCKS) { + j = i + 1; + while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE) + j++; + switch(j - i) { + case 1: + exp_strategy[i][ch] = EXP_D45; + break; + case 2: + case 3: + exp_strategy[i][ch] = EXP_D25; + break; + default: + exp_strategy[i][ch] = EXP_D15; + break; + } + i = j; + } +} + +/* set exp[i] to min(exp[i], exp1[i]) */ +static void exponent_min(UINT8 exp[N/2], UINT8 exp1[N/2], int n) +{ + int i; + + for(i=0;i<n;i++) { + if (exp1[i] < exp[i]) + exp[i] = exp1[i]; + } +} + +/* update the exponents so that they are the ones the decoder will + decode. Return the number of bits used to code the exponents */ +static int encode_exp(UINT8 encoded_exp[N/2], + UINT8 exp[N/2], + int nb_exps, + int exp_strategy) +{ + int group_size, nb_groups, i, j, k, recurse, exp_min, delta; + UINT8 exp1[N/2]; + + switch(exp_strategy) { + case EXP_D15: + group_size = 1; + break; + case EXP_D25: + group_size = 2; + break; + default: + case EXP_D45: + group_size = 4; + break; + } + nb_groups = ((nb_exps + (group_size * 3) - 4) / (3 * group_size)) * 3; + + /* for each group, compute the minimum exponent */ + exp1[0] = exp[0]; /* DC exponent is handled separately */ + k = 1; + for(i=1;i<=nb_groups;i++) { + exp_min = exp[k]; + assert(exp_min >= 0 && exp_min <= 24); + for(j=1;j<group_size;j++) { + if (exp[k+j] < exp_min) + exp_min = exp[k+j]; + } + exp1[i] = exp_min; + k += group_size; + } + + /* constraint for DC exponent */ + if (exp1[0] > 15) + exp1[0] = 15; + + /* Iterate until the delta constraints between each groups are + satisfyed. I'm sure it is possible to find a better algorithm, + but I am lazy */ + do { + recurse = 0; + for(i=1;i<=nb_groups;i++) { + delta = exp1[i] - exp1[i-1]; + if (delta > 2) { + /* if delta too big, we encode a smaller exponent */ + exp1[i] = exp1[i-1] + 2; + } else if (delta < -2) { + /* if delta is too small, we must decrease the previous + exponent, which means we must recurse */ + recurse = 1; + exp1[i-1] = exp1[i] + 2; + } + } + } while (recurse); + + /* now we have the exponent values the decoder will see */ + encoded_exp[0] = exp1[0]; + k = 1; + for(i=1;i<=nb_groups;i++) { + for(j=0;j<group_size;j++) { + encoded_exp[k+j] = exp1[i]; + } + k += group_size; + } + +#if defined(DEBUG) + printf("exponents: strategy=%d\n", exp_strategy); + for(i=0;i<=nb_groups * group_size;i++) { + printf("%d ", encoded_exp[i]); + } + printf("\n"); +#endif + + return 4 + (nb_groups / 3) * 7; +} + +/* return the size in bits taken by the mantissa */ +int compute_mantissa_size(AC3EncodeContext *s, UINT8 *m, int nb_coefs) +{ + int bits, mant, i; + + bits = 0; + for(i=0;i<nb_coefs;i++) { + mant = m[i]; + switch(mant) { + case 0: + /* nothing */ + break; + case 1: + /* 3 mantissa in 5 bits */ + if (s->mant1_cnt == 0) + bits += 5; + if (++s->mant1_cnt == 3) + s->mant1_cnt = 0; + break; + case 2: + /* 3 mantissa in 7 bits */ + if (s->mant2_cnt == 0) + bits += 7; + if (++s->mant2_cnt == 3) + s->mant2_cnt = 0; + break; + case 3: + bits += 3; + break; + case 4: + /* 2 mantissa in 7 bits */ + if (s->mant4_cnt == 0) + bits += 7; + if (++s->mant4_cnt == 2) + s->mant4_cnt = 0; + break; + case 14: + bits += 14; + break; + case 15: + bits += 16; + break; + default: + bits += mant - 1; + break; + } + } + return bits; +} + + +static int bit_alloc(AC3EncodeContext *s, + UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], + int frame_bits, int csnroffst, int fsnroffst) +{ + int i, ch; + + /* compute size */ + for(i=0;i<NB_BLOCKS;i++) { + s->mant1_cnt = 0; + s->mant2_cnt = 0; + s->mant4_cnt = 0; + for(ch=0;ch<s->nb_channels;ch++) { + parametric_bit_allocation(s, bap[i][ch], encoded_exp[i][ch], + 0, s->nb_coefs[ch], + (((csnroffst-15) << 4) + + fsnroffst) << 2, + fgaintab[s->fgaincod[ch]]); + frame_bits += compute_mantissa_size(s, bap[i][ch], + s->nb_coefs[ch]); + } + } +#if 0 + printf("csnr=%d fsnr=%d frame_bits=%d diff=%d\n", + csnroffst, fsnroffst, frame_bits, + 16 * s->frame_size - ((frame_bits + 7) & ~7)); +#endif + return 16 * s->frame_size - frame_bits; +} + +#define SNR_INC1 4 + +static int compute_bit_allocation(AC3EncodeContext *s, + UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2], + UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS], + int frame_bits) +{ + int i, ch; + int csnroffst, fsnroffst; + UINT8 bap1[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + + /* init default parameters */ + s->sdecaycod = 2; + s->fdecaycod = 1; + s->sgaincod = 1; + s->dbkneecod = 2; + s->floorcod = 4; + for(ch=0;ch<s->nb_channels;ch++) + s->fgaincod[ch] = 4; + + /* compute real values */ + s->sdecay = sdecaytab[s->sdecaycod] >> s->halfratecod; + s->fdecay = fdecaytab[s->fdecaycod] >> s->halfratecod; + s->sgain = sgaintab[s->sgaincod]; + s->dbknee = dbkneetab[s->dbkneecod]; + s->floor = floortab[s->floorcod]; + + /* header size */ + frame_bits += 65; + if (s->acmod == 2) + frame_bits += 2; + + /* audio blocks */ + for(i=0;i<NB_BLOCKS;i++) { + frame_bits += s->nb_channels * 2 + 2; + if (s->acmod == 2) + frame_bits++; + frame_bits += 2 * s->nb_channels; + for(ch=0;ch<s->nb_channels;ch++) { + if (exp_strategy[i][ch] != EXP_REUSE) + frame_bits += 6 + 2; + } + frame_bits++; /* baie */ + frame_bits++; /* snr */ + frame_bits += 2; /* delta / skip */ + } + frame_bits++; /* cplinu for block 0 */ + /* bit alloc info */ + frame_bits += 2*4 + 3 + 6 + s->nb_channels * (4 + 3); + + /* CRC */ + frame_bits += 16; + + /* now the big work begins : do the bit allocation. Modify the snr + offset until we can pack everything in the requested frame size */ + + csnroffst = s->csnroffst; + while (csnroffst >= 0 && + bit_alloc(s, bap, encoded_exp, exp_strategy, frame_bits, csnroffst, 0) < 0) + csnroffst -= SNR_INC1; + if (csnroffst < 0) { + fprintf(stderr, "Error !!!\n"); + return -1; + } + while ((csnroffst + SNR_INC1) <= 63 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, + csnroffst + SNR_INC1, 0) >= 0) { + csnroffst += SNR_INC1; + memcpy(bap, bap1, sizeof(bap1)); + } + while ((csnroffst + 1) <= 63 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, csnroffst + 1, 0) >= 0) { + csnroffst++; + memcpy(bap, bap1, sizeof(bap1)); + } + + fsnroffst = 0; + while ((fsnroffst + SNR_INC1) <= 15 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, + csnroffst, fsnroffst + SNR_INC1) >= 0) { + fsnroffst += SNR_INC1; + memcpy(bap, bap1, sizeof(bap1)); + } + while ((fsnroffst + 1) <= 15 && + bit_alloc(s, bap1, encoded_exp, exp_strategy, frame_bits, + csnroffst, fsnroffst + 1) >= 0) { + fsnroffst++; + memcpy(bap, bap1, sizeof(bap1)); + } + + s->csnroffst = csnroffst; + for(ch=0;ch<s->nb_channels;ch++) + s->fsnroffst[ch] = fsnroffst; +#if defined(DEBUG_BITALLOC) + { + int j; + + for(i=0;i<6;i++) { + for(ch=0;ch<s->nb_channels;ch++) { + printf("Block #%d Ch%d:\n", i, ch); + printf("bap="); + for(j=0;j<s->nb_coefs[ch];j++) { + printf("%d ",bap[i][ch][j]); + } + printf("\n"); + } + } + } +#endif + return 0; +} + +static int AC3_encode_init(AVCodecContext *avctx) +{ + int freq = avctx->sample_rate; + int bitrate = avctx->bit_rate; + int channels = avctx->channels; + AC3EncodeContext *s = avctx->priv_data; + int i, j, k, l, ch, v; + float alpha; + static unsigned short freqs[3] = { 48000, 44100, 32000 }; + + avctx->frame_size = AC3_FRAME_SIZE; + avctx->key_frame = 1; /* always key frame */ + + /* number of channels */ + if (channels == 1) + s->acmod = 1; + else if (channels == 2) + s->acmod = 2; + else + return -1; + s->nb_channels = channels; + + /* frequency */ + for(i=0;i<3;i++) { + for(j=0;j<3;j++) + if ((freqs[j] >> i) == freq) + goto found; + } + return -1; + found: + s->sample_rate = freq; + s->halfratecod = i; + s->fscod = j; + s->bsid = 8 + s->halfratecod; + s->bsmod = 0; /* complete main audio service */ + + /* bitrate & frame size */ + bitrate /= 1000; + for(i=0;i<19;i++) { + if ((bitratetab[i] >> s->halfratecod) == bitrate) + break; + } + if (i == 19) + return -1; + s->bit_rate = bitrate; + s->frmsizecod = i << 1; + s->frame_size_min = (bitrate * 1000 * AC3_FRAME_SIZE) / (freq * 16); + /* for now we do not handle fractional sizes */ + s->frame_size = s->frame_size_min; + + /* bit allocation init */ + for(ch=0;ch<s->nb_channels;ch++) { + /* bandwidth for each channel */ + /* XXX: should compute the bandwidth according to the frame + size, so that we avoid anoying high freq artefacts */ + s->chbwcod[ch] = 50; /* sample bandwidth as mpeg audio layer 2 table 0 */ + s->nb_coefs[ch] = ((s->chbwcod[ch] + 12) * 3) + 37; + } + /* initial snr offset */ + s->csnroffst = 40; + + /* compute bndtab and masktab from bandsz */ + k = 0; + l = 0; + for(i=0;i<50;i++) { + bndtab[i] = l; + v = bndsz[i]; + for(j=0;j<v;j++) masktab[k++]=i; + l += v; + } + bndtab[50] = 0; + + /* mdct init */ + fft_init(MDCT_NBITS - 2); + for(i=0;i<N/4;i++) { + alpha = 2 * M_PI * (i + 1.0 / 8.0) / (float)N; + xcos1[i] = fix15(-cos(alpha)); + xsin1[i] = fix15(-sin(alpha)); + } + + ac3_crc_init(); + + return 0; +} + +/* output the AC3 frame header */ +static void output_frame_header(AC3EncodeContext *s, unsigned char *frame) +{ + init_put_bits(&s->pb, frame, AC3_MAX_CODED_FRAME_SIZE, NULL, NULL); + + put_bits(&s->pb, 16, 0x0b77); /* frame header */ + put_bits(&s->pb, 16, 0); /* crc1: will be filled later */ + put_bits(&s->pb, 2, s->fscod); + put_bits(&s->pb, 6, s->frmsizecod + (s->frame_size - s->frame_size_min)); + put_bits(&s->pb, 5, s->bsid); + put_bits(&s->pb, 3, s->bsmod); + put_bits(&s->pb, 3, s->acmod); + if (s->acmod == 2) { + put_bits(&s->pb, 2, 0); /* surround not indicated */ + } + put_bits(&s->pb, 1, 0); /* no LFE */ + put_bits(&s->pb, 5, 31); /* dialog norm: -31 db */ + put_bits(&s->pb, 1, 0); /* no compression control word */ + put_bits(&s->pb, 1, 0); /* no lang code */ + put_bits(&s->pb, 1, 0); /* no audio production info */ + put_bits(&s->pb, 1, 0); /* no copyright */ + put_bits(&s->pb, 1, 1); /* original bitstream */ + put_bits(&s->pb, 1, 0); /* no time code 1 */ + put_bits(&s->pb, 1, 0); /* no time code 2 */ + put_bits(&s->pb, 1, 0); /* no addtional bit stream info */ +} + +/* symetric quantization on 'levels' levels */ +static inline int sym_quant(int c, int e, int levels) +{ + int v; + + if (c >= 0) { + v = (levels * (c << e)) >> 25; + v = (levels >> 1) + v; + } else { + v = (levels * ((-c) << e)) >> 25; + v = (levels >> 1) - v; + } + assert (v >= 0 && v < levels); + return v; +} + +/* asymetric quantization on 2^qbits levels */ +static inline int asym_quant(int c, int e, int qbits) +{ + int lshift, m, v; + + lshift = e + qbits - 24; + if (lshift >= 0) + v = c << lshift; + else + v = c >> (-lshift); + /* rounding */ + v = (v + 1) >> 1; + m = (1 << (qbits-1)); + if (v >= m) + v = m - 1; + assert(v >= -m); + return v & ((1 << qbits)-1); +} + +/* Output one audio block. There are NB_BLOCKS audio blocks in one AC3 + frame */ +static void output_audio_block(AC3EncodeContext *s, + UINT8 exp_strategy[AC3_MAX_CHANNELS], + UINT8 encoded_exp[AC3_MAX_CHANNELS][N/2], + UINT8 bap[AC3_MAX_CHANNELS][N/2], + INT32 mdct_coefs[AC3_MAX_CHANNELS][N/2], + INT8 global_exp[AC3_MAX_CHANNELS], + int block_num) +{ + int ch, nb_groups, group_size, i, baie; + UINT8 *p; + UINT16 qmant[AC3_MAX_CHANNELS][N/2]; + int exp0, exp1; + int mant1_cnt, mant2_cnt, mant4_cnt; + UINT16 *qmant1_ptr, *qmant2_ptr, *qmant4_ptr; + int delta0, delta1, delta2; + + for(ch=0;ch<s->nb_channels;ch++) + put_bits(&s->pb, 1, 0); /* 512 point MDCT */ + for(ch=0;ch<s->nb_channels;ch++) + put_bits(&s->pb, 1, 1); /* no dither */ + put_bits(&s->pb, 1, 0); /* no dynamic range */ + if (block_num == 0) { + /* for block 0, even if no coupling, we must say it. This is a + waste of bit :-) */ + put_bits(&s->pb, 1, 1); /* coupling strategy present */ + put_bits(&s->pb, 1, 0); /* no coupling strategy */ + } else { + put_bits(&s->pb, 1, 0); /* no new coupling strategy */ + } + + if (s->acmod == 2) { + put_bits(&s->pb, 1, 0); /* no matrixing (but should be used in the future) */ + } + +#if defined(DEBUG) + { + static int count = 0; + printf("Block #%d (%d)\n", block_num, count++); + } +#endif + /* exponent strategy */ + for(ch=0;ch<s->nb_channels;ch++) { + put_bits(&s->pb, 2, exp_strategy[ch]); + } + + for(ch=0;ch<s->nb_channels;ch++) { + if (exp_strategy[ch] != EXP_REUSE) + put_bits(&s->pb, 6, s->chbwcod[ch]); + } + + /* exponents */ + for (ch = 0; ch < s->nb_channels; ch++) { + switch(exp_strategy[ch]) { + case EXP_REUSE: + continue; + case EXP_D15: + group_size = 1; + break; + case EXP_D25: + group_size = 2; + break; + default: + case EXP_D45: + group_size = 4; + break; + } + nb_groups = (s->nb_coefs[ch] + (group_size * 3) - 4) / (3 * group_size); + p = encoded_exp[ch]; + + /* first exponent */ + exp1 = *p++; + put_bits(&s->pb, 4, exp1); + + /* next ones are delta encoded */ + for(i=0;i<nb_groups;i++) { + /* merge three delta in one code */ + exp0 = exp1; + exp1 = p[0]; + p += group_size; + delta0 = exp1 - exp0 + 2; + + exp0 = exp1; + exp1 = p[0]; + p += group_size; + delta1 = exp1 - exp0 + 2; + + exp0 = exp1; + exp1 = p[0]; + p += group_size; + delta2 = exp1 - exp0 + 2; + + put_bits(&s->pb, 7, ((delta0 * 5 + delta1) * 5) + delta2); + } + + put_bits(&s->pb, 2, 0); /* no gain range info */ + } + + /* bit allocation info */ + baie = (block_num == 0); + put_bits(&s->pb, 1, baie); + if (baie) { + put_bits(&s->pb, 2, s->sdecaycod); + put_bits(&s->pb, 2, s->fdecaycod); + put_bits(&s->pb, 2, s->sgaincod); + put_bits(&s->pb, 2, s->dbkneecod); + put_bits(&s->pb, 3, s->floorcod); + } + + /* snr offset */ + put_bits(&s->pb, 1, baie); /* always present with bai */ + if (baie) { + put_bits(&s->pb, 6, s->csnroffst); + for(ch=0;ch<s->nb_channels;ch++) { + put_bits(&s->pb, 4, s->fsnroffst[ch]); + put_bits(&s->pb, 3, s->fgaincod[ch]); + } + } + + put_bits(&s->pb, 1, 0); /* no delta bit allocation */ + put_bits(&s->pb, 1, 0); /* no data to skip */ + + /* mantissa encoding : we use two passes to handle the grouping. A + one pass method may be faster, but it would necessitate to + modify the output stream. */ + + /* first pass: quantize */ + mant1_cnt = mant2_cnt = mant4_cnt = 0; + qmant1_ptr = qmant2_ptr = qmant4_ptr = NULL; + + for (ch = 0; ch < s->nb_channels; ch++) { + int b, c, e, v; + + for(i=0;i<s->nb_coefs[ch];i++) { + c = mdct_coefs[ch][i]; + e = encoded_exp[ch][i] - global_exp[ch]; + b = bap[ch][i]; + switch(b) { + case 0: + v = 0; + break; + case 1: + v = sym_quant(c, e, 3); + switch(mant1_cnt) { + case 0: + qmant1_ptr = &qmant[ch][i]; + v = 9 * v; + mant1_cnt = 1; + break; + case 1: + *qmant1_ptr += 3 * v; + mant1_cnt = 2; + v = 128; + break; + default: + *qmant1_ptr += v; + mant1_cnt = 0; + v = 128; + break; + } + break; + case 2: + v = sym_quant(c, e, 5); + switch(mant2_cnt) { + case 0: + qmant2_ptr = &qmant[ch][i]; + v = 25 * v; + mant2_cnt = 1; + break; + case 1: + *qmant2_ptr += 5 * v; + mant2_cnt = 2; + v = 128; + break; + default: + *qmant2_ptr += v; + mant2_cnt = 0; + v = 128; + break; + } + break; + case 3: + v = sym_quant(c, e, 7); + break; + case 4: + v = sym_quant(c, e, 11); + switch(mant4_cnt) { + case 0: + qmant4_ptr = &qmant[ch][i]; + v = 11 * v; + mant4_cnt = 1; + break; + default: + *qmant4_ptr += v; + mant4_cnt = 0; + v = 128; + break; + } + break; + case 5: + v = sym_quant(c, e, 15); + break; + case 14: + v = asym_quant(c, e, 14); + break; + case 15: + v = asym_quant(c, e, 16); + break; + default: + v = asym_quant(c, e, b - 1); + break; + } + qmant[ch][i] = v; + } + } + + /* second pass : output the values */ + for (ch = 0; ch < s->nb_channels; ch++) { + int b, q; + + for(i=0;i<s->nb_coefs[ch];i++) { + q = qmant[ch][i]; + b = bap[ch][i]; + switch(b) { + case 0: + break; + case 1: + if (q != 128) + put_bits(&s->pb, 5, q); + break; + case 2: + if (q != 128) + put_bits(&s->pb, 7, q); + break; + case 3: + put_bits(&s->pb, 3, q); + break; + case 4: + if (q != 128) + put_bits(&s->pb, 7, q); + break; + case 14: + put_bits(&s->pb, 14, q); + break; + case 15: + put_bits(&s->pb, 16, q); + break; + default: + put_bits(&s->pb, b - 1, q); + break; + } + } + } +} + +/* compute the ac3 crc */ + +#define CRC16_POLY ((1 << 0) | (1 << 2) | (1 << 15) | (1 << 16)) + +static void ac3_crc_init(void) +{ + unsigned int c, n, k; + + for(n=0;n<256;n++) { + c = n << 8; + for (k = 0; k < 8; k++) { + if (c & (1 << 15)) + c = ((c << 1) & 0xffff) ^ (CRC16_POLY & 0xffff); + else + c = c << 1; + } + crc_table[n] = c; + } +} + +static unsigned int ac3_crc(UINT8 *data, int n, unsigned int crc) +{ + int i; + for(i=0;i<n;i++) { + crc = (crc_table[data[i] ^ (crc >> 8)] ^ (crc << 8)) & 0xffff; + } + return crc; +} + +static unsigned int mul_poly(unsigned int a, unsigned int b, unsigned int poly) +{ + unsigned int c; + + c = 0; + while (a) { + if (a & 1) + c ^= b; + a = a >> 1; + b = b << 1; + if (b & (1 << 16)) + b ^= poly; + } + return c; +} + +static unsigned int pow_poly(unsigned int a, unsigned int n, unsigned int poly) +{ + unsigned int r; + r = 1; + while (n) { + if (n & 1) + r = mul_poly(r, a, poly); + a = mul_poly(a, a, poly); + n >>= 1; + } + return r; +} + + +/* compute log2(max(abs(tab[]))) */ +static int log2_tab(INT16 *tab, int n) +{ + int i, v; + + v = 0; + for(i=0;i<n;i++) { + v |= abs(tab[i]); + } + return log2(v); +} + +static void lshift_tab(INT16 *tab, int n, int lshift) +{ + int i; + + if (lshift > 0) { + for(i=0;i<n;i++) { + tab[i] <<= lshift; + } + } else if (lshift < 0) { + lshift = -lshift; + for(i=0;i<n;i++) { + tab[i] >>= lshift; + } + } +} + +/* fill the end of the frame and compute the two crcs */ +static int output_frame_end(AC3EncodeContext *s) +{ + int frame_size, frame_size_58, n, crc1, crc2, crc_inv; + UINT8 *frame; + + frame_size = s->frame_size; /* frame size in words */ + /* align to 8 bits */ + flush_put_bits(&s->pb); + /* add zero bytes to reach the frame size */ + frame = s->pb.buf; + n = 2 * s->frame_size - (s->pb.buf_ptr - frame) - 2; + assert(n >= 0); + memset(s->pb.buf_ptr, 0, n); + + /* Now we must compute both crcs : this is not so easy for crc1 + because it is at the beginning of the data... */ + frame_size_58 = (frame_size >> 1) + (frame_size >> 3); + crc1 = ac3_crc(frame + 4, (2 * frame_size_58) - 4, 0); + /* XXX: could precompute crc_inv */ + crc_inv = pow_poly((CRC16_POLY >> 1), (16 * frame_size_58) - 16, CRC16_POLY); + crc1 = mul_poly(crc_inv, crc1, CRC16_POLY); + frame[2] = crc1 >> 8; + frame[3] = crc1; + + crc2 = ac3_crc(frame + 2 * frame_size_58, (frame_size - frame_size_58) * 2 - 2, 0); + frame[2*frame_size - 2] = crc2 >> 8; + frame[2*frame_size - 1] = crc2; + + // printf("n=%d frame_size=%d\n", n, frame_size); + return frame_size * 2; +} + +int AC3_encode_frame(AVCodecContext *avctx, + unsigned char *frame, int buf_size, void *data) +{ + AC3EncodeContext *s = avctx->priv_data; + short *samples = data; + int i, j, k, v, ch; + INT16 input_samples[N]; + INT32 mdct_coef[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + UINT8 exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + UINT8 exp_strategy[NB_BLOCKS][AC3_MAX_CHANNELS]; + UINT8 encoded_exp[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + UINT8 bap[NB_BLOCKS][AC3_MAX_CHANNELS][N/2]; + INT8 exp_samples[NB_BLOCKS][AC3_MAX_CHANNELS]; + int frame_bits; + + frame_bits = 0; + for(ch=0;ch<s->nb_channels;ch++) { + /* fixed mdct to the six sub blocks & exponent computation */ + for(i=0;i<NB_BLOCKS;i++) { + INT16 *sptr; + int sinc; + + /* compute input samples */ + memcpy(input_samples, s->last_samples[ch], N/2 * sizeof(INT16)); + sinc = s->nb_channels; + sptr = samples + (sinc * (N/2) * i) + ch; + for(j=0;j<N/2;j++) { + v = *sptr; + input_samples[j + N/2] = v; + s->last_samples[ch][j] = v; + sptr += sinc; + } + + /* apply the MDCT window */ + for(j=0;j<N/2;j++) { + input_samples[j] = MUL16(input_samples[j], + ac3_window[j]) >> 15; + input_samples[N-j-1] = MUL16(input_samples[N-j-1], + ac3_window[j]) >> 15; + } + + /* Normalize the samples to use the maximum available + precision */ + v = 14 - log2_tab(input_samples, N); + if (v < 0) + v = 0; + exp_samples[i][ch] = v - 8; + lshift_tab(input_samples, N, v); + + /* do the MDCT */ + mdct512(mdct_coef[i][ch], input_samples); + + /* compute "exponents". We take into account the + normalization there */ + for(j=0;j<N/2;j++) { + int e; + v = abs(mdct_coef[i][ch][j]); + if (v == 0) + e = 24; + else { + e = 23 - log2(v) + exp_samples[i][ch]; + if (e >= 24) { + e = 24; + mdct_coef[i][ch][j] = 0; + } + } + exp[i][ch][j] = e; + } + } + + compute_exp_strategy(exp_strategy, exp, ch); + + /* compute the exponents as the decoder will see them. The + EXP_REUSE case must be handled carefully : we select the + min of the exponents */ + i = 0; + while (i < NB_BLOCKS) { + j = i + 1; + while (j < NB_BLOCKS && exp_strategy[j][ch] == EXP_REUSE) { + exponent_min(exp[i][ch], exp[j][ch], s->nb_coefs[ch]); + j++; + } + frame_bits += encode_exp(encoded_exp[i][ch], + exp[i][ch], s->nb_coefs[ch], + exp_strategy[i][ch]); + /* copy encoded exponents for reuse case */ + for(k=i+1;k<j;k++) { + memcpy(encoded_exp[k][ch], encoded_exp[i][ch], + s->nb_coefs[ch] * sizeof(UINT8)); + } + i = j; + } + } + + compute_bit_allocation(s, bap, encoded_exp, exp_strategy, frame_bits); + /* everything is known... let's output the frame */ + output_frame_header(s, frame); + + for(i=0;i<NB_BLOCKS;i++) { + output_audio_block(s, exp_strategy[i], encoded_exp[i], + bap[i], mdct_coef[i], exp_samples[i], i); + } + return output_frame_end(s); +} + +#if 0 +/*************************************************************************/ +/* TEST */ + +#define FN (N/4) + +void fft_test(void) +{ + IComplex in[FN], in1[FN]; + int k, n, i; + float sum_re, sum_im, a; + + /* FFT test */ + + for(i=0;i<FN;i++) { + in[i].re = random() % 65535 - 32767; + in[i].im = random() % 65535 - 32767; + in1[i] = in[i]; + } + fft(in, 7); + + /* do it by hand */ + for(k=0;k<FN;k++) { + sum_re = 0; + sum_im = 0; + for(n=0;n<FN;n++) { + a = -2 * M_PI * (n * k) / FN; + sum_re += in1[n].re * cos(a) - in1[n].im * sin(a); + sum_im += in1[n].re * sin(a) + in1[n].im * cos(a); + } + printf("%3d: %6d,%6d %6.0f,%6.0f\n", + k, in[k].re, in[k].im, sum_re / FN, sum_im / FN); + } +} + +void mdct_test(void) +{ + INT16 input[N]; + INT32 output[N/2]; + float input1[N]; + float output1[N/2]; + float s, a, err, e, emax; + int i, k, n; + + for(i=0;i<N;i++) { + input[i] = (random() % 65535 - 32767) * 9 / 10; + input1[i] = input[i]; + } + + mdct512(output, input); + + /* do it by hand */ + for(k=0;k<N/2;k++) { + s = 0; + for(n=0;n<N;n++) { + a = (2*M_PI*(2*n+1+N/2)*(2*k+1) / (4 * N)); + s += input1[n] * cos(a); + } + output1[k] = -2 * s / N; + } + + err = 0; + emax = 0; + for(i=0;i<N/2;i++) { + printf("%3d: %7d %7.0f\n", i, output[i], output1[i]); + e = output[i] - output1[i]; + if (e > emax) + emax = e; + err += e * e; + } + printf("err2=%f emax=%f\n", err / (N/2), emax); +} + +void test_ac3(void) +{ + AC3EncodeContext ctx; + unsigned char frame[AC3_MAX_CODED_FRAME_SIZE]; + short samples[AC3_FRAME_SIZE]; + int ret, i; + + AC3_encode_init(&ctx, 44100, 64000, 1); + + fft_test(); + mdct_test(); + + for(i=0;i<AC3_FRAME_SIZE;i++) + samples[i] = (int)(sin(2*M_PI*i*1000.0/44100) * 10000); + ret = AC3_encode_frame(&ctx, frame, samples); + printf("ret=%d\n", ret); +} +#endif + +AVCodec ac3_encoder = { + "ac3", + CODEC_TYPE_AUDIO, + CODEC_ID_AC3, + sizeof(AC3EncodeContext), + AC3_encode_init, + AC3_encode_frame, + NULL, +}; diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h new file mode 100644 index 0000000000..40cc53aced --- /dev/null +++ b/libavcodec/ac3enc.h @@ -0,0 +1,32 @@ + +#define AC3_FRAME_SIZE (6*256) +#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */ +#define AC3_MAX_CHANNELS 2 /* we handle at most two channels, although + AC3 allows 6 channels */ + +typedef struct AC3EncodeContext { + PutBitContext pb; + int nb_channels; + int bit_rate; + int sample_rate; + int bsid; + int frame_size_min; /* minimum frame size in case rounding is necessary */ + int frame_size; /* current frame size in words */ + int halfratecod; + int frmsizecod; + int fscod; /* frequency */ + int acmod; + int bsmod; + short last_samples[AC3_MAX_CHANNELS][256]; + int chbwcod[AC3_MAX_CHANNELS]; + int nb_coefs[AC3_MAX_CHANNELS]; + + /* bitrate allocation control */ + int sgaincod, sdecaycod, fdecaycod, dbkneecod, floorcod; + int sgain, sdecay, fdecay, dbknee, floor; + int csnroffst; + int fgaincod[AC3_MAX_CHANNELS]; + int fsnroffst[AC3_MAX_CHANNELS]; + /* mantissa encoding */ + int mant1_cnt, mant2_cnt, mant4_cnt; +} AC3EncodeContext; diff --git a/libavcodec/ac3tab.h b/libavcodec/ac3tab.h new file mode 100644 index 0000000000..2d379f0404 --- /dev/null +++ b/libavcodec/ac3tab.h @@ -0,0 +1,180 @@ +/* tables taken directly from AC3 spec */ + +/* possible bitrates */ +static const UINT16 bitratetab[19] = { + 32, 40, 48, 56, 64, 80, 96, 112, 128, + 160, 192, 224, 256, 320, 384, 448, 512, 576, 640 +}; + +/* AC3 MDCT window */ + +/* MDCT window */ +static const INT16 ac3_window[256]= { + 4, 7, 12, 16, 21, 28, 34, 42, + 51, 61, 72, 84, 97, 111, 127, 145, + 164, 184, 207, 231, 257, 285, 315, 347, + 382, 419, 458, 500, 544, 591, 641, 694, + 750, 810, 872, 937, 1007, 1079, 1155, 1235, + 1318, 1406, 1497, 1593, 1692, 1796, 1903, 2016, + 2132, 2253, 2379, 2509, 2644, 2783, 2927, 3076, + 3230, 3389, 3552, 3721, 3894, 4072, 4255, 4444, + 4637, 4835, 5038, 5246, 5459, 5677, 5899, 6127, + 6359, 6596, 6837, 7083, 7334, 7589, 7848, 8112, + 8380, 8652, 8927, 9207, 9491, 9778,10069,10363, +10660,10960,11264,11570,11879,12190,12504,12820, +13138,13458,13780,14103,14427,14753,15079,15407, +15735,16063,16392,16720,17049,17377,17705,18032, +18358,18683,19007,19330,19651,19970,20287,20602, +20914,21225,21532,21837,22139,22438,22733,23025, +23314,23599,23880,24157,24430,24699,24964,25225, +25481,25732,25979,26221,26459,26691,26919,27142, +27359,27572,27780,27983,28180,28373,28560,28742, +28919,29091,29258,29420,29577,29729,29876,30018, +30155,30288,30415,30538,30657,30771,30880,30985, +31086,31182,31274,31363,31447,31528,31605,31678, +31747,31814,31877,31936,31993,32046,32097,32145, +32190,32232,32272,32310,32345,32378,32409,32438, +32465,32490,32513,32535,32556,32574,32592,32608, +32623,32636,32649,32661,32671,32681,32690,32698, +32705,32712,32718,32724,32729,32733,32737,32741, +32744,32747,32750,32752,32754,32756,32757,32759, +32760,32761,32762,32763,32764,32764,32765,32765, +32766,32766,32766,32766,32767,32767,32767,32767, +32767,32767,32767,32767,32767,32767,32767,32767, +32767,32767,32767,32767,32767,32767,32767,32767, +}; + +static UINT8 masktab[253]; + +static const UINT8 latab[260]= { +0x0040,0x003f,0x003e,0x003d,0x003c,0x003b,0x003a,0x0039,0x0038,0x0037, +0x0036,0x0035,0x0034,0x0034,0x0033,0x0032,0x0031,0x0030,0x002f,0x002f, +0x002e,0x002d,0x002c,0x002c,0x002b,0x002a,0x0029,0x0029,0x0028,0x0027, +0x0026,0x0026,0x0025,0x0024,0x0024,0x0023,0x0023,0x0022,0x0021,0x0021, +0x0020,0x0020,0x001f,0x001e,0x001e,0x001d,0x001d,0x001c,0x001c,0x001b, +0x001b,0x001a,0x001a,0x0019,0x0019,0x0018,0x0018,0x0017,0x0017,0x0016, +0x0016,0x0015,0x0015,0x0015,0x0014,0x0014,0x0013,0x0013,0x0013,0x0012, +0x0012,0x0012,0x0011,0x0011,0x0011,0x0010,0x0010,0x0010,0x000f,0x000f, +0x000f,0x000e,0x000e,0x000e,0x000d,0x000d,0x000d,0x000d,0x000c,0x000c, +0x000c,0x000c,0x000b,0x000b,0x000b,0x000b,0x000a,0x000a,0x000a,0x000a, +0x000a,0x0009,0x0009,0x0009,0x0009,0x0009,0x0008,0x0008,0x0008,0x0008, +0x0008,0x0008,0x0007,0x0007,0x0007,0x0007,0x0007,0x0007,0x0006,0x0006, +0x0006,0x0006,0x0006,0x0006,0x0006,0x0006,0x0005,0x0005,0x0005,0x0005, +0x0005,0x0005,0x0005,0x0005,0x0004,0x0004,0x0004,0x0004,0x0004,0x0004, +0x0004,0x0004,0x0004,0x0004,0x0004,0x0003,0x0003,0x0003,0x0003,0x0003, +0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0003,0x0002, +0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002, +0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0002,0x0001,0x0001, +0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001, +0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001, +0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001,0x0001, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, +}; + +static const UINT16 hth[50][3]= { +{ 0x04d0,0x04f0,0x0580 }, +{ 0x04d0,0x04f0,0x0580 }, +{ 0x0440,0x0460,0x04b0 }, +{ 0x0400,0x0410,0x0450 }, +{ 0x03e0,0x03e0,0x0420 }, +{ 0x03c0,0x03d0,0x03f0 }, +{ 0x03b0,0x03c0,0x03e0 }, +{ 0x03b0,0x03b0,0x03d0 }, +{ 0x03a0,0x03b0,0x03c0 }, +{ 0x03a0,0x03a0,0x03b0 }, +{ 0x03a0,0x03a0,0x03b0 }, +{ 0x03a0,0x03a0,0x03b0 }, +{ 0x03a0,0x03a0,0x03a0 }, +{ 0x0390,0x03a0,0x03a0 }, +{ 0x0390,0x0390,0x03a0 }, +{ 0x0390,0x0390,0x03a0 }, +{ 0x0380,0x0390,0x03a0 }, +{ 0x0380,0x0380,0x03a0 }, +{ 0x0370,0x0380,0x03a0 }, +{ 0x0370,0x0380,0x03a0 }, +{ 0x0360,0x0370,0x0390 }, +{ 0x0360,0x0370,0x0390 }, +{ 0x0350,0x0360,0x0390 }, +{ 0x0350,0x0360,0x0390 }, +{ 0x0340,0x0350,0x0380 }, +{ 0x0340,0x0350,0x0380 }, +{ 0x0330,0x0340,0x0380 }, +{ 0x0320,0x0340,0x0370 }, +{ 0x0310,0x0320,0x0360 }, +{ 0x0300,0x0310,0x0350 }, +{ 0x02f0,0x0300,0x0340 }, +{ 0x02f0,0x02f0,0x0330 }, +{ 0x02f0,0x02f0,0x0320 }, +{ 0x02f0,0x02f0,0x0310 }, +{ 0x0300,0x02f0,0x0300 }, +{ 0x0310,0x0300,0x02f0 }, +{ 0x0340,0x0320,0x02f0 }, +{ 0x0390,0x0350,0x02f0 }, +{ 0x03e0,0x0390,0x0300 }, +{ 0x0420,0x03e0,0x0310 }, +{ 0x0460,0x0420,0x0330 }, +{ 0x0490,0x0450,0x0350 }, +{ 0x04a0,0x04a0,0x03c0 }, +{ 0x0460,0x0490,0x0410 }, +{ 0x0440,0x0460,0x0470 }, +{ 0x0440,0x0440,0x04a0 }, +{ 0x0520,0x0480,0x0460 }, +{ 0x0800,0x0630,0x0440 }, +{ 0x0840,0x0840,0x0450 }, +{ 0x0840,0x0840,0x04e0 }, +}; + +static const UINT8 baptab[64]= { + 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, + 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, + 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, + 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, + 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, + 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, + 15, 15, 15, 15, +}; + +static const UINT8 sdecaytab[4]={ + 0x0f, 0x11, 0x13, 0x15, +}; + +static const UINT8 fdecaytab[4]={ + 0x3f, 0x53, 0x67, 0x7b, +}; + +static const UINT16 sgaintab[4]= { + 0x540, 0x4d8, 0x478, 0x410, +}; + +static const UINT16 dbkneetab[4]= { + 0x000, 0x700, 0x900, 0xb00, +}; + +static const UINT16 floortab[8]= { + 0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800, +}; + +static const UINT16 fgaintab[8]= { + 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400, +}; + +static const UINT8 bndsz[50]={ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, + 3, 6, 6, 6, 6, 6, 6, 12, 12, 12, 12, 24, 24, 24, 24, 24 +}; + +static UINT8 bndtab[51]; + +/* fft & mdct sin cos tables */ +static INT16 costab[64]; +static INT16 sintab[64]; +static INT16 fft_rev[512]; +static INT16 xcos1[128]; +static INT16 xsin1[128]; + +static UINT16 crc_table[256]; diff --git a/libavcodec/apiexample.c b/libavcodec/apiexample.c new file mode 100644 index 0000000000..e7ae6e80be --- /dev/null +++ b/libavcodec/apiexample.c @@ -0,0 +1,407 @@ +/* avcodec API use example. + * + * Note that this library only handles codecs (mpeg, mpeg4, etc...), + * not file formats (avi, vob, etc...). See library 'libav' for the + * format handling + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> + +#include "avcodec.h" + +#define INBUF_SIZE 4096 + +/* + * Audio encoding example + */ +void audio_encode_example(const char *filename) +{ + AVCodec *codec; + AVCodecContext codec_context, *c = &codec_context; + int frame_size, i, j, out_size, outbuf_size; + FILE *f; + short *samples; + float t, tincr; + UINT8 *outbuf; + + printf("Audio encoding\n"); + + /* find the MP2 encoder */ + codec = avcodec_find_encoder(CODEC_ID_MP2); + if (!codec) { + fprintf(stderr, "codec not found\n"); + exit(1); + } + + /* put default values */ + memset(c, 0, sizeof(*c)); + + /* put sample parameters */ + c->bit_rate = 64000; + c->sample_rate = 44100; + c->channels = 2; + + /* open it */ + if (avcodec_open(c, codec) < 0) { + fprintf(stderr, "could not open codec\n"); + exit(1); + } + + /* the codec gives us the frame size, in samples */ + frame_size = c->frame_size; + samples = malloc(frame_size * 2 * c->channels); + outbuf_size = 10000; + outbuf = malloc(outbuf_size); + + f = fopen(filename, "w"); + if (!f) { + fprintf(stderr, "could not open %s\n", filename); + exit(1); + } + + /* encode a single tone sound */ + t = 0; + tincr = 2 * M_PI * 440.0 / c->sample_rate; + for(i=0;i<200;i++) { + for(j=0;j<frame_size;j++) { + samples[2*j] = (int)(sin(t) * 10000); + samples[2*j+1] = samples[2*j]; + t += tincr; + } + /* encode the samples */ + out_size = avcodec_encode_audio(c, outbuf, outbuf_size, samples); + fwrite(outbuf, 1, out_size, f); + } + fclose(f); + free(outbuf); + free(samples); + + avcodec_close(c); +} + +/* + * Audio decoding. + */ +void audio_decode_example(const char *outfilename, const char *filename) +{ + AVCodec *codec; + AVCodecContext codec_context, *c = &codec_context; + int out_size, size, len; + FILE *f, *outfile; + UINT8 *outbuf; + UINT8 inbuf[INBUF_SIZE], *inbuf_ptr; + + printf("Audio decoding\n"); + + /* find the mpeg audio decoder */ + codec = avcodec_find_decoder(CODEC_ID_MP2); + if (!codec) { + fprintf(stderr, "codec not found\n"); + exit(1); + } + + /* put default values */ + memset(c, 0, sizeof(*c)); + + /* open it */ + if (avcodec_open(c, codec) < 0) { + fprintf(stderr, "could not open codec\n"); + exit(1); + } + + outbuf = malloc(AVCODEC_MAX_AUDIO_FRAME_SIZE); + + f = fopen(filename, "r"); + if (!f) { + fprintf(stderr, "could not open %s\n", filename); + exit(1); + } + outfile = fopen(outfilename, "w"); + if (!outfile) { + fprintf(stderr, "could not open %s\n", outfilename); + exit(1); + } + + /* decode until eof */ + inbuf_ptr = inbuf; + for(;;) { + size = fread(inbuf, 1, INBUF_SIZE, f); + if (size == 0) + break; + + inbuf_ptr = inbuf; + while (size > 0) { + len = avcodec_decode_audio(c, (short *)outbuf, &out_size, + inbuf_ptr, size); + if (len < 0) { + fprintf(stderr, "Error while decoding\n"); + exit(1); + } + if (out_size > 0) { + /* if a frame has been decoded, output it */ + fwrite(outbuf, 1, out_size, outfile); + } + size -= len; + inbuf_ptr += len; + } + } + + fclose(outfile); + fclose(f); + free(outbuf); + + avcodec_close(c); +} + +/* + * Video encoding example + */ +void video_encode_example(const char *filename) +{ + AVCodec *codec; + AVCodecContext codec_context, *c = &codec_context; + int i, out_size, size, x, y, outbuf_size; + FILE *f; + AVPicture picture; + UINT8 *outbuf, *picture_buf; + + printf("Video encoding\n"); + + /* find the mpeg1 video encoder */ + codec = avcodec_find_encoder(CODEC_ID_MPEG1VIDEO); + if (!codec) { + fprintf(stderr, "codec not found\n"); + exit(1); + } + + /* put default values */ + memset(c, 0, sizeof(*c)); + + /* put sample parameters */ + c->bit_rate = 400000; + /* resolution must be a multiple of two */ + c->width = 352; + c->height = 288; + /* frames per second */ + c->frame_rate = 25 * FRAME_RATE_BASE; + c->gop_size = 10; /* emit one intra frame every ten frames */ + + /* open it */ + if (avcodec_open(c, codec) < 0) { + fprintf(stderr, "could not open codec\n"); + exit(1); + } + + /* the codec gives us the frame size, in samples */ + + f = fopen(filename, "w"); + if (!f) { + fprintf(stderr, "could not open %s\n", filename); + exit(1); + } + + /* alloc image and output buffer */ + outbuf_size = 100000; + outbuf = malloc(outbuf_size); + size = c->width * c->height; + picture_buf = malloc((size * 3) / 2); /* size for YUV 420 */ + + picture.data[0] = picture_buf; + picture.data[1] = picture.data[0] + size; + picture.data[2] = picture.data[1] + size / 4; + picture.linesize[0] = c->width; + picture.linesize[1] = c->width / 2; + picture.linesize[2] = c->width / 2; + + /* encode 1 second of video */ + for(i=0;i<25;i++) { + printf("encoding frame %3d\r", i); + fflush(stdout); + /* prepare a dummy image */ + /* Y */ + for(y=0;y<c->height;y++) { + for(x=0;x<c->width;x++) { + picture.data[0][y * picture.linesize[0] + x] = x + y + i * 3; + } + } + + /* Cb and Cr */ + for(y=0;y<c->height/2;y++) { + for(x=0;x<c->width/2;x++) { + picture.data[1][y * picture.linesize[1] + x] = 128 + y + i * 2; + picture.data[2][y * picture.linesize[2] + x] = 64 + x + i * 5; + } + } + + /* encode the image */ + out_size = avcodec_encode_video(c, outbuf, outbuf_size, &picture); + fwrite(outbuf, 1, out_size, f); + } + + /* add sequence end code to have a real mpeg file */ + outbuf[0] = 0x00; + outbuf[1] = 0x00; + outbuf[2] = 0x01; + outbuf[3] = 0xb7; + fwrite(outbuf, 1, 4, f); + fclose(f); + free(picture_buf); + free(outbuf); + + avcodec_close(c); + printf("\n"); +} + +/* + * Video decoding example + */ + +void pgm_save(unsigned char *buf,int wrap, int xsize,int ysize,char *filename) +{ + FILE *f; + int i; + + f=fopen(filename,"w"); + fprintf(f,"P5\n%d %d\n%d\n",xsize,ysize,255); + for(i=0;i<ysize;i++) + fwrite(buf + i * wrap,1,xsize,f); + fclose(f); +} + +void video_decode_example(const char *outfilename, const char *filename) +{ + AVCodec *codec; + AVCodecContext codec_context, *c = &codec_context; + int frame, size, got_picture, len; + FILE *f; + AVPicture picture; + UINT8 inbuf[INBUF_SIZE], *inbuf_ptr; + char buf[1024]; + + printf("Video decoding\n"); + + /* find the mpeg1 video decoder */ + codec = avcodec_find_decoder(CODEC_ID_MPEG1VIDEO); + if (!codec) { + fprintf(stderr, "codec not found\n"); + exit(1); + } + + /* put default values */ + memset(c, 0, sizeof(*c)); + + /* for some codecs, such as msmpeg4 and opendivx, width and height + MUST be initialized there because these info are not available + in the bitstream */ + + /* open it */ + if (avcodec_open(c, codec) < 0) { + fprintf(stderr, "could not open codec\n"); + exit(1); + } + + /* the codec gives us the frame size, in samples */ + + f = fopen(filename, "r"); + if (!f) { + fprintf(stderr, "could not open %s\n", filename); + exit(1); + } + + frame = 0; + for(;;) { + size = fread(inbuf, 1, INBUF_SIZE, f); + if (size == 0) + break; + + /* NOTE1: some codecs are stream based (mpegvideo, mpegaudio) + and this is the only method to use them because you cannot + know the compressed data size before analysing it. + + BUT some other codecs (msmpeg4, opendivx) are inherently + frame based, so you must call them with all the data for + one frame exactly. You must also initialize 'width' and + 'height' before initializing them. */ + + /* NOTE2: some codecs allow the raw parameters (frame size, + sample rate) to be changed at any frame. We handle this, so + you should also take care of it */ + + /* here, we use a stream based decoder (mpeg1video), so we + feed decoder and see if it could decode a frame */ + inbuf_ptr = inbuf; + while (size > 0) { + len = avcodec_decode_video(c, &picture, &got_picture, + inbuf_ptr, size); + if (len < 0) { + fprintf(stderr, "Error while decoding frame %d\n", frame); + exit(1); + } + if (got_picture) { + printf("saving frame %3d\r", frame); + fflush(stdout); + + /* the picture is allocated by the decoder. no need to + free it */ + snprintf(buf, sizeof(buf), outfilename, frame); + pgm_save(picture.data[0], picture.linesize[0], + c->width, c->height, buf); + frame++; + } + size -= len; + inbuf_ptr += len; + } + } + + /* some codecs, such as MPEG, transmit the I and P frame with a + latency of one frame. You must do the following to have a + chance to get the last frame of the video */ + len = avcodec_decode_video(c, &picture, &got_picture, + NULL, 0); + if (got_picture) { + printf("saving frame %3d\r", frame); + fflush(stdout); + + /* the picture is allocated by the decoder. no need to + free it */ + snprintf(buf, sizeof(buf), outfilename, frame); + pgm_save(picture.data[0], picture.linesize[0], + c->width, c->height, buf); + frame++; + } + + fclose(f); + + avcodec_close(c); + printf("\n"); +} + + +int main(int argc, char **argv) +{ + const char *filename; + + /* must be called before using avcodec lib */ + avcodec_init(); + + /* register all the codecs (you can also register only the codec + you wish to have smaller code */ + avcodec_register_all(); + + if (argc <= 1) { + audio_encode_example("/tmp/test.mp2"); + audio_decode_example("/tmp/test.sw", "/tmp/test.mp2"); + + video_encode_example("/tmp/test.mpg"); + filename = "/tmp/test.mpg"; + } else { + filename = argv[1]; + } + + // audio_decode_example("/tmp/test.sw", filename); + video_decode_example("/tmp/test%d.pgm", filename); + + return 0; +} diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h new file mode 100644 index 0000000000..8516d2a176 --- /dev/null +++ b/libavcodec/avcodec.h @@ -0,0 +1,177 @@ +#include "common.h" + +enum CodecID { + CODEC_ID_NONE, + CODEC_ID_MPEG1VIDEO, + CODEC_ID_H263, + CODEC_ID_RV10, + CODEC_ID_MP2, + CODEC_ID_AC3, + CODEC_ID_MJPEG, + CODEC_ID_OPENDIVX, + CODEC_ID_PCM, + CODEC_ID_RAWVIDEO, + CODEC_ID_MSMPEG4, + CODEC_ID_H263P, + CODEC_ID_H263I, +}; + +enum CodecType { + CODEC_TYPE_VIDEO, + CODEC_TYPE_AUDIO, +}; + +enum PixelFormat { + PIX_FMT_YUV420P, + PIX_FMT_YUV422, + PIX_FMT_RGB24, + PIX_FMT_BGR24, +}; + +/* in bytes */ +#define AVCODEC_MAX_AUDIO_FRAME_SIZE 18432 + +/* motion estimation type */ +extern int motion_estimation_method; +#define ME_ZERO 0 +#define ME_FULL 1 +#define ME_LOG 2 +#define ME_PHODS 3 + +/* encoding support */ + +#define CODEC_FLAG_HQ 0x0001 /* high quality (non real time) encoding */ +#define CODEC_FLAG_QSCALE 0x0002 /* use fixed qscale */ + +#define FRAME_RATE_BASE 10000 + +typedef struct AVCodecContext { + int bit_rate; + int flags; + int sub_id; /* some codecs needs additionnal format info. It is + stored there */ + /* video only */ + int frame_rate; /* frames per sec multiplied by FRAME_RATE_BASE */ + int width, height; + int gop_size; /* 0 = intra only */ + int pix_fmt; /* pixel format, see PIX_FMT_xxx */ + /* audio only */ + int sample_rate; /* samples per sec */ + int channels; + + /* the following data should not be initialized */ + int frame_size; /* in samples, initialized when calling 'init' */ + int frame_number; /* audio or video frame number */ + int key_frame; /* true if the previous compressed frame was + a key frame (intra, or seekable) */ + int quality; /* quality of the previous encoded frame + (between 1 (good) and 31 (bad)) */ + struct AVCodec *codec; + void *priv_data; + + /* the following fields are ignored */ + char codec_name[32]; + int codec_type; /* see CODEC_TYPE_xxx */ + int codec_id; /* see CODEC_ID_xxx */ + unsigned int codec_tag; /* codec tag, only used if unknown codec */ +} AVCodecContext; + +typedef struct AVCodec { + char *name; + int type; + int id; + int priv_data_size; + int (*init)(AVCodecContext *); + int (*encode)(AVCodecContext *, UINT8 *buf, int buf_size, void *data); + int (*close)(AVCodecContext *); + int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, + UINT8 *buf, int buf_size); + struct AVCodec *next; +} AVCodec; + +/* three components are given, that's all */ +typedef struct AVPicture { + UINT8 *data[3]; + int linesize[3]; +} AVPicture; + +extern AVCodec ac3_encoder; +extern AVCodec mp2_encoder; +extern AVCodec mpeg1video_encoder; +extern AVCodec h263_encoder; +extern AVCodec h263p_encoder; +extern AVCodec rv10_encoder; +extern AVCodec mjpeg_encoder; +extern AVCodec opendivx_encoder; +extern AVCodec msmpeg4_encoder; + +extern AVCodec h263_decoder; +extern AVCodec opendivx_decoder; +extern AVCodec msmpeg4_decoder; +extern AVCodec mpeg_decoder; +extern AVCodec h263i_decoder; +extern AVCodec rv10_decoder; + +/* dummy raw codecs */ +extern AVCodec pcm_codec; +extern AVCodec rawvideo_codec; + +/* the following codecs use external GPL libs */ +extern AVCodec mp3_decoder; +extern AVCodec ac3_decoder; + +/* resample.c */ + +struct ReSampleContext; + +typedef struct ReSampleContext ReSampleContext; + +ReSampleContext *audio_resample_init(int output_channels, int input_channels, + int output_rate, int input_rate); +int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples); +void audio_resample_close(ReSampleContext *s); + +/* YUV420 format is assumed ! */ + +struct ImgReSampleContext; + +typedef struct ImgReSampleContext ImgReSampleContext; + +ImgReSampleContext *img_resample_init(int output_width, int output_height, + int input_width, int input_height); +void img_resample(ImgReSampleContext *s, + AVPicture *output, AVPicture *input); + +void img_resample_close(ImgReSampleContext *s); + +int img_convert_to_yuv420(UINT8 *img_out, UINT8 *img, + int pix_fmt, int width, int height); + +/* external high level API */ + +extern AVCodec *first_avcodec; + +void avcodec_init(void); + +void register_avcodec(AVCodec *format); +AVCodec *avcodec_find_encoder(enum CodecID id); +AVCodec *avcodec_find_decoder(enum CodecID id); +AVCodec *avcodec_find_decoder_by_name(const char *name); +void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode); + +int avcodec_open(AVCodecContext *avctx, AVCodec *codec); +int avcodec_decode_audio(AVCodecContext *avctx, INT16 *samples, + int *frame_size_ptr, + UINT8 *buf, int buf_size); +int avcodec_decode_video(AVCodecContext *avctx, AVPicture *picture, + int *got_picture_ptr, + UINT8 *buf, int buf_size); +int avcodec_encode_audio(AVCodecContext *avctx, UINT8 *buf, int buf_size, + const short *samples); +int avcodec_encode_video(AVCodecContext *avctx, UINT8 *buf, int buf_size, + const AVPicture *pict); + +int avcodec_close(AVCodecContext *avctx); + +void avcodec_register_all(void); +void avcodec_register_more(void); diff --git a/libavcodec/common.c b/libavcodec/common.c new file mode 100644 index 0000000000..b9365c9d8d --- /dev/null +++ b/libavcodec/common.c @@ -0,0 +1,469 @@ +/* + * Common bit i/o utils + * Copyright (c) 2000, 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#ifdef __FreeBSD__ +#include <sys/param.h> +#endif +#include <netinet/in.h> +#include <math.h> +#include "common.h" + +#define NDEBUG +#include <assert.h> + +void init_put_bits(PutBitContext *s, + UINT8 *buffer, int buffer_size, + void *opaque, + void (*write_data)(void *, UINT8 *, int)) +{ + s->buf = buffer; + s->buf_ptr = s->buf; + s->buf_end = s->buf + buffer_size; + s->bit_cnt=0; + s->bit_buf=0; + s->data_out_size = 0; + s->write_data = write_data; + s->opaque = opaque; +} + +static void flush_buffer(PutBitContext *s) +{ + int size; + if (s->write_data) { + size = s->buf_ptr - s->buf; + if (size > 0) + s->write_data(s->opaque, s->buf, size); + s->buf_ptr = s->buf; + s->data_out_size += size; + } +} + +void put_bits(PutBitContext *s, int n, unsigned int value) +{ + unsigned int bit_buf; + int bit_cnt; + +#ifdef STATS + st_out_bit_counts[st_current_index] += n; +#endif + // printf("put_bits=%d %x\n", n, value); + assert(n == 32 || value < (1U << n)); + + bit_buf = s->bit_buf; + bit_cnt = s->bit_cnt; + + // printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); + /* XXX: optimize */ + if (n < (32-bit_cnt)) { + bit_buf |= value << (32 - n - bit_cnt); + bit_cnt+=n; + } else { + bit_buf |= value >> (n + bit_cnt - 32); + *(UINT32 *)s->buf_ptr = htonl(bit_buf); + //printf("bitbuf = %08x\n", bit_buf); + s->buf_ptr+=4; + if (s->buf_ptr >= s->buf_end) + flush_buffer(s); + bit_cnt=bit_cnt + n - 32; + if (bit_cnt == 0) { + bit_buf = 0; + } else { + bit_buf = value << (32 - bit_cnt); + } + } + + s->bit_buf = bit_buf; + s->bit_cnt = bit_cnt; +} + +/* return the number of bits output */ +long long get_bit_count(PutBitContext *s) +{ + return (s->buf_ptr - s->buf + s->data_out_size) * 8 + (long long)s->bit_cnt; +} + +void align_put_bits(PutBitContext *s) +{ + put_bits(s,(8 - s->bit_cnt) & 7,0); +} + +/* pad the end of the output stream with zeros */ +void flush_put_bits(PutBitContext *s) +{ + while (s->bit_cnt > 0) { + /* XXX: should test end of buffer */ + *s->buf_ptr++=s->bit_buf >> 24; + s->bit_buf<<=8; + s->bit_cnt-=8; + } + flush_buffer(s); + s->bit_cnt=0; + s->bit_buf=0; +} + +/* for jpeg : espace 0xff with 0x00 after it */ +void jput_bits(PutBitContext *s, int n, unsigned int value) +{ + unsigned int bit_buf, b; + int bit_cnt, i; + + assert(n == 32 || value < (1U << n)); + + bit_buf = s->bit_buf; + bit_cnt = s->bit_cnt; + + //printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); + /* XXX: optimize */ + if (n < (32-bit_cnt)) { + bit_buf |= value << (32 - n - bit_cnt); + bit_cnt+=n; + } else { + bit_buf |= value >> (n + bit_cnt - 32); + /* handle escape */ + for(i=0;i<4;i++) { + b = (bit_buf >> 24); + *(s->buf_ptr++) = b; + if (b == 0xff) + *(s->buf_ptr++) = 0; + bit_buf <<= 8; + } + /* we flush the buffer sooner to handle worst case */ + if (s->buf_ptr >= (s->buf_end - 8)) + flush_buffer(s); + + bit_cnt=bit_cnt + n - 32; + if (bit_cnt == 0) { + bit_buf = 0; + } else { + bit_buf = value << (32 - bit_cnt); + } + } + + s->bit_buf = bit_buf; + s->bit_cnt = bit_cnt; +} + +/* pad the end of the output stream with zeros */ +void jflush_put_bits(PutBitContext *s) +{ + unsigned int b; + + while (s->bit_cnt > 0) { + b = s->bit_buf >> 24; + *s->buf_ptr++ = b; + if (b == 0xff) + *s->buf_ptr++ = 0; + s->bit_buf<<=8; + s->bit_cnt-=8; + } + flush_buffer(s); + s->bit_cnt=0; + s->bit_buf=0; +} + +/* bit input functions */ + +void init_get_bits(GetBitContext *s, + UINT8 *buffer, int buffer_size) +{ + s->buf = buffer; + s->buf_ptr = buffer; + s->buf_end = buffer + buffer_size; + s->bit_cnt = 0; + s->bit_buf = 0; + while (s->buf_ptr < s->buf_end && + s->bit_cnt < 32) { + s->bit_buf |= (*s->buf_ptr++ << (24 - s->bit_cnt)); + s->bit_cnt += 8; + } +} + +/* n must be >= 1 and <= 32 */ +unsigned int get_bits(GetBitContext *s, int n) +{ + unsigned int val; + int bit_cnt; + unsigned int bit_buf; + UINT8 *buf_ptr; + +#ifdef STATS + st_bit_counts[st_current_index] += n; +#endif + + bit_cnt = s->bit_cnt; + bit_buf = s->bit_buf; + + bit_cnt -= n; + if (bit_cnt >= 0) { + /* most common case here */ + val = bit_buf >> (32 - n); + bit_buf <<= n; + } else { + val = bit_buf >> (32 - n); + buf_ptr = s->buf_ptr; + buf_ptr += 4; + /* handle common case: we can read everything */ + if (buf_ptr <= s->buf_end) { + bit_buf = (buf_ptr[-4] << 24) | + (buf_ptr[-3] << 16) | + (buf_ptr[-2] << 8) | + (buf_ptr[-1]); + } else { + buf_ptr -= 4; + bit_buf = 0; + if (buf_ptr < s->buf_end) + bit_buf |= *buf_ptr++ << 24; + if (buf_ptr < s->buf_end) + bit_buf |= *buf_ptr++ << 16; + if (buf_ptr < s->buf_end) + bit_buf |= *buf_ptr++ << 8; + if (buf_ptr < s->buf_end) + bit_buf |= *buf_ptr++; + } + s->buf_ptr = buf_ptr; + val |= bit_buf >> (32 + bit_cnt); + bit_buf <<= - bit_cnt; + bit_cnt += 32; + } + s->bit_buf = bit_buf; + s->bit_cnt = bit_cnt; + return val; +} + +void align_get_bits(GetBitContext *s) +{ + int n; + n = s->bit_cnt & 7; + if (n > 0) { + get_bits(s, n); + } +} + +/* VLC decoding */ + +//#define DEBUG_VLC + +#define GET_DATA(v, table, i, wrap, size) \ +{\ + UINT8 *ptr = (UINT8 *)table + i * wrap;\ + switch(size) {\ + case 1:\ + v = *(UINT8 *)ptr;\ + break;\ + case 2:\ + v = *(UINT16 *)ptr;\ + break;\ + default:\ + v = *(UINT32 *)ptr;\ + break;\ + }\ +} + + +static int alloc_table(VLC *vlc, int size) +{ + int index; + index = vlc->table_size; + vlc->table_size += size; + if (vlc->table_size > vlc->table_allocated) { + vlc->table_allocated += (1 << vlc->bits); + vlc->table_bits = realloc(vlc->table_bits, + sizeof(INT8) * vlc->table_allocated); + vlc->table_codes = realloc(vlc->table_codes, + sizeof(INT16) * vlc->table_allocated); + if (!vlc->table_bits || + !vlc->table_codes) + return -1; + } + return index; +} + +static int build_table(VLC *vlc, int table_nb_bits, + int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + UINT32 code_prefix, int n_prefix) +{ + int i, j, k, n, table_size, table_index, nb, n1, index; + UINT32 code; + INT8 *table_bits; + INT16 *table_codes; + + table_size = 1 << table_nb_bits; + table_index = alloc_table(vlc, table_size); +#ifdef DEBUG_VLC + printf("new table index=%d size=%d code_prefix=%x n=%d\n", + table_index, table_size, code_prefix, n_prefix); +#endif + if (table_index < 0) + return -1; + table_bits = &vlc->table_bits[table_index]; + table_codes = &vlc->table_codes[table_index]; + + for(i=0;i<table_size;i++) { + table_bits[i] = 0; + table_codes[i] = -1; + } + + /* first pass: map codes and compute auxillary table sizes */ + for(i=0;i<nb_codes;i++) { + GET_DATA(n, bits, i, bits_wrap, bits_size); + GET_DATA(code, codes, i, codes_wrap, codes_size); + /* we accept tables with holes */ + if (n <= 0) + continue; +#if defined(DEBUG_VLC) && 0 + printf("i=%d n=%d code=0x%x\n", i, n, code); +#endif + /* if code matches the prefix, it is in the table */ + n -= n_prefix; + if (n > 0 && (code >> n) == code_prefix) { + if (n <= table_nb_bits) { + /* no need to add another table */ + j = (code << (table_nb_bits - n)) & (table_size - 1); + nb = 1 << (table_nb_bits - n); + for(k=0;k<nb;k++) { +#ifdef DEBUG_VLC + printf("%4x: code=%d n=%d\n", + j, i, n); +#endif + if (table_bits[j] != 0) { + fprintf(stderr, "incorrect codes\n"); + exit(1); + } + table_bits[j] = n; + table_codes[j] = i; + j++; + } + } else { + n -= table_nb_bits; + j = (code >> n) & ((1 << table_nb_bits) - 1); +#ifdef DEBUG_VLC + printf("%4x: n=%d (subtable)\n", + j, n); +#endif + /* compute table size */ + n1 = -table_bits[j]; + if (n > n1) + n1 = n; + table_bits[j] = -n1; + } + } + } + + /* second pass : fill auxillary tables recursively */ + for(i=0;i<table_size;i++) { + n = table_bits[i]; + if (n < 0) { + n = -n; + if (n > table_nb_bits) { + n = table_nb_bits; + table_bits[i] = -n; + } + index = build_table(vlc, n, nb_codes, + bits, bits_wrap, bits_size, + codes, codes_wrap, codes_size, + (code_prefix << table_nb_bits) | i, + n_prefix + table_nb_bits); + if (index < 0) + return -1; + /* note: realloc has been done, so reload tables */ + table_bits = &vlc->table_bits[table_index]; + table_codes = &vlc->table_codes[table_index]; + table_codes[i] = index; + } + } + return table_index; +} + + +/* wrap and size allow to handle most types of storage. */ +int init_vlc(VLC *vlc, int nb_bits, int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size) +{ + vlc->bits = nb_bits; + vlc->table_bits = NULL; + vlc->table_codes = NULL; + vlc->table_allocated = 0; + vlc->table_size = 0; +#ifdef DEBUG_VLC + printf("build table nb_codes=%d\n", nb_codes); +#endif + + if (build_table(vlc, nb_bits, nb_codes, + bits, bits_wrap, bits_size, + codes, codes_wrap, codes_size, + 0, 0) < 0) { + if (vlc->table_bits) + free(vlc->table_bits); + if (vlc->table_codes) + free(vlc->table_codes); + return -1; + } + return 0; +} + + +void free_vlc(VLC *vlc) +{ + free(vlc->table_bits); + free(vlc->table_codes); +} + +int get_vlc(GetBitContext *s, VLC *vlc) +{ + int bit_cnt, code, n, nb_bits, index; + UINT32 bit_buf; + INT16 *table_codes; + INT8 *table_bits; + UINT8 *buf_ptr; + + SAVE_BITS(s); + nb_bits = vlc->bits; + table_codes = vlc->table_codes; + table_bits = vlc->table_bits; + for(;;) { + SHOW_BITS(s, index, nb_bits); + code = table_codes[index]; + n = table_bits[index]; + if (n > 0) { + /* most common case */ + FLUSH_BITS(n); +#ifdef STATS + st_bit_counts[st_current_index] += n; +#endif + break; + } else if (n == 0) { + return -1; + } else { + FLUSH_BITS(nb_bits); +#ifdef STATS + st_bit_counts[st_current_index] += nb_bits; +#endif + nb_bits = -n; + table_codes = vlc->table_codes + code; + table_bits = vlc->table_bits + code; + } + } + RESTORE_BITS(s); + return code; +} diff --git a/libavcodec/common.h b/libavcodec/common.h new file mode 100644 index 0000000000..bba7d1fb9d --- /dev/null +++ b/libavcodec/common.h @@ -0,0 +1,170 @@ +#ifndef COMMON_H +#define COMMON_H + +#include "../config.h" + +#ifndef USE_LIBAVCODEC +// workaround for typedef conflict in MPlayer +typedef unsigned short UINT16; +typedef signed short INT16; +#endif + +typedef unsigned char UINT8; +typedef unsigned int UINT32; +typedef unsigned long long UINT64; +typedef signed char INT8; +typedef signed int INT32; +typedef signed long long INT64; + +/* bit output */ + +struct PutBitContext; + +typedef void (*WriteDataFunc)(void *, UINT8 *, int); + +typedef struct PutBitContext { + UINT8 *buf, *buf_ptr, *buf_end; + int bit_cnt; + UINT32 bit_buf; + long long data_out_size; /* in bytes */ + void *opaque; + WriteDataFunc write_data; +} PutBitContext; + +void init_put_bits(PutBitContext *s, + UINT8 *buffer, int buffer_size, + void *opaque, + void (*write_data)(void *, UINT8 *, int)); +void put_bits(PutBitContext *s, int n, unsigned int value); +long long get_bit_count(PutBitContext *s); +void align_put_bits(PutBitContext *s); +void flush_put_bits(PutBitContext *s); + +/* jpeg specific put_bits */ +void jput_bits(PutBitContext *s, int n, unsigned int value); +void jflush_put_bits(PutBitContext *s); + +/* bit input */ + +typedef struct GetBitContext { + UINT8 *buf, *buf_ptr, *buf_end; + int bit_cnt; + UINT32 bit_buf; +} GetBitContext; + +typedef struct VLC { + int bits; + INT16 *table_codes; + INT8 *table_bits; + int table_size, table_allocated; +} VLC; + +void init_get_bits(GetBitContext *s, + UINT8 *buffer, int buffer_size); + +unsigned int get_bits(GetBitContext *s, int n); +void align_get_bits(GetBitContext *s); +int init_vlc(VLC *vlc, int nb_bits, int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size); +void free_vlc(VLC *vlc); +int get_vlc(GetBitContext *s, VLC *vlc); + +/* macro to go faster */ +/* n must be <= 24 */ +/* XXX: optimize buffer end test */ +#define SHOW_BITS(s, val, n)\ +{\ + if (bit_cnt < n && buf_ptr < (s)->buf_end) {\ + bit_buf |= *buf_ptr++ << (24 - bit_cnt);\ + bit_cnt += 8;\ + if (bit_cnt < n && buf_ptr < (s)->buf_end) {\ + bit_buf |= *buf_ptr++ << (24 - bit_cnt);\ + bit_cnt += 8;\ + if (bit_cnt < n && buf_ptr < (s)->buf_end) {\ + bit_buf |= *buf_ptr++ << (24 - bit_cnt);\ + bit_cnt += 8;\ + }\ + }\ + }\ + val = bit_buf >> (32 - n);\ +} + +/* SHOW_BITS with n1 >= n must be been done before */ +#define FLUSH_BITS(n)\ +{\ + bit_buf <<= n;\ + bit_cnt -= n;\ +} + +#define SAVE_BITS(s) \ +{\ + bit_cnt = (s)->bit_cnt;\ + bit_buf = (s)->bit_buf;\ + buf_ptr = (s)->buf_ptr;\ +} + +#define RESTORE_BITS(s) \ +{\ + (s)->buf_ptr = buf_ptr;\ + (s)->bit_buf = bit_buf;\ + (s)->bit_cnt = bit_cnt;\ +} + +/* define it to include statistics code (useful only for optimizing + codec efficiency */ +//#define STATS + +#ifdef STATS + +enum { + ST_UNKNOWN, + ST_DC, + ST_INTRA_AC, + ST_INTER_AC, + ST_INTRA_MB, + ST_INTER_MB, + ST_MV, + ST_NB, +}; + +extern int st_current_index; +extern unsigned int st_bit_counts[ST_NB]; +extern unsigned int st_out_bit_counts[ST_NB]; + +void print_stats(void); +#endif + +/* misc math functions */ + +extern inline int log2(unsigned int v) +{ + int n; + + n = 0; + if (v & 0xffff0000) { + v >>= 16; + n += 16; + } + if (v & 0xff00) { + v >>= 8; + n += 8; + } + if (v & 0xf0) { + v >>= 4; + n += 4; + } + if (v & 0xc) { + v >>= 2; + n += 2; + } + if (v & 0x2) { + n++; + } + return n; +} + +/* memory */ +void *av_mallocz(int size); + +#endif diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c new file mode 100644 index 0000000000..1da23a1d8e --- /dev/null +++ b/libavcodec/dct-test.c @@ -0,0 +1,105 @@ +/* DCT test. (c) 2001 Gerard Lantau. + Started from sample code by Juan J. Sierralta P. +*/ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/time.h> +#include <unistd.h> + +#include "dsputil.h" + +extern void fdct(DCTELEM *block); +extern void init_fdct(); + +#define AANSCALE_BITS 12 +static const unsigned short aanscales[64] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 +}; + +INT64 gettime(void) +{ + struct timeval tv; + gettimeofday(&tv,NULL); + return (INT64)tv.tv_sec * 1000000 + tv.tv_usec; +} + +#define NB_ITS 20000 +#define NB_ITS_SPEED 50000 + +void dct_error(const char *name, + void (*fdct_func)(DCTELEM *block)) +{ + int it, i, scale; + DCTELEM block[64], block1[64]; + int err_inf, v; + INT64 err2, ti, ti1, it1; + + srandom(0); + + err_inf = 0; + err2 = 0; + for(it=0;it<NB_ITS;it++) { + for(i=0;i<64;i++) + block1[i] = random() % 256; + memcpy(block, block1, sizeof(DCTELEM) * 64); + + fdct_func(block); + if (fdct_func == jpeg_fdct_ifast) { + for(i=0; i<64; i++) { + scale = (1 << (AANSCALE_BITS + 11)) / aanscales[i]; + block[i] = (block[i] * scale) >> AANSCALE_BITS; + } + } + + fdct(block1); + + for(i=0;i<64;i++) { + v = abs(block[i] - block1[i]); + if (v > err_inf) + err_inf = v; + err2 += v * v; + } + } + printf("DCT %s: err_inf=%d err2=%0.2f\n", + name, err_inf, (double)err2 / NB_ITS / 64.0); + + /* speed test */ + for(i=0;i<64;i++) + block1[i] = 255 - 63 + i; + + ti = gettime(); + it1 = 0; + do { + for(it=0;it<NB_ITS_SPEED;it++) { + memcpy(block, block1, sizeof(DCTELEM) * 64); + fdct_func(block); + } + it1 += NB_ITS_SPEED; + ti1 = gettime() - ti; + } while (ti1 < 1000000); + + printf("DCT %s: %0.1f kdct/s\n", + name, (double)it1 * 1000.0 / (double)ti1); +} + +int main(int argc, char **argv) +{ + init_fdct(); + + printf("ffmpeg DCT test\n"); + + dct_error("REF", fdct); /* only to verify code ! */ + dct_error("AAN", jpeg_fdct_ifast); + dct_error("MMX", fdct_mmx); + return 0; +} + diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c new file mode 100644 index 0000000000..7e544a4524 --- /dev/null +++ b/libavcodec/dsputil.c @@ -0,0 +1,383 @@ +/* + * DSP utils + * Copyright (c) 2000, 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include "avcodec.h" +#include "dsputil.h" + +#ifdef CONFIG_MMX +int mm_flags; /* multimedia extension flags */ +#endif + +void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); +void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); +void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); + +op_pixels_abs_func pix_abs16x16; +op_pixels_abs_func pix_abs16x16_x2; +op_pixels_abs_func pix_abs16x16_y2; +op_pixels_abs_func pix_abs16x16_xy2; + +static UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; +UINT32 squareTbl[512]; + +void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size) +{ + DCTELEM *p; + const UINT8 *pix; + int i; + + /* read the pixels */ + p = block; + pix = pixels; + for(i=0;i<8;i++) { + p[0] = pix[0]; + p[1] = pix[1]; + p[2] = pix[2]; + p[3] = pix[3]; + p[4] = pix[4]; + p[5] = pix[5]; + p[6] = pix[6]; + p[7] = pix[7]; + pix += line_size; + p += 8; + } +} + +void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size) +{ + const DCTELEM *p; + UINT8 *pix; + int i; + UINT8 *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + p = block; + pix = pixels; + for(i=0;i<8;i++) { + pix[0] = cm[p[0]]; + pix[1] = cm[p[1]]; + pix[2] = cm[p[2]]; + pix[3] = cm[p[3]]; + pix[4] = cm[p[4]]; + pix[5] = cm[p[5]]; + pix[6] = cm[p[6]]; + pix[7] = cm[p[7]]; + pix += line_size; + p += 8; + } +} + +void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size) +{ + const DCTELEM *p; + UINT8 *pix; + int i; + UINT8 *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + p = block; + pix = pixels; + for(i=0;i<8;i++) { + pix[0] = cm[pix[0] + p[0]]; + pix[1] = cm[pix[1] + p[1]]; + pix[2] = cm[pix[2] + p[2]]; + pix[3] = cm[pix[3] + p[3]]; + pix[4] = cm[pix[4] + p[4]]; + pix[5] = cm[pix[5] + p[5]]; + pix[6] = cm[pix[6] + p[6]]; + pix[7] = cm[pix[7] + p[7]]; + pix += line_size; + p += 8; + } +} + +#define PIXOP(BTYPE, OPNAME, OP, INCR) \ + \ +static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ +{ \ + BTYPE *p; \ + const UINT8 *pix; \ + \ + p = block; \ + pix = pixels; \ + do { \ + OP(p[0], pix[0]); \ + OP(p[1], pix[1]); \ + OP(p[2], pix[2]); \ + OP(p[3], pix[3]); \ + OP(p[4], pix[4]); \ + OP(p[5], pix[5]); \ + OP(p[6], pix[6]); \ + OP(p[7], pix[7]); \ + pix += line_size; \ + p += INCR; \ + } while (--h);; \ +} \ + \ +static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ +{ \ + BTYPE *p; \ + const UINT8 *pix; \ + \ + p = block; \ + pix = pixels; \ + do { \ + OP(p[0], avg2(pix[0], pix[1])); \ + OP(p[1], avg2(pix[1], pix[2])); \ + OP(p[2], avg2(pix[2], pix[3])); \ + OP(p[3], avg2(pix[3], pix[4])); \ + OP(p[4], avg2(pix[4], pix[5])); \ + OP(p[5], avg2(pix[5], pix[6])); \ + OP(p[6], avg2(pix[6], pix[7])); \ + OP(p[7], avg2(pix[7], pix[8])); \ + pix += line_size; \ + p += INCR; \ + } while (--h); \ +} \ + \ +static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ +{ \ + BTYPE *p; \ + const UINT8 *pix; \ + const UINT8 *pix1; \ + \ + p = block; \ + pix = pixels; \ + pix1 = pixels + line_size; \ + do { \ + OP(p[0], avg2(pix[0], pix1[0])); \ + OP(p[1], avg2(pix[1], pix1[1])); \ + OP(p[2], avg2(pix[2], pix1[2])); \ + OP(p[3], avg2(pix[3], pix1[3])); \ + OP(p[4], avg2(pix[4], pix1[4])); \ + OP(p[5], avg2(pix[5], pix1[5])); \ + OP(p[6], avg2(pix[6], pix1[6])); \ + OP(p[7], avg2(pix[7], pix1[7])); \ + pix += line_size; \ + pix1 += line_size; \ + p += INCR; \ + } while(--h); \ +} \ + \ +static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \ +{ \ + BTYPE *p; \ + const UINT8 *pix; \ + const UINT8 *pix1; \ + \ + p = block; \ + pix = pixels; \ + pix1 = pixels + line_size; \ + do { \ + OP(p[0], avg4(pix[0], pix[1], pix1[0], pix1[1])); \ + OP(p[1], avg4(pix[1], pix[2], pix1[1], pix1[2])); \ + OP(p[2], avg4(pix[2], pix[3], pix1[2], pix1[3])); \ + OP(p[3], avg4(pix[3], pix[4], pix1[3], pix1[4])); \ + OP(p[4], avg4(pix[4], pix[5], pix1[4], pix1[5])); \ + OP(p[5], avg4(pix[5], pix[6], pix1[5], pix1[6])); \ + OP(p[6], avg4(pix[6], pix[7], pix1[6], pix1[7])); \ + OP(p[7], avg4(pix[7], pix[8], pix1[7], pix1[8])); \ + pix += line_size; \ + pix1 += line_size; \ + p += INCR; \ + } while(--h); \ +} \ + \ +void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \ + OPNAME ## _pixels, \ + OPNAME ## _pixels_x2, \ + OPNAME ## _pixels_y2, \ + OPNAME ## _pixels_xy2, \ +}; + + +/* rounding primitives */ +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + +#define op_put(a, b) a = b +#define op_avg(a, b) a = avg2(a, b) +#define op_sub(a, b) a -= b + +PIXOP(UINT8, put, op_put, line_size) +PIXOP(UINT8, avg, op_avg, line_size) + +PIXOP(DCTELEM, sub, op_sub, 8) + +/* not rounding primitives */ +#undef avg2 +#undef avg4 +#define avg2(a,b) ((a+b)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+1)>>2) + +PIXOP(UINT8, put_no_rnd, op_put, line_size) +PIXOP(UINT8, avg_no_rnd, op_avg, line_size) + +/* motion estimation */ + +#undef avg2 +#undef avg4 +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + +int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +{ + int s, i; + + s = 0; + for(i=0;i<h;i++) { + s += abs(pix1[0] - pix2[0]); + s += abs(pix1[1] - pix2[1]); + s += abs(pix1[2] - pix2[2]); + s += abs(pix1[3] - pix2[3]); + s += abs(pix1[4] - pix2[4]); + s += abs(pix1[5] - pix2[5]); + s += abs(pix1[6] - pix2[6]); + s += abs(pix1[7] - pix2[7]); + s += abs(pix1[8] - pix2[8]); + s += abs(pix1[9] - pix2[9]); + s += abs(pix1[10] - pix2[10]); + s += abs(pix1[11] - pix2[11]); + s += abs(pix1[12] - pix2[12]); + s += abs(pix1[13] - pix2[13]); + s += abs(pix1[14] - pix2[14]); + s += abs(pix1[15] - pix2[15]); + pix1 += line_size; + pix2 += line_size; + } + return s; +} + +int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +{ + int s, i; + + s = 0; + for(i=0;i<h;i++) { + s += abs(pix1[0] - avg2(pix2[0], pix2[1])); + s += abs(pix1[1] - avg2(pix2[1], pix2[2])); + s += abs(pix1[2] - avg2(pix2[2], pix2[3])); + s += abs(pix1[3] - avg2(pix2[3], pix2[4])); + s += abs(pix1[4] - avg2(pix2[4], pix2[5])); + s += abs(pix1[5] - avg2(pix2[5], pix2[6])); + s += abs(pix1[6] - avg2(pix2[6], pix2[7])); + s += abs(pix1[7] - avg2(pix2[7], pix2[8])); + s += abs(pix1[8] - avg2(pix2[8], pix2[9])); + s += abs(pix1[9] - avg2(pix2[9], pix2[10])); + s += abs(pix1[10] - avg2(pix2[10], pix2[11])); + s += abs(pix1[11] - avg2(pix2[11], pix2[12])); + s += abs(pix1[12] - avg2(pix2[12], pix2[13])); + s += abs(pix1[13] - avg2(pix2[13], pix2[14])); + s += abs(pix1[14] - avg2(pix2[14], pix2[15])); + s += abs(pix1[15] - avg2(pix2[15], pix2[16])); + pix1 += line_size; + pix2 += line_size; + } + return s; +} + +int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +{ + int s, i; + UINT8 *pix3 = pix2 + line_size; + + s = 0; + for(i=0;i<h;i++) { + s += abs(pix1[0] - avg2(pix2[0], pix3[0])); + s += abs(pix1[1] - avg2(pix2[1], pix3[1])); + s += abs(pix1[2] - avg2(pix2[2], pix3[2])); + s += abs(pix1[3] - avg2(pix2[3], pix3[3])); + s += abs(pix1[4] - avg2(pix2[4], pix3[4])); + s += abs(pix1[5] - avg2(pix2[5], pix3[5])); + s += abs(pix1[6] - avg2(pix2[6], pix3[6])); + s += abs(pix1[7] - avg2(pix2[7], pix3[7])); + s += abs(pix1[8] - avg2(pix2[8], pix3[8])); + s += abs(pix1[9] - avg2(pix2[9], pix3[9])); + s += abs(pix1[10] - avg2(pix2[10], pix3[10])); + s += abs(pix1[11] - avg2(pix2[11], pix3[11])); + s += abs(pix1[12] - avg2(pix2[12], pix3[12])); + s += abs(pix1[13] - avg2(pix2[13], pix3[13])); + s += abs(pix1[14] - avg2(pix2[14], pix3[14])); + s += abs(pix1[15] - avg2(pix2[15], pix3[15])); + pix1 += line_size; + pix2 += line_size; + pix3 += line_size; + } + return s; +} + +int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h) +{ + int s, i; + UINT8 *pix3 = pix2 + line_size; + + s = 0; + for(i=0;i<h;i++) { + s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1])); + s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2])); + s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3])); + s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4])); + s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5])); + s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6])); + s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7])); + s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8])); + s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9])); + s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10])); + s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11])); + s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12])); + s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13])); + s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14])); + s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15])); + s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16])); + pix1 += line_size; + pix2 += line_size; + pix3 += line_size; + } + return s; +} + +void dsputil_init(void) +{ + int i; + + for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; + for(i=0;i<MAX_NEG_CROP;i++) { + cropTbl[i] = 0; + cropTbl[i + MAX_NEG_CROP + 256] = 255; + } + + for(i=0;i<512;i++) { + squareTbl[i] = (i - 256) * (i - 256); + } + + get_pixels = get_pixels_c; + put_pixels_clamped = put_pixels_clamped_c; + add_pixels_clamped = add_pixels_clamped_c; + + pix_abs16x16 = pix_abs16x16_c; + pix_abs16x16_x2 = pix_abs16x16_x2_c; + pix_abs16x16_y2 = pix_abs16x16_y2_c; + pix_abs16x16_xy2 = pix_abs16x16_xy2_c; + av_fdct = jpeg_fdct_ifast; + +#ifdef CONFIG_MMX + dsputil_init_mmx(); +#endif +} diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h new file mode 100644 index 0000000000..717653e813 --- /dev/null +++ b/libavcodec/dsputil.h @@ -0,0 +1,91 @@ +#ifndef DSPUTIL_H +#define DSPUTIL_H + +#include "common.h" +#include <inttypes.h> + +/* dct code */ +typedef short DCTELEM; + +void jpeg_fdct_ifast (DCTELEM *data); + +void j_rev_dct (DCTELEM *data); + +void fdct_mmx(DCTELEM *block); + +void (*av_fdct)(DCTELEM *block); + +/* pixel operations */ +#define MAX_NEG_CROP 384 + +/* temporary */ +extern UINT32 squareTbl[512]; + +void dsputil_init(void); + +/* pixel ops : interface with DCT */ + +extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); +extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); +extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); + +void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); +void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); +void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); + +/* add and put pixel (decoding) */ +typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h); + +extern op_pixels_func put_pixels_tab[4]; +extern op_pixels_func avg_pixels_tab[4]; +extern op_pixels_func put_no_rnd_pixels_tab[4]; +extern op_pixels_func avg_no_rnd_pixels_tab[4]; + +/* sub pixel (encoding) */ +extern void (*sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_size, int h); + +#define sub_pixels_2(block, pixels, line_size, dxy) \ + sub_pixels_tab[dxy](block, pixels, line_size, 8) + +/* motion estimation */ + +typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size, int h); + +extern op_pixels_abs_func pix_abs16x16; +extern op_pixels_abs_func pix_abs16x16_x2; +extern op_pixels_abs_func pix_abs16x16_y2; +extern op_pixels_abs_func pix_abs16x16_xy2; + +int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx, int h); + +#ifdef CONFIG_MMX + +#define MM_MMX 0x0001 /* standard MMX */ +#define MM_3DNOW 0x0004 /* AMD 3DNOW */ +#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ +#define MM_SSE 0x0008 /* SSE functions */ +#define MM_SSE2 0x0010 /* PIV SSE2 functions */ + +extern int mm_flags; + +int mm_support(void); + +static inline void emms(void) +{ + asm volatile ("emms;"); +} + +#define __align8 __attribute__ ((aligned (8))) + +void dsputil_init_mmx(void); + +#else + +#define __align8 + +#endif + +#endif diff --git a/libavcodec/fdctref.c b/libavcodec/fdctref.c new file mode 100644 index 0000000000..5038e67538 --- /dev/null +++ b/libavcodec/fdctref.c @@ -0,0 +1,120 @@ +/* fdctref.c, forward discrete cosine transform, double precision */ + +/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */ + +/* + * Disclaimer of Warranty + * + * These software programs are available to the user without any license fee or + * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims + * any and all warranties, whether express, implied, or statuary, including any + * implied warranties or merchantability or of fitness for a particular + * purpose. In no event shall the copyright-holder be liable for any + * incidental, punitive, or consequential damages of any kind whatsoever + * arising from the use of these programs. + * + * This disclaimer of warranty extends to the user of these programs and user's + * customers, employees, agents, transferees, successors, and assigns. + * + * The MPEG Software Simulation Group does not represent or warrant that the + * programs furnished hereunder are free of infringement of any third-party + * patents. + * + * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware, + * are subject to royalty fees to patent holders. Many of these patents are + * general enough such that they are unavoidable regardless of implementation + * design. + * + */ + +#include <math.h> + +// #include "config.h" + +#ifndef PI +# ifdef M_PI +# define PI M_PI +# else +# define PI 3.14159265358979323846 +# endif +#endif + +/* global declarations */ +void init_fdct (void); +void fdct (short *block); + +/* private data */ +static double c[8][8]; /* transform coefficients */ + +void init_fdct() +{ + int i, j; + double s; + + for (i=0; i<8; i++) + { + s = (i==0) ? sqrt(0.125) : 0.5; + + for (j=0; j<8; j++) + c[i][j] = s * cos((PI/8.0)*i*(j+0.5)); + } +} + +void fdct(block) +short *block; +{ + register int i, j; + double s; + double tmp[64]; + + for(i = 0; i < 8; i++) + for(j = 0; j < 8; j++) + { + s = 0.0; + +/* + * for(k = 0; k < 8; k++) + * s += c[j][k] * block[8 * i + k]; + */ + s += c[j][0] * block[8 * i + 0]; + s += c[j][1] * block[8 * i + 1]; + s += c[j][2] * block[8 * i + 2]; + s += c[j][3] * block[8 * i + 3]; + s += c[j][4] * block[8 * i + 4]; + s += c[j][5] * block[8 * i + 5]; + s += c[j][6] * block[8 * i + 6]; + s += c[j][7] * block[8 * i + 7]; + + tmp[8 * i + j] = s; + } + + for(j = 0; j < 8; j++) + for(i = 0; i < 8; i++) + { + s = 0.0; + +/* + * for(k = 0; k < 8; k++) + * s += c[i][k] * tmp[8 * k + j]; + */ + s += c[i][0] * tmp[8 * 0 + j]; + s += c[i][1] * tmp[8 * 1 + j]; + s += c[i][2] * tmp[8 * 2 + j]; + s += c[i][3] * tmp[8 * 3 + j]; + s += c[i][4] * tmp[8 * 4 + j]; + s += c[i][5] * tmp[8 * 5 + j]; + s += c[i][6] * tmp[8 * 6 + j]; + s += c[i][7] * tmp[8 * 7 + j]; + + block[8 * i + j] = (short)floor(s + 0.499999); +/* + * reason for adding 0.499999 instead of 0.5: + * s is quite often x.5 (at least for i and/or j = 0 or 4) + * and setting the rounding threshold exactly to 0.5 leads to an + * extremely high arithmetic implementation dependency of the result; + * s being between x.5 and x.500001 (which is now incorrectly rounded + * downwards instead of upwards) is assumed to occur less often + * (if at all) + */ + } +} diff --git a/libavcodec/h263.c b/libavcodec/h263.c new file mode 100644 index 0000000000..35ecb15c2e --- /dev/null +++ b/libavcodec/h263.c @@ -0,0 +1,1291 @@ +/* + * H263/MPEG4 backend for ffmpeg encoder and decoder + * Copyright (c) 2000,2001 Gerard Lantau. + * H263+ support for custom picture format. + * Copyright (c) 2001 Juan J. Sierralta P. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "common.h" +#include "dsputil.h" +#include "avcodec.h" +#include "mpegvideo.h" +#include "h263data.h" +#include "mpeg4data.h" + +#define NDEBUG +#include <assert.h> + +static void h263_encode_block(MpegEncContext * s, DCTELEM * block, + int n); +static void h263_encode_motion(MpegEncContext * s, int val); +static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, + int n); +static int h263_decode_motion(MpegEncContext * s, int pred); +static int h263_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded); +static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded); + +int h263_get_picture_format(int width, int height) +{ + int format; + + if (width == 128 && height == 96) + format = 1; + else if (width == 176 && height == 144) + format = 2; + else if (width == 352 && height == 288) + format = 3; + else if (width == 704 && height == 576) + format = 4; + else if (width == 1408 && height == 1152) + format = 5; + else + format = 7; + return format; +} + +void h263_encode_picture_header(MpegEncContext * s, int picture_number) +{ + int format, umvplus; + + align_put_bits(&s->pb); + put_bits(&s->pb, 22, 0x20); + put_bits(&s->pb, 8, ((s->picture_number * 30 * FRAME_RATE_BASE) / + s->frame_rate) & 0xff); + + put_bits(&s->pb, 1, 1); /* marker */ + put_bits(&s->pb, 1, 0); /* h263 id */ + put_bits(&s->pb, 1, 0); /* split screen off */ + put_bits(&s->pb, 1, 0); /* camera off */ + put_bits(&s->pb, 1, 0); /* freeze picture release off */ + + if (!s->h263_plus) { + /* H.263v1 */ + format = h263_get_picture_format(s->width, s->height); + put_bits(&s->pb, 3, format); + put_bits(&s->pb, 1, (s->pict_type == P_TYPE)); + /* By now UMV IS DISABLED ON H.263v1, since the restrictions + of H.263v1 UMV implies to check the predicted MV after + calculation of the current MB to see if we're on the limits */ + put_bits(&s->pb, 1, 0); /* unrestricted motion vector: off */ + put_bits(&s->pb, 1, 0); /* SAC: off */ + put_bits(&s->pb, 1, 0); /* advanced prediction mode: off */ + put_bits(&s->pb, 1, 0); /* not PB frame */ + put_bits(&s->pb, 5, s->qscale); + put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ + } else { + /* H.263v2 */ + /* H.263 Plus PTYPE */ + put_bits(&s->pb, 3, 7); + put_bits(&s->pb,3,1); /* Update Full Extended PTYPE */ + put_bits(&s->pb,3,6); /* Custom Source Format */ + put_bits(&s->pb,1,0); /* Custom PCF: off */ + umvplus = (s->pict_type == P_TYPE) && s->unrestricted_mv; + put_bits(&s->pb, 1, umvplus); /* Unrestricted Motion Vector */ + put_bits(&s->pb,1,0); /* SAC: off */ + put_bits(&s->pb,1,0); /* Advanced Prediction Mode: off */ + put_bits(&s->pb,1,0); /* Advanced Intra Coding: off */ + put_bits(&s->pb,1,0); /* Deblocking Filter: off */ + put_bits(&s->pb,1,0); /* Slice Structured: off */ + put_bits(&s->pb,1,0); /* Reference Picture Selection: off */ + put_bits(&s->pb,1,0); /* Independent Segment Decoding: off */ + put_bits(&s->pb,1,0); /* Alternative Inter VLC: off */ + put_bits(&s->pb,1,0); /* Modified Quantization: off */ + put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ + put_bits(&s->pb,3,0); /* Reserved */ + + put_bits(&s->pb, 3, s->pict_type == P_TYPE); + + put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */ + put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */ + put_bits(&s->pb,1,0); /* Rounding Type */ + put_bits(&s->pb,2,0); /* Reserved */ + put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ + + /* This should be here if PLUSPTYPE */ + put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */ + + /* Custom Picture Format (CPFMT) */ + + put_bits(&s->pb,4,2); /* Aspect ratio: CIF 12:11 (4:3) picture */ + put_bits(&s->pb,9,(s->width >> 2) - 1); + put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ + put_bits(&s->pb,9,(s->height >> 2)); + /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */ + if (umvplus) + put_bits(&s->pb,1,1); /* Limited according tables of Annex D */ + put_bits(&s->pb, 5, s->qscale); + } + + put_bits(&s->pb, 1, 0); /* no PEI */ +} + +void h263_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + int cbpc, cbpy, i, cbp, pred_x, pred_y; + + // printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); + if (!s->mb_intra) { + /* compute cbp */ + cbp = 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + if ((cbp | motion_x | motion_y) == 0) { + /* skip macroblock */ + put_bits(&s->pb, 1, 1); + return; + } + put_bits(&s->pb, 1, 0); /* mb coded */ + cbpc = cbp & 3; + put_bits(&s->pb, + inter_MCBPC_bits[cbpc], + inter_MCBPC_code[cbpc]); + cbpy = cbp >> 2; + cbpy ^= 0xf; + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + + /* motion vectors: 16x16 mode only now */ + h263_pred_motion(s, 0, &pred_x, &pred_y); + + h263_encode_motion(s, motion_x - pred_x); + h263_encode_motion(s, motion_y - pred_y); + } else { + /* compute cbp */ + cbp = 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 1) + cbp |= 1 << (5 - i); + } + + cbpc = cbp & 3; + if (s->pict_type == I_TYPE) { + put_bits(&s->pb, + intra_MCBPC_bits[cbpc], + intra_MCBPC_code[cbpc]); + } else { + put_bits(&s->pb, 1, 0); /* mb coded */ + put_bits(&s->pb, + inter_MCBPC_bits[cbpc + 4], + inter_MCBPC_code[cbpc + 4]); + } + if (s->h263_pred) { + /* XXX: currently, we do not try to use ac prediction */ + put_bits(&s->pb, 1, 0); /* no ac prediction */ + } + cbpy = cbp >> 2; + put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + } + + /* encode each block */ + if (s->h263_pred) { + for (i = 0; i < 6; i++) { + mpeg4_encode_block(s, block[i], i); + } + } else { + for (i = 0; i < 6; i++) { + h263_encode_block(s, block[i], i); + } + } +} + +static inline int mid_pred(int a, int b, int c) +{ + int vmin, vmax; + vmin = a; + if (b < vmin) + vmin = b; + if (c < vmin) + vmin = c; + + vmax = a; + if (b > vmax) + vmax = b; + if (c > vmax) + vmax = c; + + return a + b + c - vmin - vmax; +} + +INT16 *h263_pred_motion(MpegEncContext * s, int block, + int *px, int *py) +{ + int x, y, wrap; + INT16 *A, *B, *C, *mot_val; + + x = 2 * s->mb_x + 1 + (block & 1); + y = 2 * s->mb_y + 1 + ((block >> 1) & 1); + wrap = 2 * s->mb_width + 2; + + mot_val = s->motion_val[(x) + (y) * wrap]; + + /* special case for first line */ + if (y == 1 || s->first_slice_line) { + A = s->motion_val[(x-1) + (y) * wrap]; + *px = A[0]; + *py = A[1]; + } else { + switch(block) { + default: + case 0: + A = s->motion_val[(x-1) + (y) * wrap]; + B = s->motion_val[(x) + (y-1) * wrap]; + C = s->motion_val[(x+2) + (y-1) * wrap]; + break; + case 1: + case 2: + A = s->motion_val[(x-1) + (y) * wrap]; + B = s->motion_val[(x) + (y-1) * wrap]; + C = s->motion_val[(x+1) + (y-1) * wrap]; + break; + case 3: + A = s->motion_val[(x-1) + (y) * wrap]; + B = s->motion_val[(x-1) + (y-1) * wrap]; + C = s->motion_val[(x) + (y-1) * wrap]; + break; + } + *px = mid_pred(A[0], B[0], C[0]); + *py = mid_pred(A[1], B[1], C[1]); + } + return mot_val; +} + + +static void h263_encode_motion(MpegEncContext * s, int val) +{ + int range, l, m, bit_size, sign, code, bits; + + if (val == 0) { + /* zero vector */ + code = 0; + put_bits(&s->pb, mvtab[code][1], mvtab[code][0]); + } else { + bit_size = s->f_code - 1; + range = 1 << bit_size; + /* modulo encoding */ + l = range * 32; + m = 2 * l; + if (val < -l) { + val += m; + } else if (val >= l) { + val -= m; + } + + if (val >= 0) { + val--; + code = (val >> bit_size) + 1; + bits = val & (range - 1); + sign = 0; + } else { + val = -val; + val--; + code = (val >> bit_size) + 1; + bits = val & (range - 1); + sign = 1; + } + + put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); + if (bit_size > 0) { + put_bits(&s->pb, bit_size, bits); + } + } +} + +void h263_encode_init_vlc(MpegEncContext *s) +{ + static int done = 0; + + if (!done) { + done = 1; + init_rl(&rl_inter); + init_rl(&rl_intra); + } +} + +static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n) +{ + int level, run, last, i, j, last_index, last_non_zero, sign, slevel; + int code; + RLTable *rl = &rl_inter; + + if (s->mb_intra) { + /* DC coef */ + level = block[0]; + /* 255 cannot be represented, so we clamp */ + if (level > 254) { + level = 254; + block[0] = 254; + } + if (level == 128) + put_bits(&s->pb, 8, 0xff); + else + put_bits(&s->pb, 8, level & 0xff); + i = 1; + } else { + i = 0; + } + + /* AC coefs */ + last_index = s->block_last_index[n]; + last_non_zero = i - 1; + for (; i <= last_index; i++) { + j = zigzag_direct[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + slevel = level; + if (level < 0) { + sign = 1; + level = -level; + } + code = get_rl_index(rl, last, run, level); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + put_bits(&s->pb, 1, last); + put_bits(&s->pb, 6, run); + put_bits(&s->pb, 8, slevel & 0xff); + } else { + put_bits(&s->pb, 1, sign); + } + last_non_zero = i; + } + } +} + +/***************************************************/ + +/* write mpeg4 VOP header */ +void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) +{ + align_put_bits(&s->pb); + + put_bits(&s->pb, 32, 0x1B6); /* vop header */ + put_bits(&s->pb, 2, s->pict_type - 1); /* pict type: I = 0 , P = 1 */ + /* XXX: time base + 1 not always correct */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, 0); + + put_bits(&s->pb, 1, 1); /* marker */ + put_bits(&s->pb, 4, 1); /* XXX: correct time increment */ + put_bits(&s->pb, 1, 1); /* marker */ + put_bits(&s->pb, 1, 1); /* vop coded */ + if (s->pict_type == P_TYPE) { + s->no_rounding = 0; + put_bits(&s->pb, 1, s->no_rounding); /* rounding type */ + } + put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */ + + put_bits(&s->pb, 5, s->qscale); + + if (s->pict_type != I_TYPE) + put_bits(&s->pb, 3, s->f_code); /* fcode_for */ + // printf("****frame %d\n", picture_number); +} + +void h263_dc_scale(MpegEncContext * s) +{ + int quant; + + quant = s->qscale; + /* luminance */ + if (quant < 5) + s->y_dc_scale = 8; + else if (quant > 4 && quant < 9) + s->y_dc_scale = (2 * quant); + else if (quant > 8 && quant < 25) + s->y_dc_scale = (quant + 8); + else + s->y_dc_scale = (2 * quant - 16); + /* chrominance */ + if (quant < 5) + s->c_dc_scale = 8; + else if (quant > 4 && quant < 25) + s->c_dc_scale = ((quant + 13) / 2); + else + s->c_dc_scale = (quant - 6); +} + +static int mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr) +{ + int a, b, c, x, y, wrap, pred, scale; + UINT16 *dc_val; + + /* find prediction */ + if (n < 4) { + x = 2 * s->mb_x + 1 + (n & 1); + y = 2 * s->mb_y + 1 + ((n & 2) >> 1); + wrap = s->mb_width * 2 + 2; + dc_val = s->dc_val[0]; + scale = s->y_dc_scale; + } else { + x = s->mb_x + 1; + y = s->mb_y + 1; + wrap = s->mb_width + 2; + dc_val = s->dc_val[n - 4 + 1]; + scale = s->c_dc_scale; + } + + /* B C + * A X + */ + a = dc_val[(x - 1) + (y) * wrap]; + b = dc_val[(x - 1) + (y - 1) * wrap]; + c = dc_val[(x) + (y - 1) * wrap]; + + if (abs(a - b) < abs(b - c)) { + pred = c; + *dir_ptr = 1; /* top */ + } else { + pred = a; + *dir_ptr = 0; /* left */ + } + /* we assume pred is positive */ + pred = (pred + (scale >> 1)) / scale; + + /* prepare address for prediction update */ + *dc_val_ptr = &dc_val[(x) + (y) * wrap]; + + return pred; +} + +void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, + int dir) +{ + int x, y, wrap, i; + INT16 *ac_val, *ac_val1; + + /* find prediction */ + if (n < 4) { + x = 2 * s->mb_x + 1 + (n & 1); + y = 2 * s->mb_y + 1 + ((n & 2) >> 1); + wrap = s->mb_width * 2 + 2; + ac_val = s->ac_val[0][0]; + } else { + x = s->mb_x + 1; + y = s->mb_y + 1; + wrap = s->mb_width + 2; + ac_val = s->ac_val[n - 4 + 1][0]; + } + ac_val += ((y) * wrap + (x)) * 16; + ac_val1 = ac_val; + if (s->ac_pred) { + if (dir == 0) { + /* left prediction */ + ac_val -= 16; + for(i=1;i<8;i++) { + block[i*8] += ac_val[i]; + } + } else { + /* top prediction */ + ac_val -= 16 * wrap; + for(i=1;i<8;i++) { + block[i] += ac_val[i + 8]; + } + } + } + /* left copy */ + for(i=1;i<8;i++) + ac_val1[i] = block[i * 8]; + /* top copy */ + for(i=1;i<8;i++) + ac_val1[8 + i] = block[i]; +} + +static inline void mpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr) +{ + int size, v, pred; + UINT16 *dc_val; + + pred = mpeg4_pred_dc(s, n, &dc_val, dir_ptr); + if (n < 4) { + *dc_val = level * s->y_dc_scale; + } else { + *dc_val = level * s->c_dc_scale; + } + + /* do the prediction */ + level -= pred; + /* find number of bits */ + size = 0; + v = abs(level); + while (v) { + v >>= 1; + size++; + } + + if (n < 4) { + /* luminance */ + put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]); + } else { + /* chrominance */ + put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]); + } + + /* encode remaining bits */ + if (size > 0) { + if (level < 0) + level = (-level) ^ ((1 << size) - 1); + put_bits(&s->pb, size, level); + if (size > 8) + put_bits(&s->pb, 1, 1); + } +} + +static void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n) +{ + int level, run, last, i, j, last_index, last_non_zero, sign, slevel; + int code, dc_pred_dir; + const RLTable *rl; + + if (s->mb_intra) { + /* mpeg4 based DC predictor */ + mpeg4_encode_dc(s, block[0], n, &dc_pred_dir); + i = 1; + rl = &rl_intra; + } else { + i = 0; + rl = &rl_inter; + } + + /* AC coefs */ + last_index = s->block_last_index[n]; + last_non_zero = i - 1; + for (; i <= last_index; i++) { + j = zigzag_direct[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + slevel = level; + if (level < 0) { + sign = 1; + level = -level; + } + code = get_rl_index(rl, last, run, level); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + int level1, run1; + level1 = level - rl->max_level[last][run]; + if (level1 < 1) + goto esc2; + code = get_rl_index(rl, last, run, level1); + if (code == rl->n) { + esc2: + put_bits(&s->pb, 1, 1); + if (level > MAX_LEVEL) + goto esc3; + run1 = run - rl->max_run[last][level] - 1; + if (run1 < 0) + goto esc3; + code = get_rl_index(rl, last, run1, level); + if (code == rl->n) { + esc3: + /* third escape */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, last); + put_bits(&s->pb, 6, run); + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 12, slevel & 0xfff); + put_bits(&s->pb, 1, 1); + } else { + /* second escape */ + put_bits(&s->pb, 1, 0); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(&s->pb, 1, sign); + } + } else { + /* first escape */ + put_bits(&s->pb, 1, 0); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(&s->pb, 1, sign); + } + } else { + put_bits(&s->pb, 1, sign); + } + last_non_zero = i; + } + } +} + + + +/***********************************************/ +/* decoding */ + +static VLC intra_MCBPC_vlc; +static VLC inter_MCBPC_vlc; +static VLC cbpy_vlc; +static VLC mv_vlc; +static VLC dc_lum, dc_chrom; + +void init_rl(RLTable *rl) +{ + INT8 max_level[MAX_RUN+1], max_run[MAX_LEVEL+1]; + UINT8 index_run[MAX_RUN+1]; + int last, run, level, start, end, i; + + /* compute max_level[], max_run[] and index_run[] */ + for(last=0;last<2;last++) { + if (last == 0) { + start = 0; + end = rl->last; + } else { + start = rl->last; + end = rl->n; + } + + memset(max_level, 0, MAX_RUN + 1); + memset(max_run, 0, MAX_LEVEL + 1); + memset(index_run, rl->n, MAX_RUN + 1); + for(i=start;i<end;i++) { + run = rl->table_run[i]; + level = rl->table_level[i]; + if (index_run[run] == rl->n) + index_run[run] = i; + if (level > max_level[run]) + max_level[run] = level; + if (run > max_run[level]) + max_run[level] = run; + } + rl->max_level[last] = malloc(MAX_RUN + 1); + memcpy(rl->max_level[last], max_level, MAX_RUN + 1); + rl->max_run[last] = malloc(MAX_LEVEL + 1); + memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1); + rl->index_run[last] = malloc(MAX_RUN + 1); + memcpy(rl->index_run[last], index_run, MAX_RUN + 1); + } +} + +void init_vlc_rl(RLTable *rl) +{ + init_vlc(&rl->vlc, 9, rl->n + 1, + &rl->table_vlc[0][1], 4, 2, + &rl->table_vlc[0][0], 4, 2); +} + +/* init vlcs */ + +/* XXX: find a better solution to handle static init */ +void h263_decode_init_vlc(MpegEncContext *s) +{ + static int done = 0; + + if (!done) { + done = 1; + + init_vlc(&intra_MCBPC_vlc, 6, 8, + intra_MCBPC_bits, 1, 1, + intra_MCBPC_code, 1, 1); + init_vlc(&inter_MCBPC_vlc, 9, 20, + inter_MCBPC_bits, 1, 1, + inter_MCBPC_code, 1, 1); + init_vlc(&cbpy_vlc, 6, 16, + &cbpy_tab[0][1], 2, 1, + &cbpy_tab[0][0], 2, 1); + init_vlc(&mv_vlc, 9, 33, + &mvtab[0][1], 2, 1, + &mvtab[0][0], 2, 1); + init_rl(&rl_inter); + init_rl(&rl_intra); + init_vlc_rl(&rl_inter); + init_vlc_rl(&rl_intra); + init_vlc(&dc_lum, 9, 13, + &DCtab_lum[0][1], 2, 1, + &DCtab_lum[0][0], 2, 1); + init_vlc(&dc_chrom, 9, 13, + &DCtab_chrom[0][1], 2, 1, + &DCtab_chrom[0][0], 2, 1); + } +} + +int h263_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]) +{ + int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant; + INT16 *mot_val; + static UINT8 quant_tab[4] = { -1, -2, 1, 2 }; + + if (s->pict_type == P_TYPE) { + if (get_bits(&s->gb, 1)) { + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + return 0; + } + cbpc = get_vlc(&s->gb, &inter_MCBPC_vlc); + if (cbpc < 0) + return -1; + dquant = cbpc & 8; + s->mb_intra = ((cbpc & 4) != 0); + } else { + cbpc = get_vlc(&s->gb, &intra_MCBPC_vlc); + if (cbpc < 0) + return -1; + dquant = cbpc & 4; + s->mb_intra = 1; + } + + if (!s->mb_intra) { + cbpy = get_vlc(&s->gb, &cbpy_vlc); + cbp = (cbpc & 3) | ((cbpy ^ 0xf) << 2); + if (dquant) { + s->qscale += quant_tab[get_bits(&s->gb, 2)]; + if (s->qscale < 1) + s->qscale = 1; + else if (s->qscale > 31) + s->qscale = 31; + } + s->mv_dir = MV_DIR_FORWARD; + if ((cbpc & 16) == 0) { + /* 16x16 motion prediction */ + s->mv_type = MV_TYPE_16X16; + h263_pred_motion(s, 0, &pred_x, &pred_y); + mx = h263_decode_motion(s, pred_x); + if (mx >= 0xffff) + return -1; + my = h263_decode_motion(s, pred_y); + if (my >= 0xffff) + return -1; + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + } else { + s->mv_type = MV_TYPE_8X8; + for(i=0;i<4;i++) { + mot_val = h263_pred_motion(s, i, &pred_x, &pred_y); + mx = h263_decode_motion(s, pred_x); + if (mx >= 0xffff) + return -1; + my = h263_decode_motion(s, pred_y); + if (my >= 0xffff) + return -1; + s->mv[0][i][0] = mx; + s->mv[0][i][1] = my; + mot_val[0] = mx; + mot_val[1] = my; + } + } + } else { + s->ac_pred = 0; + if (s->h263_pred) { + s->ac_pred = get_bits(&s->gb, 1); + } + cbpy = get_vlc(&s->gb, &cbpy_vlc); + cbp = (cbpc & 3) | (cbpy << 2); + if (dquant) { + s->qscale += quant_tab[get_bits(&s->gb, 2)]; + if (s->qscale < 1) + s->qscale = 1; + else if (s->qscale > 31) + s->qscale = 31; + } + } + + /* decode each block */ + if (s->h263_pred) { + for (i = 0; i < 6; i++) { + if (mpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) + return -1; + } + } else { + for (i = 0; i < 6; i++) { + if (h263_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) + return -1; + } + } + return 0; +} + +static int h263_decode_motion(MpegEncContext * s, int pred) +{ + int code, val, sign, shift, l, m; + + code = get_vlc(&s->gb, &mv_vlc); + if (code < 0) + return 0xffff; + + if (code == 0) + return pred; + sign = get_bits(&s->gb, 1); + shift = s->f_code - 1; + val = (code - 1) << shift; + if (shift > 0) + val |= get_bits(&s->gb, shift); + val++; + if (sign) + val = -val; + val += pred; + + /* modulo decoding */ + if (!s->h263_long_vectors) { + l = (1 << (s->f_code - 1)) * 32; + m = 2 * l; + if (val < -l) { + val += m; + } else if (val >= l) { + val -= m; + } + } else { + /* horrible h263 long vector mode */ + if (pred < -31 && val < -63) + val += 64; + if (pred > 32 && val > 63) + val -= 64; + } + return val; +} + +static int h263_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded) +{ + int code, level, i, j, last, run; + RLTable *rl = &rl_inter; + + if (s->mb_intra) { + /* DC coef */ + if (s->h263_rv10 && s->rv10_version == 3 && s->pict_type == I_TYPE) { + int component, diff; + component = (n <= 3 ? 0 : n - 4 + 1); + level = s->last_dc[component]; + if (s->rv10_first_dc_coded[component]) { + diff = rv_decode_dc(s, n); + if (diff == 0xffff) + return -1; + level += diff; + level = level & 0xff; /* handle wrap round */ + s->last_dc[component] = level; + } else { + s->rv10_first_dc_coded[component] = 1; + } + } else { + level = get_bits(&s->gb, 8); + if (level == 255) + level = 128; + } + block[0] = level; + i = 1; + } else { + i = 0; + } + if (!coded) { + s->block_last_index[n] = i - 1; + return 0; + } + + for(;;) { + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0) + return -1; + if (code == rl->n) { + /* escape */ + last = get_bits(&s->gb, 1); + run = get_bits(&s->gb, 6); + level = (INT8)get_bits(&s->gb, 8); + if (s->h263_rv10 && level == -128) { + /* XXX: should patch encoder too */ + level = get_bits(&s->gb, 12); + level = (level << 20) >> 20; + } + } else { + run = rl->table_run[code]; + level = rl->table_level[code]; + last = code >= rl->last; + if (get_bits(&s->gb, 1)) + level = -level; + } + i += run; + if (i >= 64) + return -1; + j = zigzag_direct[i]; + block[j] = level; + if (last) + break; + i++; + } + s->block_last_index[n] = i; + return 0; +} + +static int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) +{ + int level, pred, code; + UINT16 *dc_val; + + if (n < 4) + code = get_vlc(&s->gb, &dc_lum); + else + code = get_vlc(&s->gb, &dc_chrom); + if (code < 0) + return -1; + if (code == 0) { + level = 0; + } else { + level = get_bits(&s->gb, code); + if ((level >> (code - 1)) == 0) /* if MSB not set it is negative*/ + level = - (level ^ ((1 << code) - 1)); + if (code > 8) + get_bits(&s->gb, 1); /* marker */ + } + + pred = mpeg4_pred_dc(s, n, &dc_val, dir_ptr); + level += pred; + if (level < 0) + level = 0; + if (n < 4) { + *dc_val = level * s->y_dc_scale; + } else { + *dc_val = level * s->c_dc_scale; + } + return level; +} + +static int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded) +{ + int code, level, i, j, last, run; + int dc_pred_dir; + RLTable *rl; + const UINT8 *scan_table; + + if (s->mb_intra) { + /* DC coef */ + level = mpeg4_decode_dc(s, n, &dc_pred_dir); + if (level < 0) + return -1; + block[0] = level; + i = 1; + if (!coded) + goto not_coded; + rl = &rl_intra; + if (s->ac_pred) { + if (dc_pred_dir == 0) + scan_table = ff_alternate_vertical_scan; /* left */ + else + scan_table = ff_alternate_horizontal_scan; /* top */ + } else { + scan_table = zigzag_direct; + } + } else { + i = 0; + if (!coded) { + s->block_last_index[n] = i - 1; + return 0; + } + rl = &rl_inter; + scan_table = zigzag_direct; + } + + for(;;) { + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0) + return -1; + if (code == rl->n) { + /* escape */ + if (get_bits(&s->gb, 1) != 0) { + if (get_bits(&s->gb, 1) != 0) { + /* third escape */ + last = get_bits(&s->gb, 1); + run = get_bits(&s->gb, 6); + get_bits(&s->gb, 1); /* marker */ + level = get_bits(&s->gb, 12); + level = (level << 20) >> 20; /* sign extend */ + get_bits(&s->gb, 1); /* marker */ + } else { + /* second escape */ + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0 || code >= rl->n) + return -1; + run = rl->table_run[code]; + level = rl->table_level[code]; + last = code >= rl->last; + run += rl->max_run[last][level] + 1; + if (get_bits(&s->gb, 1)) + level = -level; + } + } else { + /* first escape */ + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0 || code >= rl->n) + return -1; + run = rl->table_run[code]; + level = rl->table_level[code]; + last = code >= rl->last; + level += rl->max_level[last][run]; + if (get_bits(&s->gb, 1)) + level = -level; + } + } else { + run = rl->table_run[code]; + level = rl->table_level[code]; + last = code >= rl->last; + if (get_bits(&s->gb, 1)) + level = -level; + } + i += run; + if (i >= 64) + return -1; + j = scan_table[i]; + block[j] = level; + i++; + if (last) + break; + } + not_coded: + if (s->mb_intra) { + mpeg4_pred_ac(s, block, n, dc_pred_dir); + if (s->ac_pred) { + i = 64; /* XXX: not optimal */ + } + } + s->block_last_index[n] = i - 1; + return 0; +} + +/* most is hardcoded. should extend to handle all h263 streams */ +int h263_decode_picture_header(MpegEncContext *s) +{ + int format, width, height; + + /* picture header */ + if (get_bits(&s->gb, 22) != 0x20) + return -1; + get_bits(&s->gb, 8); /* picture timestamp */ + + if (get_bits(&s->gb, 1) != 1) + return -1; /* marker */ + if (get_bits(&s->gb, 1) != 0) + return -1; /* h263 id */ + get_bits(&s->gb, 1); /* split screen off */ + get_bits(&s->gb, 1); /* camera off */ + get_bits(&s->gb, 1); /* freeze picture release off */ + + format = get_bits(&s->gb, 3); + + if (format != 7) { + s->h263_plus = 0; + /* H.263v1 */ + width = h263_format[format][0]; + height = h263_format[format][1]; + if (!width) + return -1; + + s->pict_type = I_TYPE + get_bits(&s->gb, 1); + + s->unrestricted_mv = get_bits(&s->gb, 1); + s->h263_long_vectors = s->unrestricted_mv; + + if (get_bits(&s->gb, 1) != 0) + return -1; /* SAC: off */ + if (get_bits(&s->gb, 1) != 0) + return -1; /* advanced prediction mode: off */ + if (get_bits(&s->gb, 1) != 0) + return -1; /* not PB frame */ + + s->qscale = get_bits(&s->gb, 5); + get_bits(&s->gb, 1); /* Continuous Presence Multipoint mode: off */ + } else { + s->h263_plus = 1; + /* H.263v2 */ + if (get_bits(&s->gb, 3) != 1) + return -1; + if (get_bits(&s->gb, 3) != 6) /* custom source format */ + return -1; + get_bits(&s->gb, 12); + get_bits(&s->gb, 3); + s->pict_type = get_bits(&s->gb, 3) + 1; + if (s->pict_type != I_TYPE && + s->pict_type != P_TYPE) + return -1; + get_bits(&s->gb, 7); + get_bits(&s->gb, 4); /* aspect ratio */ + width = (get_bits(&s->gb, 9) + 1) * 4; + get_bits(&s->gb, 1); + height = get_bits(&s->gb, 9) * 4; + if (height == 0) + return -1; + s->qscale = get_bits(&s->gb, 5); + } + /* PEI */ + while (get_bits(&s->gb, 1) != 0) { + get_bits(&s->gb, 8); + } + s->f_code = 1; + s->width = width; + s->height = height; + return 0; +} + +/* decode mpeg4 VOP header */ +int mpeg4_decode_picture_header(MpegEncContext * s) +{ + int time_incr, startcode, state, v; + + redo: + /* search next start code */ + align_get_bits(&s->gb); + state = 0xff; + for(;;) { + v = get_bits(&s->gb, 8); + if (state == 0x000001) { + state = ((state << 8) | v) & 0xffffff; + startcode = state; + break; + } + state = ((state << 8) | v) & 0xffffff; + /* XXX: really detect end of frame */ + if (state == 0) + return -1; + } + + if (startcode == 0x120) { + int time_increment_resolution, width, height; + + /* vol header */ + get_bits(&s->gb, 1); /* random access */ + get_bits(&s->gb, 8); /* vo_type */ + get_bits(&s->gb, 1); /* is_ol_id */ + get_bits(&s->gb, 4); /* vo_ver_id */ + get_bits(&s->gb, 3); /* vo_priority */ + + get_bits(&s->gb, 4); /* aspect_ratio_info */ + get_bits(&s->gb, 1); /* vol control parameter */ + get_bits(&s->gb, 2); /* vol shape */ + get_bits(&s->gb, 1); /* marker */ + + time_increment_resolution = get_bits(&s->gb, 16); + s->time_increment_bits = log2(time_increment_resolution - 1) + 1; + get_bits(&s->gb, 1); /* marker */ + + get_bits(&s->gb, 1); /* vop rate */ + get_bits(&s->gb, s->time_increment_bits); + get_bits(&s->gb, 1); /* marker */ + + width = get_bits(&s->gb, 13); + get_bits(&s->gb, 1); /* marker */ + height = get_bits(&s->gb, 13); + get_bits(&s->gb, 1); /* marker */ + + get_bits(&s->gb, 1); /* interfaced */ + get_bits(&s->gb, 1); /* OBMC */ + get_bits(&s->gb, 2); /* vol_sprite_usage */ + get_bits(&s->gb, 1); /* not_8_bit */ + + get_bits(&s->gb, 1); /* vol_quant_type */ + get_bits(&s->gb, 1); /* vol_quarter_pixel */ + get_bits(&s->gb, 1); /* complexity_estimation_disabled */ + get_bits(&s->gb, 1); /* resync_marker_disabled */ + get_bits(&s->gb, 1); /* data_partioning_enabled */ + goto redo; + } else if (startcode != 0x1b6) { + goto redo; + } + + s->pict_type = get_bits(&s->gb, 2) + 1; /* pict type: I = 0 , P = 1 */ + if (s->pict_type != I_TYPE && + s->pict_type != P_TYPE) + return -1; + + /* XXX: parse time base */ + time_incr = 0; + while (get_bits(&s->gb, 1) != 0) + time_incr++; + + get_bits(&s->gb, 1); /* marker */ + get_bits(&s->gb, s->time_increment_bits); + get_bits(&s->gb, 1); /* marker */ + /* vop coded */ + if (get_bits(&s->gb, 1) != 1) + return -1; + + if (s->pict_type == P_TYPE) { + /* rounding type for motion estimation */ + s->no_rounding = get_bits(&s->gb, 1); + } + + if (get_bits(&s->gb, 3) != 0) + return -1; /* intra dc VLC threshold */ + + s->qscale = get_bits(&s->gb, 5); + + if (s->pict_type != I_TYPE) { + s->f_code = get_bits(&s->gb, 3); /* fcode_for */ + } + return 0; +} + +/* don't understand why they choose a different header ! */ +int intel_h263_decode_picture_header(MpegEncContext *s) +{ + int format; + + /* picture header */ + if (get_bits(&s->gb, 22) != 0x20) + return -1; + get_bits(&s->gb, 8); /* picture timestamp */ + + if (get_bits(&s->gb, 1) != 1) + return -1; /* marker */ + if (get_bits(&s->gb, 1) != 0) + return -1; /* h263 id */ + get_bits(&s->gb, 1); /* split screen off */ + get_bits(&s->gb, 1); /* camera off */ + get_bits(&s->gb, 1); /* freeze picture release off */ + + format = get_bits(&s->gb, 3); + if (format != 7) + return -1; + + s->h263_plus = 0; + + s->pict_type = I_TYPE + get_bits(&s->gb, 1); + + s->unrestricted_mv = get_bits(&s->gb, 1); + s->h263_long_vectors = s->unrestricted_mv; + + if (get_bits(&s->gb, 1) != 0) + return -1; /* SAC: off */ + if (get_bits(&s->gb, 1) != 0) + return -1; /* advanced prediction mode: off */ + if (get_bits(&s->gb, 1) != 0) + return -1; /* not PB frame */ + + /* skip unknown header garbage */ + get_bits(&s->gb, 41); + + s->qscale = get_bits(&s->gb, 5); + get_bits(&s->gb, 1); /* Continuous Presence Multipoint mode: off */ + + /* PEI */ + while (get_bits(&s->gb, 1) != 0) { + get_bits(&s->gb, 8); + } + s->f_code = 1; + return 0; +} diff --git a/libavcodec/h263data.h b/libavcodec/h263data.h new file mode 100644 index 0000000000..4fd9d36298 --- /dev/null +++ b/libavcodec/h263data.h @@ -0,0 +1,115 @@ + +/* intra MCBPC, mb_type = (intra), then (intraq) */ +static const UINT8 intra_MCBPC_code[8] = { 1, 1, 2, 3, 1, 1, 2, 3 }; +static const UINT8 intra_MCBPC_bits[8] = { 1, 3, 3, 3, 4, 6, 6, 6 }; + +/* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */ +static const UINT8 inter_MCBPC_code[20] = { + 1, 3, 2, 5, + 3, 4, 3, 3, + 0, 1, 2, 3, + 4, 4, 3, 2, + 2, 5, 4, 5, +}; +static const UINT8 inter_MCBPC_bits[20] = { + 1, 4, 4, 6, + 5, 8, 8, 7, + 12, 12, 12, 12, + 6, 9, 9, 9, + 3, 7, 7, 8, +}; + +static const UINT8 cbpy_tab[16][2] = +{ + {3,4}, {5,5}, {4,5}, {9,4}, {3,5}, {7,4}, {2,6}, {11,4}, + {2,5}, {3,6}, {5,4}, {10,4}, {4,4}, {8,4}, {6,4}, {3,2} +}; + +static const UINT8 mvtab[33][2] = +{ + {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7}, + {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10}, + {12,10}, {11,10}, {10,10}, {9,10}, {8,10}, {7,10}, {6,10}, {5,10}, + {4,10}, {7,11}, {6,11}, {5,11}, {4,11}, {3,11}, {2,11}, {3,12}, + {2,12} +}; + +/* third non intra table */ +const UINT16 inter_vlc[103][2] = { +{ 0x2, 2 },{ 0xf, 4 },{ 0x15, 6 },{ 0x17, 7 }, +{ 0x1f, 8 },{ 0x25, 9 },{ 0x24, 9 },{ 0x21, 10 }, +{ 0x20, 10 },{ 0x7, 11 },{ 0x6, 11 },{ 0x20, 11 }, +{ 0x6, 3 },{ 0x14, 6 },{ 0x1e, 8 },{ 0xf, 10 }, +{ 0x21, 11 },{ 0x50, 12 },{ 0xe, 4 },{ 0x1d, 8 }, +{ 0xe, 10 },{ 0x51, 12 },{ 0xd, 5 },{ 0x23, 9 }, +{ 0xd, 10 },{ 0xc, 5 },{ 0x22, 9 },{ 0x52, 12 }, +{ 0xb, 5 },{ 0xc, 10 },{ 0x53, 12 },{ 0x13, 6 }, +{ 0xb, 10 },{ 0x54, 12 },{ 0x12, 6 },{ 0xa, 10 }, +{ 0x11, 6 },{ 0x9, 10 },{ 0x10, 6 },{ 0x8, 10 }, +{ 0x16, 7 },{ 0x55, 12 },{ 0x15, 7 },{ 0x14, 7 }, +{ 0x1c, 8 },{ 0x1b, 8 },{ 0x21, 9 },{ 0x20, 9 }, +{ 0x1f, 9 },{ 0x1e, 9 },{ 0x1d, 9 },{ 0x1c, 9 }, +{ 0x1b, 9 },{ 0x1a, 9 },{ 0x22, 11 },{ 0x23, 11 }, +{ 0x56, 12 },{ 0x57, 12 },{ 0x7, 4 },{ 0x19, 9 }, +{ 0x5, 11 },{ 0xf, 6 },{ 0x4, 11 },{ 0xe, 6 }, +{ 0xd, 6 },{ 0xc, 6 },{ 0x13, 7 },{ 0x12, 7 }, +{ 0x11, 7 },{ 0x10, 7 },{ 0x1a, 8 },{ 0x19, 8 }, +{ 0x18, 8 },{ 0x17, 8 },{ 0x16, 8 },{ 0x15, 8 }, +{ 0x14, 8 },{ 0x13, 8 },{ 0x18, 9 },{ 0x17, 9 }, +{ 0x16, 9 },{ 0x15, 9 },{ 0x14, 9 },{ 0x13, 9 }, +{ 0x12, 9 },{ 0x11, 9 },{ 0x7, 10 },{ 0x6, 10 }, +{ 0x5, 10 },{ 0x4, 10 },{ 0x24, 11 },{ 0x25, 11 }, +{ 0x26, 11 },{ 0x27, 11 },{ 0x58, 12 },{ 0x59, 12 }, +{ 0x5a, 12 },{ 0x5b, 12 },{ 0x5c, 12 },{ 0x5d, 12 }, +{ 0x5e, 12 },{ 0x5f, 12 },{ 0x3, 7 }, +}; + +const INT8 inter_level[102] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 1, 2, 3, 4, + 5, 6, 1, 2, 3, 4, 1, 2, + 3, 1, 2, 3, 1, 2, 3, 1, + 2, 3, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 3, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +}; + +const INT8 inter_run[102] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 3, 3, + 3, 4, 4, 4, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 0, 0, 0, 1, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, +}; + +static RLTable rl_inter = { + 102, + 58, + inter_vlc, + inter_run, + inter_level, +}; + +static const UINT16 h263_format[8][2] = { + { 0, 0 }, + { 128, 96 }, + { 176, 144 }, + { 352, 288 }, + { 704, 576 }, + { 1408, 1152 }, +}; + diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c new file mode 100644 index 0000000000..2af76199ab --- /dev/null +++ b/libavcodec/h263dec.c @@ -0,0 +1,205 @@ +/* + * H263 decoder + * Copyright (c) 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "dsputil.h" +#include "avcodec.h" +#include "mpegvideo.h" + +//#define DEBUG + +static int h263_decode_init(AVCodecContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + + s->out_format = FMT_H263; + + s->width = avctx->width; + s->height = avctx->height; + + /* select sub codec */ + switch(avctx->codec->id) { + case CODEC_ID_H263: + break; + case CODEC_ID_OPENDIVX: + s->time_increment_bits = 4; /* default value for broken headers */ + s->h263_pred = 1; + break; + case CODEC_ID_MSMPEG4: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + break; + case CODEC_ID_H263I: + s->h263_intel = 1; + break; + default: + return -1; + } + + /* for h263, we allocate the images after having read the header */ + if (MPV_common_init(s) < 0) + return -1; + + if (s->h263_msmpeg4) + msmpeg4_decode_init_vlc(s); + else + h263_decode_init_vlc(s); + + return 0; +} + +static int h263_decode_end(AVCodecContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + + MPV_common_end(s); + return 0; +} + +static int h263_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size) +{ + MpegEncContext *s = avctx->priv_data; + int ret; + DCTELEM block[6][64]; + AVPicture *pict = data; + +#ifdef DEBUG + printf("*****frame %d size=%d\n", avctx->frame_number, buf_size); + printf("bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]); +#endif + + /* no supplementary picture */ + if (buf_size == 0) { + *data_size = 0; + return 0; + } + + init_get_bits(&s->gb, buf, buf_size); + + /* let's go :-) */ + if (s->h263_msmpeg4) { + ret = msmpeg4_decode_picture_header(s); + } else if (s->h263_pred) { + ret = mpeg4_decode_picture_header(s); + } else if (s->h263_intel) { + ret = intel_h263_decode_picture_header(s); + } else { + ret = h263_decode_picture_header(s); + } + if (ret < 0) + return -1; + + MPV_frame_start(s); + +#ifdef DEBUG + printf("qscale=%d\n", s->qscale); +#endif + + /* decode each macroblock */ + for(s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) { + for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) { +#ifdef DEBUG + printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); +#endif + /* DCT & quantize */ + if (s->h263_msmpeg4) { + msmpeg4_dc_scale(s); + } else if (s->h263_pred) { + h263_dc_scale(s); + } else { + /* default quantization values */ + s->y_dc_scale = 8; + s->c_dc_scale = 8; + } + + memset(block, 0, sizeof(block)); + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + if (s->h263_msmpeg4) { + if (msmpeg4_decode_mb(s, block) < 0) + return -1; + } else { + if (h263_decode_mb(s, block) < 0) + return -1; + } + MPV_decode_mb(s, block); + } + } + + MPV_frame_end(s); + + pict->data[0] = s->current_picture[0]; + pict->data[1] = s->current_picture[1]; + pict->data[2] = s->current_picture[2]; + pict->linesize[0] = s->linesize; + pict->linesize[1] = s->linesize / 2; + pict->linesize[2] = s->linesize / 2; + + avctx->quality = s->qscale; + *data_size = sizeof(AVPicture); + return buf_size; +} + +AVCodec opendivx_decoder = { + "opendivx", + CODEC_TYPE_VIDEO, + CODEC_ID_OPENDIVX, + sizeof(MpegEncContext), + h263_decode_init, + NULL, + h263_decode_end, + h263_decode_frame, +}; + +AVCodec h263_decoder = { + "h263", + CODEC_TYPE_VIDEO, + CODEC_ID_H263, + sizeof(MpegEncContext), + h263_decode_init, + NULL, + h263_decode_end, + h263_decode_frame, +}; + +AVCodec msmpeg4_decoder = { + "msmpeg4", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4, + sizeof(MpegEncContext), + h263_decode_init, + NULL, + h263_decode_end, + h263_decode_frame, +}; + +AVCodec h263i_decoder = { + "h263i", + CODEC_TYPE_VIDEO, + CODEC_ID_H263I, + sizeof(MpegEncContext), + h263_decode_init, + NULL, + h263_decode_end, + h263_decode_frame, +}; + diff --git a/libavcodec/i386/cputest.c b/libavcodec/i386/cputest.c new file mode 100644 index 0000000000..9181f413d5 --- /dev/null +++ b/libavcodec/i386/cputest.c @@ -0,0 +1,102 @@ +/* Cpu detection code, extracted from mmx.h ((c)1997-99 by H. Dietz + and R. Fisher). Converted to C and improved by Gerard Lantau */ + +#include <stdlib.h> +#include "../dsputil.h" + +#define cpuid(index,eax,ebx,ecx,edx) \ + asm ("cpuid" \ + : "=a" (eax), "=b" (ebx), \ + "=c" (ecx), "=d" (edx) \ + : "a" (index) \ + : "cc") + +/* Function to test if multimedia instructions are supported... */ +int mm_support(void) +{ + int rval; + int eax, ebx, ecx, edx; + + + __asm__ __volatile__ ( + /* See if CPUID instruction is supported ... */ + /* ... Get copies of EFLAGS into eax and ecx */ + "pushf\n\t" + "popl %0\n\t" + "movl %0, %1\n\t" + + /* ... Toggle the ID bit in one copy and store */ + /* to the EFLAGS reg */ + "xorl $0x200000, %0\n\t" + "push %0\n\t" + "popf\n\t" + + /* ... Get the (hopefully modified) EFLAGS */ + "pushf\n\t" + "popl %0\n\t" + : "=a" (eax), "=c" (ecx) + : + : "cc" + ); + + if (eax == ecx) + return 0; /* CPUID not supported */ + + cpuid(0, eax, ebx, ecx, edx); + + if (ebx == 0x756e6547 && + edx == 0x49656e69 && + ecx == 0x6c65746e) { + + /* intel */ + inteltest: + cpuid(1, eax, ebx, ecx, edx); + if ((edx & 0x00800000) == 0) + return 0; + rval = MM_MMX; + if (edx & 0x02000000) + rval |= MM_MMXEXT | MM_SSE; + if (edx & 0x04000000) + rval |= MM_SSE2; + return rval; + } else if (ebx == 0x68747541 && + edx == 0x69746e65 && + ecx == 0x444d4163) { + /* AMD */ + cpuid(0x80000000, eax, ebx, ecx, edx); + if ((unsigned)eax < 0x80000001) + goto inteltest; + cpuid(0x80000001, eax, ebx, ecx, edx); + if ((edx & 0x00800000) == 0) + return 0; + rval = MM_MMX; + if (edx & 0x80000000) + rval |= MM_3DNOW; + if (edx & 0x00400000) + rval |= MM_MMXEXT; + return rval; + } else if (ebx == 0x69727943 && + edx == 0x736e4978 && + ecx == 0x64616574) { + /* Cyrix Section */ + /* See if extended CPUID level 80000001 is supported */ + /* The value of CPUID/80000001 for the 6x86MX is undefined + according to the Cyrix CPU Detection Guide (Preliminary + Rev. 1.01 table 1), so we'll check the value of eax for + CPUID/0 to see if standard CPUID level 2 is supported. + According to the table, the only CPU which supports level + 2 is also the only one which supports extended CPUID levels. + */ + if (eax != 2) + goto inteltest; + cpuid(0x80000001, eax, ebx, ecx, edx); + if ((eax & 0x00800000) == 0) + return 0; + rval = MM_MMX; + if (eax & 0x01000000) + rval |= MM_MMXEXT; + return rval; + } else { + return 0; + } +} diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c new file mode 100644 index 0000000000..701b70283d --- /dev/null +++ b/libavcodec/i386/dsputil_mmx.c @@ -0,0 +1,1061 @@ +/* + * MMX optimized DSP utils + * Copyright (c) 2000, 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * MMX optimization by Nick Kurshev <nickols_k@mail.ru> + */ + +#include "../dsputil.h" + +int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_sse(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); +int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h); + +/* pixel operations */ +static const unsigned short mm_wone[4] __attribute__ ((aligned(8))) = { 0x1, 0x1, 0x1, 0x1 }; +static const unsigned short mm_wtwo[4] __attribute__ ((aligned(8))) = { 0x2, 0x2, 0x2, 0x2 }; + +/***********************************/ +/* 3Dnow specific */ + +#define DEF(x) x ## _3dnow +/* for Athlons PAVGUSB is prefered */ +#define PAVGB "pavgusb" + +#include "dsputil_mmx_avg.h" + +#undef DEF +#undef PAVGB + +/***********************************/ +/* MMX2 specific */ + +#define DEF(x) x ## _sse + +/* Introduced only in MMX2 set */ +#define PAVGB "pavgb" + +#include "dsputil_mmx_avg.h" + +#undef DEF +#undef PAVGB + +/***********************************/ +/* standard MMX */ + +static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) +{ + DCTELEM *p; + const UINT8 *pix; + int i; + + /* read the pixels */ + p = block; + pix = pixels; + __asm __volatile("pxor %%mm7, %%mm7":::"memory"); + for(i=0;i<4;i++) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm2, 8%0\n\t" + "movq %%mm1, 16%0\n\t" + "movq %%mm3, 24%0\n\t" + :"=m"(*p) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + pix += line_size*2; + p += 16; + } + emms(); +} + +static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) +{ + const DCTELEM *p; + UINT8 *pix; + int i; + + /* read the pixels */ + p = block; + pix = pixels; + for(i=0;i<2;i++) { + __asm __volatile( + "movq %4, %%mm0\n\t" + "movq 8%4, %%mm1\n\t" + "movq 16%4, %%mm2\n\t" + "movq 24%4, %%mm3\n\t" + "movq 32%4, %%mm4\n\t" + "movq 40%4, %%mm5\n\t" + "movq 48%4, %%mm6\n\t" + "movq 56%4, %%mm7\n\t" + "packuswb %%mm1, %%mm0\n\t" + "packuswb %%mm3, %%mm2\n\t" + "packuswb %%mm5, %%mm4\n\t" + "packuswb %%mm7, %%mm6\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm2, %1\n\t" + "movq %%mm4, %2\n\t" + "movq %%mm6, %3\n\t" + :"=m"(*pix), "=m"(*(pix+line_size)) + ,"=m"(*(pix+line_size*2)), "=m"(*(pix+line_size*3)) + :"m"(*p) + :"memory"); + pix += line_size*4; + p += 32; + } + emms(); +} + +static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size) +{ + const DCTELEM *p; + UINT8 *pix; + int i; + + /* read the pixels */ + p = block; + pix = pixels; + __asm __volatile("pxor %%mm7, %%mm7":::"memory"); + for(i=0;i<4;i++) { + __asm __volatile( + "movq %2, %%mm0\n\t" + "movq 8%2, %%mm1\n\t" + "movq 16%2, %%mm2\n\t" + "movq 24%2, %%mm3\n\t" + "movq %0, %%mm4\n\t" + "movq %1, %%mm6\n\t" + "movq %%mm4, %%mm5\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpckhbw %%mm7, %%mm5\n\t" + "paddsw %%mm4, %%mm0\n\t" + "paddsw %%mm5, %%mm1\n\t" + "movq %%mm6, %%mm5\n\t" + "punpcklbw %%mm7, %%mm6\n\t" + "punpckhbw %%mm7, %%mm5\n\t" + "paddsw %%mm6, %%mm2\n\t" + "paddsw %%mm5, %%mm3\n\t" + "packuswb %%mm1, %%mm0\n\t" + "packuswb %%mm3, %%mm2\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm2, %1\n\t" + :"=m"(*pix), "=m"(*(pix+line_size)) + :"m"(*p) + :"memory"); + pix += line_size*2; + p += 16; + } + emms(); +} + +static void put_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>2; + dh=h&3; + while(hh--) { + __asm __volatile( + "movq %4, %%mm0\n\t" + "movq %5, %%mm1\n\t" + "movq %6, %%mm2\n\t" + "movq %7, %%mm3\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, %1\n\t" + "movq %%mm2, %2\n\t" + "movq %%mm3, %3\n\t" + :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3)) + :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3)) + :"memory"); + pix = pix + line_size*4; + p = p + line_size*4; + } + while(dh--) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix = pix + line_size; + p = p + line_size; + } + emms(); +} + +static void put_pixels_x2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm4\n\t" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq 1%1, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm4, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; p += line_size; + } while (--h); + emms(); +} + +static void put_pixels_y2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm4\n\t" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm4, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size; + } while (--h); + emms(); +} + +static void put_pixels_xy2_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wtwo[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq 1%1, %%mm4\n\t" + "movq 1%2, %%mm5\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "movq %%mm4, %%mm1\n\t" + "movq %%mm5, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpcklbw %%mm7, %%mm5\n\t" + "punpckhbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm5, %%mm4\n\t" + "paddusw %%mm3, %%mm1\n\t" + "paddusw %%mm6, %%mm4\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm1, %%mm2\n\t" + "psrlw $2, %%mm0\n\t" + "psrlw $2, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size; + } while(--h); + emms(); +} + +static void put_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq 1%1, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += line_size; + } while (--h); + emms(); +} + +static void put_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size; + } while(--h); + emms(); +} + +static void put_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq 1%1, %%mm4\n\t" + "movq 1%2, %%mm5\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "movq %%mm4, %%mm1\n\t" + "movq %%mm5, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpcklbw %%mm7, %%mm5\n\t" + "punpckhbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm5, %%mm4\n\t" + "paddusw %%mm3, %%mm1\n\t" + "paddusw %%mm6, %%mm4\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm1, %%mm2\n\t" + "psrlw $2, %%mm0\n\t" + "psrlw $2, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size; + } while(--h); + emms(); +} + +static void avg_pixels_mmx(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %0, %%mm0\n\t" + "movq %1, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "paddusw %%mm6, %%mm0\n\t" + "paddusw %%mm6, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += line_size; + } + while (--h); + emms(); +} + +static void avg_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm1\n\t" + "movq %0, %%mm0\n\t" + "movq 1%1, %%mm4\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "movq %%mm4, %%mm5\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpckhbw %%mm7, %%mm5\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "paddusw %%mm4, %%mm1\n\t" + "paddusw %%mm5, %%mm3\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm6, %%mm3\n\t" + "psrlw $1, %%mm1\n\t" + "psrlw $1, %%mm3\n\t" + "paddusw %%mm6, %%mm0\n\t" + "paddusw %%mm6, %%mm2\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += line_size; + } while (--h); + emms(); +} + +static void avg_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm1\n\t" + "movq %0, %%mm0\n\t" + "movq %2, %%mm4\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "movq %%mm4, %%mm5\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpckhbw %%mm7, %%mm5\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "paddusw %%mm4, %%mm1\n\t" + "paddusw %%mm5, %%mm3\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm6, %%mm3\n\t" + "psrlw $1, %%mm1\n\t" + "psrlw $1, %%mm3\n\t" + "paddusw %%mm6, %%mm0\n\t" + "paddusw %%mm6, %%mm2\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size ; + } while(--h); + emms(); +} + +static void avg_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wtwo[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq 1%1, %%mm4\n\t" + "movq 1%2, %%mm5\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "movq %%mm4, %%mm1\n\t" + "movq %%mm5, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpcklbw %%mm7, %%mm5\n\t" + "punpckhbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm5, %%mm4\n\t" + "paddusw %%mm3, %%mm1\n\t" + "paddusw %%mm6, %%mm4\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm1, %%mm2\n\t" + "movq %3, %%mm5\n\t" + "psrlw $2, %%mm0\n\t" + "movq %0, %%mm1\n\t" + "psrlw $2, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "paddusw %%mm5, %%mm0\n\t" + "paddusw %%mm5, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)), "m"(mm_wone[0]) + :"memory"); + pix += line_size; + p += line_size ; + } while(--h); + emms(); +} + +static void avg_no_rnd_pixels_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile("pxor %%mm7, %%mm7\n\t":::"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %0, %%mm1\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += line_size ; + } while (--h); + emms(); +} + +static void avg_no_rnd_pixels_x2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t":::"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq 1%1, %%mm1\n\t" + "movq %0, %%mm4\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "movq %%mm4, %%mm5\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpckhbw %%mm7, %%mm5\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm5, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += line_size; + } while (--h); + emms(); +} + +static void avg_no_rnd_pixels_y2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t":::"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq %0, %%mm4\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "movq %%mm4, %%mm5\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpckhbw %%mm7, %%mm5\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm5, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size ; + } while(--h); + emms(); +} + +static void avg_no_rnd_pixels_xy2_mmx( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq 1%1, %%mm4\n\t" + "movq 1%2, %%mm5\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "movq %%mm4, %%mm1\n\t" + "movq %%mm5, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpcklbw %%mm7, %%mm5\n\t" + "punpckhbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm5, %%mm4\n\t" + "paddusw %%mm3, %%mm1\n\t" + "paddusw %%mm6, %%mm4\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm1, %%mm2\n\t" + "movq %0, %%mm1\n\t" + "psrlw $2, %%mm0\n\t" + "movq %%mm1, %%mm3\n\t" + "psrlw $2, %%mm2\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "psrlw $1, %%mm0\n\t" + "psrlw $1, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size; + } while(--h); + emms(); +} + +static void sub_pixels_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) +{ + DCTELEM *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile("pxor %%mm7, %%mm7":::"memory"); + do { + __asm __volatile( + "movq %0, %%mm0\n\t" + "movq %1, %%mm2\n\t" + "movq 8%0, %%mm1\n\t" + "movq %%mm2, %%mm3\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "psubsw %%mm2, %%mm0\n\t" + "psubsw %%mm3, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, 8%0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += 8; + } while (--h); + emms(); +} + +static void sub_pixels_x2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) +{ + DCTELEM *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %0, %%mm0\n\t" + "movq %1, %%mm2\n\t" + "movq 8%0, %%mm1\n\t" + "movq 1%1, %%mm4\n\t" + "movq %%mm2, %%mm3\n\t" + "movq %%mm4, %%mm5\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpckhbw %%mm7, %%mm5\n\t" + "paddusw %%mm4, %%mm2\n\t" + "paddusw %%mm5, %%mm3\n\t" + "paddusw %%mm6, %%mm2\n\t" + "paddusw %%mm6, %%mm3\n\t" + "psrlw $1, %%mm2\n\t" + "psrlw $1, %%mm3\n\t" + "psubsw %%mm2, %%mm0\n\t" + "psubsw %%mm3, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, 8%0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += 8; + } while (--h); + emms(); +} + +static void sub_pixels_y2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) +{ + DCTELEM *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6" + ::"m"(mm_wone[0]):"memory"); + do { + __asm __volatile( + "movq %0, %%mm0\n\t" + "movq %1, %%mm2\n\t" + "movq 8%0, %%mm1\n\t" + "movq %2, %%mm4\n\t" + "movq %%mm2, %%mm3\n\t" + "movq %%mm4, %%mm5\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpckhbw %%mm7, %%mm5\n\t" + "paddusw %%mm4, %%mm2\n\t" + "paddusw %%mm5, %%mm3\n\t" + "paddusw %%mm6, %%mm2\n\t" + "paddusw %%mm6, %%mm3\n\t" + "psrlw $1, %%mm2\n\t" + "psrlw $1, %%mm3\n\t" + "psubsw %%mm2, %%mm0\n\t" + "psubsw %%mm3, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, 8%0\n\t" + :"=m"(*p) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += 8; + } while (--h); + emms(); +} + +static void sub_pixels_xy2_mmx( DCTELEM *block, const UINT8 *pixels, int line_size, int h) +{ + DCTELEM *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wtwo[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq 1%1, %%mm4\n\t" + "movq 1%2, %%mm5\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "movq %%mm4, %%mm1\n\t" + "movq %%mm5, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpcklbw %%mm7, %%mm5\n\t" + "punpckhbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm5, %%mm4\n\t" + "paddusw %%mm3, %%mm1\n\t" + "paddusw %%mm6, %%mm4\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm1, %%mm2\n\t" + "movq %0, %%mm1\n\t" + "movq 8%0, %%mm3\n\t" + "psrlw $2, %%mm0\n\t" + "psrlw $2, %%mm2\n\t" + "psubsw %%mm0, %%mm1\n\t" + "psubsw %%mm2, %%mm3\n\t" + "movq %%mm1, %0\n\t" + "movq %%mm3, 8%0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += 8 ; + } while(--h); + emms(); +} + +void dsputil_init_mmx(void) +{ + mm_flags = mm_support(); +#if 0 + printf("CPU flags:"); + if (mm_flags & MM_MMX) + printf(" mmx"); + if (mm_flags & MM_MMXEXT) + printf(" mmxext"); + if (mm_flags & MM_3DNOW) + printf(" 3dnow"); + if (mm_flags & MM_SSE) + printf(" sse"); + if (mm_flags & MM_SSE2) + printf(" sse2"); + printf("\n"); +#endif + + if (mm_flags & MM_MMX) { + get_pixels = get_pixels_mmx; + put_pixels_clamped = put_pixels_clamped_mmx; + add_pixels_clamped = add_pixels_clamped_mmx; + + pix_abs16x16 = pix_abs16x16_mmx; + pix_abs16x16_x2 = pix_abs16x16_x2_mmx; + pix_abs16x16_y2 = pix_abs16x16_y2_mmx; + pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; + av_fdct = fdct_mmx; + + put_pixels_tab[0] = put_pixels_mmx; + put_pixels_tab[1] = put_pixels_x2_mmx; + put_pixels_tab[2] = put_pixels_y2_mmx; + put_pixels_tab[3] = put_pixels_xy2_mmx; + + put_no_rnd_pixels_tab[0] = put_pixels_mmx; + put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_mmx; + put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_mmx; + put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_mmx; + + avg_pixels_tab[0] = avg_pixels_mmx; + avg_pixels_tab[1] = avg_pixels_x2_mmx; + avg_pixels_tab[2] = avg_pixels_y2_mmx; + avg_pixels_tab[3] = avg_pixels_xy2_mmx; + + avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_mmx; + avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_mmx; + avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_mmx; + avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_mmx; + + sub_pixels_tab[0] = sub_pixels_mmx; + sub_pixels_tab[1] = sub_pixels_x2_mmx; + sub_pixels_tab[2] = sub_pixels_y2_mmx; + sub_pixels_tab[3] = sub_pixels_xy2_mmx; + + if (mm_flags & MM_MMXEXT) { + pix_abs16x16 = pix_abs16x16_sse; + } + + if (mm_flags & MM_SSE) { + put_pixels_tab[1] = put_pixels_x2_sse; + put_pixels_tab[2] = put_pixels_y2_sse; + + avg_pixels_tab[0] = avg_pixels_sse; + avg_pixels_tab[1] = avg_pixels_x2_sse; + avg_pixels_tab[2] = avg_pixels_y2_sse; + avg_pixels_tab[3] = avg_pixels_xy2_sse; + + sub_pixels_tab[1] = sub_pixels_x2_sse; + sub_pixels_tab[2] = sub_pixels_y2_sse; + } else if (mm_flags & MM_3DNOW) { + put_pixels_tab[1] = put_pixels_x2_3dnow; + put_pixels_tab[2] = put_pixels_y2_3dnow; + + avg_pixels_tab[0] = avg_pixels_3dnow; + avg_pixels_tab[1] = avg_pixels_x2_3dnow; + avg_pixels_tab[2] = avg_pixels_y2_3dnow; + avg_pixels_tab[3] = avg_pixels_xy2_3dnow; + + sub_pixels_tab[1] = sub_pixels_x2_3dnow; + sub_pixels_tab[2] = sub_pixels_y2_3dnow; + } + } +} diff --git a/libavcodec/i386/dsputil_mmx_avg.h b/libavcodec/i386/dsputil_mmx_avg.h new file mode 100644 index 0000000000..e47b83f62d --- /dev/null +++ b/libavcodec/i386/dsputil_mmx_avg.h @@ -0,0 +1,352 @@ +/* + * DSP utils : average functions are compiled twice for 3dnow/mmx2 + * Copyright (c) 2000, 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * MMX optimization by Nick Kurshev <nickols_k@mail.ru> + */ + +static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>2; + dh=h&3; + while(hh--) { + __asm __volatile( + "movq %4, %%mm0\n\t" + "movq 1%4, %%mm1\n\t" + "movq %5, %%mm2\n\t" + "movq 1%5, %%mm3\n\t" + "movq %6, %%mm4\n\t" + "movq 1%6, %%mm5\n\t" + "movq %7, %%mm6\n\t" + "movq 1%7, %%mm7\n\t" + PAVGB" %%mm1, %%mm0\n\t" + PAVGB" %%mm3, %%mm2\n\t" + PAVGB" %%mm5, %%mm4\n\t" + PAVGB" %%mm7, %%mm6\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm2, %1\n\t" + "movq %%mm4, %2\n\t" + "movq %%mm6, %3\n\t" + :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3)) + :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3)) + :"memory"); + pix += line_size*4; p += line_size*4; + } + while(dh--) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq 1%1, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; p += line_size; + } + emms(); +} + +static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + + hh=h>>1; + dh=h&1; + while(hh--) { + __asm __volatile( + "movq %2, %%mm0\n\t" + "movq %3, %%mm1\n\t" + "movq %4, %%mm2\n\t" + PAVGB" %%mm1, %%mm0\n\t" + PAVGB" %%mm2, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, %1\n\t" + :"=m"(*p), "=m"(*(p+line_size)) + :"m"(*pix), "m"(*(pix+line_size)), + "m"(*(pix+line_size*2)) + :"memory"); + pix += line_size*2; + p += line_size*2; + } + if(dh) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + } + emms(); +} + +static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>2; + dh=h&3; + while(hh--) { + __asm __volatile( + "movq %0, %%mm0\n\t" + "movq %4, %%mm1\n\t" + "movq %1, %%mm2\n\t" + "movq %5, %%mm3\n\t" + "movq %2, %%mm4\n\t" + "movq %6, %%mm5\n\t" + "movq %3, %%mm6\n\t" + "movq %7, %%mm7\n\t" + PAVGB" %%mm1, %%mm0\n\t" + PAVGB" %%mm3, %%mm2\n\t" + PAVGB" %%mm5, %%mm4\n\t" + PAVGB" %%mm7, %%mm6\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm2, %1\n\t" + "movq %%mm4, %2\n\t" + "movq %%mm6, %3\n\t" + :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3)) + :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3)) + :"memory"); + pix += line_size*4; p += line_size*4; + } + while(dh--) { + __asm __volatile( + "movq %0, %%mm0\n\t" + "movq %1, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; p += line_size; + } + emms(); +} + +static void DEF(avg_pixels_x2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>1; + dh=h&1; + while(hh--) { + __asm __volatile( + "movq %2, %%mm2\n\t" + "movq 1%2, %%mm3\n\t" + "movq %3, %%mm4\n\t" + "movq 1%3, %%mm5\n\t" + "movq %0, %%mm0\n\t" + "movq %1, %%mm1\n\t" + PAVGB" %%mm3, %%mm2\n\t" + PAVGB" %%mm2, %%mm0\n\t" + PAVGB" %%mm5, %%mm4\n\t" + PAVGB" %%mm4, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, %1\n\t" + :"=m"(*p), "=m"(*(p+line_size)) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + pix += line_size*2; + p += line_size*2; + } + if(dh) { + __asm __volatile( + "movq %1, %%mm1\n\t" + "movq 1%1, %%mm2\n\t" + "movq %0, %%mm0\n\t" + PAVGB" %%mm2, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + } + emms(); +} + +static void DEF(avg_pixels_y2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>1; + dh=h&1; + while(hh--) { + __asm __volatile( + "movq %2, %%mm2\n\t" + "movq %3, %%mm3\n\t" + "movq %3, %%mm4\n\t" + "movq %4, %%mm5\n\t" + "movq %0, %%mm0\n\t" + "movq %1, %%mm1\n\t" + PAVGB" %%mm3, %%mm2\n\t" + PAVGB" %%mm2, %%mm0\n\t" + PAVGB" %%mm5, %%mm4\n\t" + PAVGB" %%mm4, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, %1\n\t" + :"=m"(*p), "=m"(*(p+line_size)) + :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)) + :"memory"); + pix += line_size*2; + p += line_size*2; + } + if(dh) { + __asm __volatile( + "movq %1, %%mm1\n\t" + "movq %2, %%mm2\n\t" + "movq %0, %%mm0\n\t" + PAVGB" %%mm2, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + } + emms(); +} + +static void DEF(avg_pixels_xy2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wtwo[0]):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq 1%1, %%mm4\n\t" + "movq 1%2, %%mm5\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "movq %%mm4, %%mm1\n\t" + "movq %%mm5, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpcklbw %%mm7, %%mm5\n\t" + "punpckhbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm5, %%mm4\n\t" + "paddusw %%mm3, %%mm1\n\t" + "paddusw %%mm6, %%mm4\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm1, %%mm2\n\t" + "psrlw $2, %%mm0\n\t" + "psrlw $2, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + PAVGB" %0, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size ; + } while(--h); + emms(); +} + +static void DEF(sub_pixels_x2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h) +{ + DCTELEM *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7":::"memory"); + do { + __asm __volatile( + "movq 1%1, %%mm2\n\t" + "movq %0, %%mm0\n\t" + PAVGB" %1, %%mm2\n\t" + "movq 8%0, %%mm1\n\t" + "movq %%mm2, %%mm3\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "psubsw %%mm2, %%mm0\n\t" + "psubsw %%mm3, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, 8%0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += 8; + } while (--h); + emms(); +} + +static void DEF(sub_pixels_y2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h) +{ + DCTELEM *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7":::"memory"); + do { + __asm __volatile( + "movq %2, %%mm2\n\t" + "movq %0, %%mm0\n\t" + PAVGB" %1, %%mm2\n\t" + "movq 8%0, %%mm1\n\t" + "movq %%mm2, %%mm3\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "psubsw %%mm2, %%mm0\n\t" + "psubsw %%mm3, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, 8%0\n\t" + :"=m"(*p) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += 8; + } while (--h); + emms(); +} + diff --git a/libavcodec/i386/fdct_mmx.s b/libavcodec/i386/fdct_mmx.s new file mode 100644 index 0000000000..75c67bfe5e --- /dev/null +++ b/libavcodec/i386/fdct_mmx.s @@ -0,0 +1,507 @@ +; ////////////////////////////////////////////////////////////////////////////// +; // +; // fdctam32.c - AP922 MMX(3D-Now) forward-DCT +; // ---------- +; // Intel Application Note AP-922 - fast, precise implementation of DCT +; // http://developer.intel.com/vtune/cbts/appnotes.htm +; // ---------- +; // +; // This routine can use a 3D-Now/MMX enhancement to increase the +; // accuracy of the fdct_col_4 macro. The dct_col function uses 3D-Now's +; // PMHULHRW instead of MMX's PMHULHW(and POR). The substitution improves +; // accuracy very slightly with performance penalty. If the target CPU +; // does not support 3D-Now, then this function cannot be executed. +; // +; // For a fast, precise MMX implementation of inverse-DCT +; // visit http://www.elecard.com/peter +; // +; // v1.0 07/22/2000 (initial release) +; // +; // liaor@iname.com http://members.tripod.com/~liaor +; ////////////////////////////////////////////////////////////////////////////// + +;;; +;;; A.Stevens Jul 2000: ported to nasm syntax and disentangled from +;;; from Win**** compiler specific stuff. +;;; All the real work was done above though. +;;; See above for how to optimise quality on 3DNow! CPU's + + ;; + ;; Macros for code-readability... + ;; +%define INP eax ; pointer to (short *blk) +%define OUT ecx ; pointer to output (temporary store space qwTemp[]) +%define TABLE ebx ; pointer to tab_frw_01234567[] +%define TABLEF ebx ; pointer to tg_all_16 +%define round_frw_row edx + + +%define x0 INP + 0*16 +%define x1 INP + 1*16 +%define x2 INP + 2*16 +%define x3 INP + 3*16 +%define x4 INP + 4*16 +%define x5 INP + 5*16 +%define x6 INP + 6*16 +%define x7 INP + 7*16 +%define y0 OUT + 0*16 +%define y1 OUT + 1*16 +%define y2 OUT + 2*16 +%define y3 OUT + 3*16 +%define y4 OUT + 4*16 +%define y5 OUT + 5*16 +%define y6 OUT + 6*16 +%define y7 OUT + 7*16 + + ;; + ;; Constants for DCT + ;; +%define BITS_FRW_ACC 3 ; 2 or 3 for accuracy +%define SHIFT_FRW_COL BITS_FRW_ACC +%define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) +%define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) +%define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) + +extern fdct_one_corr +extern fdct_r_row ; Defined in C for convenience + ;; + ;; Concatenated table of forward dct transformation coeffs. + ;; +extern fdct_tg_all_16 ; Defined in C for convenience + ;; Offsets into table.. + +%define tg_1_16 (TABLEF + 0) +%define tg_2_16 (TABLEF + 8) +%define tg_3_16 (TABLEF + 16) +%define cos_4_16 (TABLEF + 24) +%define ocos_4_16 (TABLEF + 32) + + ;; + ;; Concatenated table of forward dct coefficients + ;; +extern tab_frw_01234567 ; Defined in C for convenience + + ;; Offsets into table.. +SECTION .text + +global fdct_mmx + +;;; +;;; void fdct_mmx( short *blk ) +;;; + + + +; //////////////////////////////////////////////////////////////////////// +; // +; // The high-level pseudocode for the fdct_am32() routine : +; // +; // fdct_am32() +; // { +; // forward_dct_col03(); // dct_column transform on cols 0-3 +; // forward_dct_col47(); // dct_column transform on cols 4-7 +; // for ( j = 0; j < 8; j=j+1 ) +; // forward_dct_row1(j); // dct_row transform on row #j +; // } +; // +; + +align 32 +fdct_mmx: + push ebp ; save stack pointer + mov ebp, esp ; link + + push ebx + push ecx + push edx + push edi + + mov INP, [ebp+8]; ; input data is row 0 of blk[] + ;// transform the left half of the matrix (4 columns) + + lea TABLEF, [fdct_tg_all_16]; + mov OUT, INP; + +; lea round_frw_col, [r_frw_col] + ; for ( i = 0; i < 2; i = i + 1) + ; the for-loop is executed twice. We are better off unrolling the + ; loop to avoid branch misprediction. +.mmx32_fdct_col03: + movq mm0, [x1] ; 0 ; x1 + ;; + + movq mm1, [x6] ; 1 ; x6 + movq mm2, mm0 ; 2 ; x1 + + movq mm3, [x2] ; 3 ; x2 + paddsw mm0, mm1 ; t1 = x[1] + x[6] + + movq mm4, [x5] ; 4 ; x5 + psllw mm0, SHIFT_FRW_COL ; t1 + + movq mm5, [x0] ; 5 ; x0 + paddsw mm4, mm3 ; t2 = x[2] + x[5] + + paddsw mm5, [x7] ; t0 = x[0] + x[7] + psllw mm4, SHIFT_FRW_COL ; t2 + + movq mm6, mm0 ; 6 ; t1 + psubsw mm2, mm1 ; 1 ; t6 = x[1] - x[6] + + movq mm1, [tg_2_16] ; 1 ; tg_2_16 + psubsw mm0, mm4 ; tm12 = t1 - t2 + + movq mm7, [x3] ; 7 ; x3 + pmulhw mm1, mm0 ; tm12*tg_2_16 + + paddsw mm7, [x4] ; t3 = x[3] + x[4] + psllw mm5, SHIFT_FRW_COL ; t0 + + paddsw mm6, mm4 ; 4 ; tp12 = t1 + t2 + psllw mm7, SHIFT_FRW_COL ; t3 + + movq mm4, mm5 ; 4 ; t0 + psubsw mm5, mm7 ; tm03 = t0 - t3 + + paddsw mm1, mm5 ; y2 = tm03 + tm12*tg_2_16 + paddsw mm4, mm7 ; 7 ; tp03 = t0 + t3 + + por mm1, [fdct_one_corr] ; correction y2 +0.5 + psllw mm2, SHIFT_FRW_COL+1 ; t6 + + pmulhw mm5, [tg_2_16] ; tm03*tg_2_16 + movq mm7, mm4 ; 7 ; tp03 + + psubsw mm3, [x5] ; t5 = x[2] - x[5] + psubsw mm4, mm6 ; y4 = tp03 - tp12 + + movq [y2], mm1 ; 1 ; save y2 + paddsw mm7, mm6 ; 6 ; y0 = tp03 + tp12 + + movq mm1, [x3] ; 1 ; x3 + psllw mm3, SHIFT_FRW_COL+1 ; t5 + + psubsw mm1, [x4] ; t4 = x[3] - x[4] + movq mm6, mm2 ; 6 ; t6 + + movq [y4], mm4 ; 4 ; save y4 + paddsw mm2, mm3 ; t6 + t5 + + pmulhw mm2, [ocos_4_16] ; tp65 = (t6 + t5)*cos_4_16 + psubsw mm6, mm3 ; 3 ; t6 - t5 + + pmulhw mm6, [ocos_4_16] ; tm65 = (t6 - t5)*cos_4_16 + psubsw mm5, mm0 ; 0 ; y6 = tm03*tg_2_16 - tm12 + + por mm5, [fdct_one_corr] ; correction y6 +0.5 + psllw mm1, SHIFT_FRW_COL ; t4 + + por mm2, [fdct_one_corr] ; correction tp65 +0.5 + movq mm4, mm1 ; 4 ; t4 + + movq mm3, [x0] ; 3 ; x0 + paddsw mm1, mm6 ; tp465 = t4 + tm65 + + psubsw mm3, [x7] ; t7 = x[0] - x[7] + psubsw mm4, mm6 ; 6 ; tm465 = t4 - tm65 + + movq mm0, [tg_1_16] ; 0 ; tg_1_16 + psllw mm3, SHIFT_FRW_COL ; t7 + + movq mm6, [tg_3_16] ; 6 ; tg_3_16 + pmulhw mm0, mm1 ; tp465*tg_1_16 + + movq [y0], mm7 ; 7 ; save y0 + pmulhw mm6, mm4 ; tm465*tg_3_16 + + movq [y6], mm5 ; 5 ; save y6 + movq mm7, mm3 ; 7 ; t7 + + movq mm5, [tg_3_16] ; 5 ; tg_3_16 + psubsw mm7, mm2 ; tm765 = t7 - tp65 + + paddsw mm3, mm2 ; 2 ; tp765 = t7 + tp65 + pmulhw mm5, mm7 ; tm765*tg_3_16 + + paddsw mm0, mm3 ; y1 = tp765 + tp465*tg_1_16 + paddsw mm6, mm4 ; tm465*tg_3_16 + + pmulhw mm3, [tg_1_16] ; tp765*tg_1_16 + ;; + + por mm0, [fdct_one_corr] ; correction y1 +0.5 + paddsw mm5, mm7 ; tm765*tg_3_16 + + psubsw mm7, mm6 ; 6 ; y3 = tm765 - tm465*tg_3_16 + add INP, 0x08 ; ; increment pointer + + movq [y1], mm0 ; 0 ; save y1 + paddsw mm5, mm4 ; 4 ; y5 = tm765*tg_3_16 + tm465 + + movq [y3], mm7 ; 7 ; save y3 + psubsw mm3, mm1 ; 1 ; y7 = tp765*tg_1_16 - tp465 + + movq [y5], mm5 ; 5 ; save y5 + + +.mmx32_fdct_col47: ; begin processing last four columns + movq mm0, [x1] ; 0 ; x1 + ;; + movq [y7], mm3 ; 3 ; save y7 (columns 0-4) + ;; + + movq mm1, [x6] ; 1 ; x6 + movq mm2, mm0 ; 2 ; x1 + + movq mm3, [x2] ; 3 ; x2 + paddsw mm0, mm1 ; t1 = x[1] + x[6] + + movq mm4, [x5] ; 4 ; x5 + psllw mm0, SHIFT_FRW_COL ; t1 + + movq mm5, [x0] ; 5 ; x0 + paddsw mm4, mm3 ; t2 = x[2] + x[5] + + paddsw mm5, [x7] ; t0 = x[0] + x[7] + psllw mm4, SHIFT_FRW_COL ; t2 + + movq mm6, mm0 ; 6 ; t1 + psubsw mm2, mm1 ; 1 ; t6 = x[1] - x[6] + + movq mm1, [tg_2_16] ; 1 ; tg_2_16 + psubsw mm0, mm4 ; tm12 = t1 - t2 + + movq mm7, [x3] ; 7 ; x3 + pmulhw mm1, mm0 ; tm12*tg_2_16 + + paddsw mm7, [x4] ; t3 = x[3] + x[4] + psllw mm5, SHIFT_FRW_COL ; t0 + + paddsw mm6, mm4 ; 4 ; tp12 = t1 + t2 + psllw mm7, SHIFT_FRW_COL ; t3 + + movq mm4, mm5 ; 4 ; t0 + psubsw mm5, mm7 ; tm03 = t0 - t3 + + paddsw mm1, mm5 ; y2 = tm03 + tm12*tg_2_16 + paddsw mm4, mm7 ; 7 ; tp03 = t0 + t3 + + por mm1, [fdct_one_corr] ; correction y2 +0.5 + psllw mm2, SHIFT_FRW_COL+1 ; t6 + + pmulhw mm5, [tg_2_16] ; tm03*tg_2_16 + movq mm7, mm4 ; 7 ; tp03 + + psubsw mm3, [x5] ; t5 = x[2] - x[5] + psubsw mm4, mm6 ; y4 = tp03 - tp12 + + movq [y2+8], mm1 ; 1 ; save y2 + paddsw mm7, mm6 ; 6 ; y0 = tp03 + tp12 + + movq mm1, [x3] ; 1 ; x3 + psllw mm3, SHIFT_FRW_COL+1 ; t5 + + psubsw mm1, [x4] ; t4 = x[3] - x[4] + movq mm6, mm2 ; 6 ; t6 + + movq [y4+8], mm4 ; 4 ; save y4 + paddsw mm2, mm3 ; t6 + t5 + + pmulhw mm2, [ocos_4_16] ; tp65 = (t6 + t5)*cos_4_16 + psubsw mm6, mm3 ; 3 ; t6 - t5 + + pmulhw mm6, [ocos_4_16] ; tm65 = (t6 - t5)*cos_4_16 + psubsw mm5, mm0 ; 0 ; y6 = tm03*tg_2_16 - tm12 + + por mm5, [fdct_one_corr] ; correction y6 +0.5 + psllw mm1, SHIFT_FRW_COL ; t4 + + por mm2, [fdct_one_corr] ; correction tp65 +0.5 + movq mm4, mm1 ; 4 ; t4 + + movq mm3, [x0] ; 3 ; x0 + paddsw mm1, mm6 ; tp465 = t4 + tm65 + + psubsw mm3, [x7] ; t7 = x[0] - x[7] + psubsw mm4, mm6 ; 6 ; tm465 = t4 - tm65 + + movq mm0, [tg_1_16] ; 0 ; tg_1_16 + psllw mm3, SHIFT_FRW_COL ; t7 + + movq mm6, [tg_3_16] ; 6 ; tg_3_16 + pmulhw mm0, mm1 ; tp465*tg_1_16 + + movq [y0+8], mm7 ; 7 ; save y0 + pmulhw mm6, mm4 ; tm465*tg_3_16 + + movq [y6+8], mm5 ; 5 ; save y6 + movq mm7, mm3 ; 7 ; t7 + + movq mm5, [tg_3_16] ; 5 ; tg_3_16 + psubsw mm7, mm2 ; tm765 = t7 - tp65 + + paddsw mm3, mm2 ; 2 ; tp765 = t7 + tp65 + pmulhw mm5, mm7 ; tm765*tg_3_16 + + paddsw mm0, mm3 ; y1 = tp765 + tp465*tg_1_16 + paddsw mm6, mm4 ; tm465*tg_3_16 + + pmulhw mm3, [tg_1_16] ; tp765*tg_1_16 + ;; + + por mm0, [fdct_one_corr] ; correction y1 +0.5 + paddsw mm5, mm7 ; tm765*tg_3_16 + + psubsw mm7, mm6 ; 6 ; y3 = tm765 - tm465*tg_3_16 + ;; + + movq [y1+8], mm0 ; 0 ; save y1 + paddsw mm5, mm4 ; 4 ; y5 = tm765*tg_3_16 + tm465 + + movq [y3+8], mm7 ; 7 ; save y3 + psubsw mm3, mm1 ; 1 ; y7 = tp765*tg_1_16 - tp465 + + movq [y5+8], mm5 ; 5 ; save y5 + + movq [y7+8], mm3 ; 3 ; save y7 + +; emms; +; } ; end of forward_dct_col07() + ; done with dct_row transform + + + ; fdct_mmx32_cols() -- + ; the following subroutine repeats the row-transform operation, + ; except with different shift&round constants. This version + ; does NOT transpose the output again. Thus the final output + ; is transposed with respect to the source. + ; + ; The output is stored into blk[], which destroys the original + ; input data. + mov INP, [ebp+8]; ;; row 0 + mov edi, 0x08; ;x = 8 + + lea TABLE, [tab_frw_01234567]; ; row 0 + mov OUT, INP; + + lea round_frw_row, [fdct_r_row]; + ; for ( x = 8; x > 0; --x ) ; transform one row per iteration + +; ---------- loop begin + .lp_mmx_fdct_row1: + movd mm5, [INP+12]; ; mm5 = 7 6 + + punpcklwd mm5, [INP+8] ; mm5 = 5 7 4 6 + + movq mm2, mm5; ; mm2 = 5 7 4 6 + psrlq mm5, 32; ; mm5 = _ _ 5 7 + + movq mm0, [INP]; ; mm0 = 3 2 1 0 + punpcklwd mm5, mm2;; mm5 = 4 5 6 7 + + movq mm1, mm0; ; mm1 = 3 2 1 0 + paddsw mm0, mm5; ; mm0 = [3+4, 2+5, 1+6, 0+7] (xt3, xt2, xt1, xt0) + + psubsw mm1, mm5; ; mm1 = [3-4, 2-5, 1-6, 0-7] (xt7, xt6, xt5, xt4) + movq mm2, mm0; ; mm2 = [ xt3 xt2 xt1 xt0 ] + + ;movq [ xt3xt2xt1xt0 ], mm0; + ;movq [ xt7xt6xt5xt4 ], mm1; + + punpcklwd mm0, mm1;; mm0 = [ xt5 xt1 xt4 xt0 ] + + punpckhwd mm2, mm1;; mm2 = [ xt7 xt3 xt6 xt2 ] + movq mm1, mm2; ; mm1 + + ;; shuffle bytes around + +; movq mm0, [INP] ; 0 ; x3 x2 x1 x0 + +; movq mm1, [INP+8] ; 1 ; x7 x6 x5 x4 + movq mm2, mm0 ; 2 ; x3 x2 x1 x0 + + movq mm3, [TABLE] ; 3 ; w06 w04 w02 w00 + punpcklwd mm0, mm1 ; x5 x1 x4 x0 + + movq mm5, mm0 ; 5 ; x5 x1 x4 x0 + punpckldq mm0, mm0 ; x4 x0 x4 x0 [ xt2 xt0 xt2 xt0 ] + + movq mm4, [TABLE+8] ; 4 ; w07 w05 w03 w01 + punpckhwd mm2, mm1 ; 1 ; x7 x3 x6 x2 + + pmaddwd mm3, mm0 ; x4*w06+x0*w04 x4*w02+x0*w00 + movq mm6, mm2 ; 6 ; x7 x3 x6 x2 + + movq mm1, [TABLE+32] ; 1 ; w22 w20 w18 w16 + punpckldq mm2, mm2 ; x6 x2 x6 x2 [ xt3 xt1 xt3 xt1 ] + + pmaddwd mm4, mm2 ; x6*w07+x2*w05 x6*w03+x2*w01 + punpckhdq mm5, mm5 ; x5 x1 x5 x1 [ xt6 xt4 xt6 xt4 ] + + pmaddwd mm0, [TABLE+16] ; x4*w14+x0*w12 x4*w10+x0*w08 + punpckhdq mm6, mm6 ; x7 x3 x7 x3 [ xt7 xt5 xt7 xt5 ] + + movq mm7, [TABLE+40] ; 7 ; w23 w21 w19 w17 + pmaddwd mm1, mm5 ; x5*w22+x1*w20 x5*w18+x1*w16 +;mm3 = a1, a0 (y2,y0) +;mm1 = b1, b0 (y3,y1) +;mm0 = a3,a2 (y6,y4) +;mm5 = b3,b2 (y7,y5) + + paddd mm3, [round_frw_row] ; +rounder (y2,y0) + pmaddwd mm7, mm6 ; x7*w23+x3*w21 x7*w19+x3*w17 + + pmaddwd mm2, [TABLE+24] ; x6*w15+x2*w13 x6*w11+x2*w09 + paddd mm3, mm4 ; 4 ; a1=sum(even1) a0=sum(even0) ; now ( y2, y0) + + pmaddwd mm5, [TABLE+48] ; x5*w30+x1*w28 x5*w26+x1*w24 + ;; + + pmaddwd mm6, [TABLE+56] ; x7*w31+x3*w29 x7*w27+x3*w25 + paddd mm1, mm7 ; 7 ; b1=sum(odd1) b0=sum(odd0) ; now ( y3, y1) + + paddd mm0, [round_frw_row] ; +rounder (y6,y4) + psrad mm3, SHIFT_FRW_ROW ; (y2, y0) + + paddd mm1, [round_frw_row] ; +rounder (y3,y1) + paddd mm0, mm2 ; 2 ; a3=sum(even3) a2=sum(even2) ; now (y6, y4) + + paddd mm5, [round_frw_row] ; +rounder (y7,y5) + psrad mm1, SHIFT_FRW_ROW ; y1=a1+b1 y0=a0+b0 + + paddd mm5, mm6 ; 6 ; b3=sum(odd3) b2=sum(odd2) ; now ( y7, y5) + psrad mm0, SHIFT_FRW_ROW ;y3=a3+b3 y2=a2+b2 + + add OUT, 16; ; increment row-output address by 1 row + psrad mm5, SHIFT_FRW_ROW ; y4=a3-b3 y5=a2-b2 + + add INP, 16; ; increment row-address by 1 row + packssdw mm3, mm0 ; 0 ; y6 y4 y2 y0 + + packssdw mm1, mm5 ; 3 ; y7 y5 y3 y1 + movq mm6, mm3; ; mm0 = y6 y4 y2 y0 + + punpcklwd mm3, mm1; ; y3 y2 y1 y0 + sub edi, 0x01; ; i = i - 1 + + punpckhwd mm6, mm1; ; y7 y6 y5 y4 + add TABLE,64; ; increment to next table + + movq [OUT-16], mm3 ; 1 ; save y3 y2 y1 y0 + + movq [OUT-8], mm6 ; 7 ; save y7 y6 y5 y4 + + cmp edi, 0x00; + jg near .lp_mmx_fdct_row1; ; begin fdct processing on next row + ;; + ;; Tidy up and return + ;; + pop edi + pop edx + pop ecx + pop ebx + + pop ebp ; restore stack pointer + emms + ret +
\ No newline at end of file diff --git a/libavcodec/i386/fdctdata.c b/libavcodec/i386/fdctdata.c new file mode 100644 index 0000000000..e095d0f4e4 --- /dev/null +++ b/libavcodec/i386/fdctdata.c @@ -0,0 +1,143 @@ +////////////////////////////////////////////////////////////////////////////// +// +// fdctam32.c - AP922 MMX(3D-Now) forward-DCT +// ---------- +// Intel Application Note AP-922 - fast, precise implementation of DCT +// http://developer.intel.com/vtune/cbts/appnotes.htm +// ---------- +// +// This routine uses a 3D-Now/MMX enhancement to increase the +// accuracy of the fdct_col_4 macro. The dct_col function uses 3D-Now's +// PMHULHRW instead of MMX's PMHULHW(and POR). The substitution improves +// accuracy very slightly with performance penalty. If the target CPU +// does not support 3D-Now, then this function cannot be executed. +// fdctmm32.c contains the standard MMX implementation of AP-922. +// +// For a fast, precise MMX implementation of inverse-DCT +// visit http://www.elecard.com/peter +// +// v1.0 07/22/2000 (initial release) +// Initial release of AP922 MMX(3D-Now) forward_DCT. +// This code was tested with Visual C++ 6.0Pro + service_pack4 + +// processor_pack_beta! If you have the processor_pack_beta, you can +// remove the #include for amd3dx.h, and substitute the 'normal' +// assembly lines for the macro'd versions. Otherwise, this +// code should compile 'as is', under Visual C++ 6.0 Pro. +// +// liaor@iname.com http://members.tripod.com/~liaor +////////////////////////////////////////////////////////////////////////////// + +#include <inttypes.h> + +////////////////////////////////////////////////////////////////////// +// +// constants for the forward DCT +// ----------------------------- +// +// Be sure to check that your compiler is aligning all constants to QWORD +// (8-byte) memory boundaries! Otherwise the unaligned memory access will +// severely stall MMX execution. +// +////////////////////////////////////////////////////////////////////// + +#define BITS_FRW_ACC 3 //; 2 or 3 for accuracy +#define SHIFT_FRW_COL BITS_FRW_ACC +#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) +//#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1) +#define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) +//#define RND_FRW_COL (2 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_COL-1) +#define RND_FRW_COL (1 << (SHIFT_FRW_COL-1)) + +//concatenated table, for forward DCT transformation +const int16_t fdct_tg_all_16[] = { + 13036, 13036, 13036, 13036, // tg * (2<<16) + 0.5 + 27146, 27146, 27146, 27146, // tg * (2<<16) + 0.5 + -21746, -21746, -21746, -21746, // tg * (2<<16) + 0.5 + -19195, -19195, -19195, -19195, //cos * (2<<16) + 0.5 + 23170, 23170, 23170, 23170 }; //cos * (2<<15) + 0.5 +const long long fdct_one_corr = 0x0001000100010001LL; +const long fdct_r_row[2] = {RND_FRW_ROW, RND_FRW_ROW }; + +const int16_t tab_frw_01234567[] = { // forward_dct coeff table + //row0 + 16384, 16384, 21407, -8867, // w09 w01 w08 w00 + 16384, 16384, 8867, -21407, // w13 w05 w12 w04 + 16384, -16384, 8867, 21407, // w11 w03 w10 w02 + -16384, 16384, -21407, -8867, // w15 w07 w14 w06 + 22725, 12873, 19266, -22725, // w22 w20 w18 w16 + 19266, 4520, -4520, -12873, // w23 w21 w19 w17 + 12873, 4520, 4520, 19266, // w30 w28 w26 w24 + -22725, 19266, -12873, -22725, // w31 w29 w27 w25 + + //row1 + 22725, 22725, 29692, -12299, // w09 w01 w08 w00 + 22725, 22725, 12299, -29692, // w13 w05 w12 w04 + 22725, -22725, 12299, 29692, // w11 w03 w10 w02 + -22725, 22725, -29692, -12299, // w15 w07 w14 w06 + 31521, 17855, 26722, -31521, // w22 w20 w18 w16 + 26722, 6270, -6270, -17855, // w23 w21 w19 w17 + 17855, 6270, 6270, 26722, // w30 w28 w26 w24 + -31521, 26722, -17855, -31521, // w31 w29 w27 w25 + + //row2 + 21407, 21407, 27969, -11585, // w09 w01 w08 w00 + 21407, 21407, 11585, -27969, // w13 w05 w12 w04 + 21407, -21407, 11585, 27969, // w11 w03 w10 w02 + -21407, 21407, -27969, -11585, // w15 w07 w14 w06 + 29692, 16819, 25172, -29692, // w22 w20 w18 w16 + 25172, 5906, -5906, -16819, // w23 w21 w19 w17 + 16819, 5906, 5906, 25172, // w30 w28 w26 w24 + -29692, 25172, -16819, -29692, // w31 w29 w27 w25 + + //row3 + 19266, 19266, 25172, -10426, // w09 w01 w08 w00 + 19266, 19266, 10426, -25172, // w13 w05 w12 w04 + 19266, -19266, 10426, 25172, // w11 w03 w10 w02 + -19266, 19266, -25172, -10426, // w15 w07 w14 w06, + 26722, 15137, 22654, -26722, // w22 w20 w18 w16 + 22654, 5315, -5315, -15137, // w23 w21 w19 w17 + 15137, 5315, 5315, 22654, // w30 w28 w26 w24 + -26722, 22654, -15137, -26722, // w31 w29 w27 w25, + + //row4 + 16384, 16384, 21407, -8867, // w09 w01 w08 w00 + 16384, 16384, 8867, -21407, // w13 w05 w12 w04 + 16384, -16384, 8867, 21407, // w11 w03 w10 w02 + -16384, 16384, -21407, -8867, // w15 w07 w14 w06 + 22725, 12873, 19266, -22725, // w22 w20 w18 w16 + 19266, 4520, -4520, -12873, // w23 w21 w19 w17 + 12873, 4520, 4520, 19266, // w30 w28 w26 w24 + -22725, 19266, -12873, -22725, // w31 w29 w27 w25 + + //row5 + 19266, 19266, 25172, -10426, // w09 w01 w08 w00 + 19266, 19266, 10426, -25172, // w13 w05 w12 w04 + 19266, -19266, 10426, 25172, // w11 w03 w10 w02 + -19266, 19266, -25172, -10426, // w15 w07 w14 w06 + 26722, 15137, 22654, -26722, // w22 w20 w18 w16 + 22654, 5315, -5315, -15137, // w23 w21 w19 w17 + 15137, 5315, 5315, 22654, // w30 w28 w26 w24 + -26722, 22654, -15137, -26722, // w31 w29 w27 w25 + + //row6 + 21407, 21407, 27969, -11585, // w09 w01 w08 w00 + 21407, 21407, 11585, -27969, // w13 w05 w12 w04 + 21407, -21407, 11585, 27969, // w11 w03 w10 w02 + -21407, 21407, -27969, -11585, // w15 w07 w14 w06, + 29692, 16819, 25172, -29692, // w22 w20 w18 w16 + 25172, 5906, -5906, -16819, // w23 w21 w19 w17 + 16819, 5906, 5906, 25172, // w30 w28 w26 w24 + -29692, 25172, -16819, -29692, // w31 w29 w27 w25, + + //row7 + 22725, 22725, 29692, -12299, // w09 w01 w08 w00 + 22725, 22725, 12299, -29692, // w13 w05 w12 w04 + 22725, -22725, 12299, 29692, // w11 w03 w10 w02 + -22725, 22725, -29692, -12299, // w15 w07 w14 w06, + 31521, 17855, 26722, -31521, // w22 w20 w18 w16 + 26722, 6270, -6270, -17855, // w23 w21 w19 w17 + 17855, 6270, 6270, 26722, // w30 w28 w26 w24 + -31521, 26722, -17855, -31521 // w31 w29 w27 w25 +}; + + diff --git a/libavcodec/i386/mmx.h b/libavcodec/i386/mmx.h new file mode 100644 index 0000000000..a82c0eda32 --- /dev/null +++ b/libavcodec/i386/mmx.h @@ -0,0 +1,536 @@ +/* mmx.h + + MultiMedia eXtensions GCC interface library for IA32. + + To use this library, simply include this header file + and compile with GCC. You MUST have inlining enabled + in order for mmx_ok() to work; this can be done by + simply using -O on the GCC command line. + + Compiling with -DMMX_TRACE will cause detailed trace + output to be sent to stderr for each mmx operation. + This adds lots of code, and obviously slows execution to + a crawl, but can be very useful for debugging. + + THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT + LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY + AND FITNESS FOR ANY PARTICULAR PURPOSE. + + 1997-99 by H. Dietz and R. Fisher + + Notes: + It appears that the latest gas has the pand problem fixed, therefore + I'll undefine BROKEN_PAND by default. +*/ + +#ifndef _MMX_H +#define _MMX_H + + +/* Warning: at this writing, the version of GAS packaged + with most Linux distributions does not handle the + parallel AND operation mnemonic correctly. If the + symbol BROKEN_PAND is defined, a slower alternative + coding will be used. If execution of mmxtest results + in an illegal instruction fault, define this symbol. +*/ +#undef BROKEN_PAND + + +/* The type of an value that fits in an MMX register + (note that long long constant values MUST be suffixed + by LL and unsigned long long values by ULL, lest + they be truncated by the compiler) +*/ +typedef union { + long long q; /* Quadword (64-bit) value */ + unsigned long long uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} __attribute__ ((aligned (8))) mmx_t; /* On an 8-byte (64-bit) boundary */ + + +/* Helper functions for the instruction macros that follow... + (note that memory-to-register, m2r, instructions are nearly + as efficient as register-to-register, r2r, instructions; + however, memory-to-memory instructions are really simulated + as a convenience, and are only 1/3 as efficient) +*/ +#ifdef MMX_TRACE + +/* Include the stuff for printing a trace to stderr... +*/ + +#include <stdio.h> + +#define mmx_i2r(op, imm, reg) \ + { \ + mmx_t mmx_trace; \ + mmx_trace.uq = (imm); \ + fprintf(stderr, #op "_i2r(" #imm "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#define mmx_m2r(op, mem, reg) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (mem); \ + fprintf(stderr, #op "_m2r(" #mem "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)); \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #reg "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#define mmx_r2m(op, reg, mem) \ + { \ + mmx_t mmx_trace; \ + __asm__ __volatile__ ("movq %%" #reg ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #op "_r2m(" #reg "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + mmx_trace = (mem); \ + fprintf(stderr, #mem "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ); \ + mmx_trace = (mem); \ + fprintf(stderr, #mem "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#define mmx_r2r(op, regs, regd) \ + { \ + mmx_t mmx_trace; \ + __asm__ __volatile__ ("movq %%" #regs ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #op "_r2r(" #regs "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #regd "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd); \ + __asm__ __volatile__ ("movq %%" #regd ", %0" \ + : "=X" (mmx_trace) \ + : /* nothing */ ); \ + fprintf(stderr, #regd "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#define mmx_m2m(op, mems, memd) \ + { \ + mmx_t mmx_trace; \ + mmx_trace = (mems); \ + fprintf(stderr, #op "_m2m(" #mems "=0x%08x%08x, ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + mmx_trace = (memd); \ + fprintf(stderr, #memd "=0x%08x%08x) => ", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (memd) \ + : "X" (mems)); \ + mmx_trace = (memd); \ + fprintf(stderr, #memd "=0x%08x%08x\n", \ + mmx_trace.d[1], mmx_trace.d[0]); \ + } + +#else + +/* These macros are a lot simpler without the tracing... +*/ + +#define mmx_i2r(op, imm, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "i" (imm) ) + +#define mmx_m2r(op, mem, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem)) + +#define mmx_r2m(op, reg, mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=m" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op, regs, regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + +#define mmx_m2m(op, mems, memd) \ + __asm__ __volatile__ ("movq %0, %%mm0\n\t" \ + #op " %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=m" (memd) \ + : "m" (mems)) + +#endif + + +/* 1x64 MOVe Quadword + (this is both a load and a store... + in fact, it is the only way to store) +*/ +#define movq_m2r(var, reg) mmx_m2r(movq, var, reg) +#define movq_r2m(reg, var) mmx_r2m(movq, reg, var) +#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd) +#define movq(vars, vard) \ + __asm__ __volatile__ ("movq %1, %%mm0\n\t" \ + "movq %%mm0, %0" \ + : "=X" (vard) \ + : "X" (vars)) + + +/* 1x32 MOVe Doubleword + (like movq, this is both load and store... + but is most useful for moving things between + mmx registers and ordinary registers) +*/ +#define movd_m2r(var, reg) mmx_m2r(movd, var, reg) +#define movd_r2m(reg, var) mmx_r2m(movd, reg, var) +#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd) +#define movd(vars, vard) \ + __asm__ __volatile__ ("movd %1, %%mm0\n\t" \ + "movd %%mm0, %0" \ + : "=X" (vard) \ + : "X" (vars)) + + +/* 2x32, 4x16, and 8x8 Parallel ADDs +*/ +#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg) +#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd) +#define paddd(vars, vard) mmx_m2m(paddd, vars, vard) + +#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg) +#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd) +#define paddw(vars, vard) mmx_m2m(paddw, vars, vard) + +#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg) +#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd) +#define paddb(vars, vard) mmx_m2m(paddb, vars, vard) + + +/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic +*/ +#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg) +#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd) +#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard) + +#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg) +#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd) +#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard) + + +/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic +*/ +#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg) +#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd) +#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard) + +#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg) +#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd) +#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel SUBs +*/ +#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg) +#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd) +#define psubd(vars, vard) mmx_m2m(psubd, vars, vard) + +#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg) +#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd) +#define psubw(vars, vard) mmx_m2m(psubw, vars, vard) + +#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg) +#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd) +#define psubb(vars, vard) mmx_m2m(psubb, vars, vard) + + +/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic +*/ +#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg) +#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd) +#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard) + +#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg) +#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd) +#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard) + + +/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic +*/ +#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg) +#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd) +#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard) + +#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg) +#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd) +#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard) + + +/* 4x16 Parallel MULs giving Low 4x16 portions of results +*/ +#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg) +#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd) +#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard) + + +/* 4x16 Parallel MULs giving High 4x16 portions of results +*/ +#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg) +#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd) +#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard) + + +/* 4x16->2x32 Parallel Mul-ADD + (muls like pmullw, then adds adjacent 16-bit fields + in the multiply result to make the final 2x32 result) +*/ +#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg) +#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd) +#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard) + + +/* 1x64 bitwise AND +*/ +#ifdef BROKEN_PAND +#define pand_m2r(var, reg) \ + { \ + mmx_m2r(pandn, (mmx_t) -1LL, reg); \ + mmx_m2r(pandn, var, reg); \ + } +#define pand_r2r(regs, regd) \ + { \ + mmx_m2r(pandn, (mmx_t) -1LL, regd); \ + mmx_r2r(pandn, regs, regd) \ + } +#define pand(vars, vard) \ + { \ + movq_m2r(vard, mm0); \ + mmx_m2r(pandn, (mmx_t) -1LL, mm0); \ + mmx_m2r(pandn, vars, mm0); \ + movq_r2m(mm0, vard); \ + } +#else +#define pand_m2r(var, reg) mmx_m2r(pand, var, reg) +#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd) +#define pand(vars, vard) mmx_m2m(pand, vars, vard) +#endif + + +/* 1x64 bitwise AND with Not the destination +*/ +#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg) +#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd) +#define pandn(vars, vard) mmx_m2m(pandn, vars, vard) + + +/* 1x64 bitwise OR +*/ +#define por_m2r(var, reg) mmx_m2r(por, var, reg) +#define por_r2r(regs, regd) mmx_r2r(por, regs, regd) +#define por(vars, vard) mmx_m2m(por, vars, vard) + + +/* 1x64 bitwise eXclusive OR +*/ +#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg) +#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd) +#define pxor(vars, vard) mmx_m2m(pxor, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality + (resulting fields are either 0 or -1) +*/ +#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd) +#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard) + +#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd) +#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard) + +#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd) +#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard) + + +/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than + (resulting fields are either 0 or -1) +*/ +#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg) +#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd) +#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard) + +#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg) +#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd) +#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard) + +#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd) +#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard) + + +/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical +*/ +#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg) +#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg) +#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd) +#define psllq(vars, vard) mmx_m2m(psllq, vars, vard) + +#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg) +#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg) +#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd) +#define pslld(vars, vard) mmx_m2m(pslld, vars, vard) + +#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg) +#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg) +#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd) +#define psllw(vars, vard) mmx_m2m(psllw, vars, vard) + + +/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical +*/ +#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg) +#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg) +#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd) +#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard) + +#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg) +#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg) +#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd) +#define psrld(vars, vard) mmx_m2m(psrld, vars, vard) + +#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg) +#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg) +#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd) +#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard) + + +/* 2x32 and 4x16 Parallel Shift Right Arithmetic +*/ +#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg) +#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg) +#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd) +#define psrad(vars, vard) mmx_m2m(psrad, vars, vard) + +#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg) +#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg) +#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd) +#define psraw(vars, vard) mmx_m2m(psraw, vars, vard) + + +/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate + (packs source and dest fields into dest in that order) +*/ +#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg) +#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd) +#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard) + +#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg) +#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd) +#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard) + + +/* 4x16->8x8 PACK and Unsigned Saturate + (packs source and dest fields into dest in that order) +*/ +#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg) +#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd) +#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard) + + +/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low + (interleaves low half of dest with low half of source + as padding in each result field) +*/ +#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg) +#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd) +#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard) + +#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg) +#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd) +#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard) + +#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg) +#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd) +#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard) + + +/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High + (interleaves high half of dest with high half of source + as padding in each result field) +*/ +#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg) +#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd) +#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard) + +#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg) +#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd) +#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard) + +#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg) +#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd) +#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard) + + +/* Empty MMx State + (used to clean-up when going from mmx to float use + of the registers that are shared by both; note that + there is no float-to-mmx operation needed, because + only the float tag word info is corruptible) +*/ +#ifdef MMX_TRACE + +#define emms() \ + { \ + fprintf(stderr, "emms()\n"); \ + __asm__ __volatile__ ("emms"); \ + } + +#else + +#define emms() __asm__ __volatile__ ("emms") + +#endif + +#endif + diff --git a/libavcodec/i386/sad_mmx.s b/libavcodec/i386/sad_mmx.s new file mode 100644 index 0000000000..b512639c25 --- /dev/null +++ b/libavcodec/i386/sad_mmx.s @@ -0,0 +1,798 @@ +; MMX/SSE optimized routines for SAD of 16*16 macroblocks +; Copyright (C) Juan J. Sierralta P. <juanjo@atmlab.utfsm.cl> +; +; dist1_* Original Copyright (C) 2000 Chris Atenasio <chris@crud.net> +; Enhancements and rest Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk> + +; +; This program is free software; you can redistribute it and/or +; modify it under the terms of the GNU General Public License +; as published by the Free Software Foundation; either version 2 +; of the License, or (at your option) any later version. +; +; This program is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; if not, write to the Free Software +; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +; + +global pix_abs16x16_mmx + +; int pix_abs16x16_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h); +; esi = p1 (init: blk1) +; edi = p2 (init: blk2) +; ecx = rowsleft (init: h) +; edx = lx; + +; mm0 = distance accumulators (4 words) +; mm1 = distance accumulators (4 words) +; mm2 = temp +; mm3 = temp +; mm4 = temp +; mm5 = temp +; mm6 = 0 +; mm7 = temp + + +align 32 +pix_abs16x16_mmx: + push ebp ; save frame pointer + mov ebp, esp + + push ebx ; Saves registers (called saves convention in + push ecx ; x86 GCC it seems) + push edx ; + push esi + push edi + + pxor mm0, mm0 ; zero acculumators + pxor mm1, mm1 + pxor mm6, mm6 + mov esi, [ebp+8] ; get pix1 + mov edi, [ebp+12] ; get pix2 + mov edx, [ebp+16] ; get lx + mov ecx, [ebp+20] ; get rowsleft + jmp .nextrow +align 32 + +.nextrow: + ; First 8 bytes of the row + + movq mm4, [edi] ; load first 8 bytes of pix2 row + movq mm5, [esi] ; load first 8 bytes of pix1 row + movq mm3, mm4 ; mm4 := abs(mm4-mm5) + movq mm2,[esi+8] ; load last 8 bytes of pix1 row + psubusb mm4, mm5 + movq mm7,[edi+8] ; load last 8 bytes of pix2 row + psubusb mm5, mm3 + por mm4, mm5 + + ; Last 8 bytes of the row + + movq mm3, mm7 ; mm7 := abs(mm7-mm2) + psubusb mm7, mm2 + psubusb mm2, mm3 + por mm7, mm2 + + ; Now mm4 and mm7 have 16 absdiffs to add + + ; First 8 bytes of the row2 + + + add edi, edx + movq mm2, [edi] ; load first 8 bytes of pix2 row + add esi, edx + movq mm5, [esi] ; load first 8 bytes of pix1 row + + + + movq mm3, mm2 ; mm2 := abs(mm2-mm5) + psubusb mm2, mm5 + movq mm6,[esi+8] ; load last 8 bytes of pix1 row + psubusb mm5, mm3 + por mm2, mm5 + + ; Last 8 bytes of the row2 + + movq mm5,[edi+8] ; load last 8 bytes of pix2 row + + + movq mm3, mm5 ; mm5 := abs(mm5-mm6) + psubusb mm5, mm6 + psubusb mm6, mm3 + por mm5, mm6 + + ; Now mm2, mm4, mm5, mm7 have 32 absdiffs + + movq mm3, mm7 + + pxor mm6, mm6 ; Zero mm6 + + punpcklbw mm3, mm6 ; Unpack to words and add + punpckhbw mm7, mm6 + paddusw mm7, mm3 + + movq mm3, mm5 + + punpcklbw mm3, mm6 ; Unpack to words and add + punpckhbw mm5, mm6 + paddusw mm5, mm3 + + paddusw mm0, mm7 ; Add to the acumulator (mm0) + paddusw mm1, mm5 ; Add to the acumulator (mm1) + + movq mm3, mm4 + + punpcklbw mm3, mm6 ; Unpack to words and add + punpckhbw mm4, mm6 + movq mm5, mm2 + paddusw mm4, mm3 + + + + punpcklbw mm5, mm6 ; Unpack to words and add + punpckhbw mm2, mm6 + paddusw mm2, mm5 + + ; Loop termination + + add esi, edx ; update pointers to next row + paddusw mm0, mm4 ; Add to the acumulator (mm0) + add edi, edx + sub ecx,2 + paddusw mm1, mm2 ; Add to the acumulator (mm1) + test ecx, ecx ; check rowsleft + jnz near .nextrow + + paddusw mm0, mm1 + movq mm2, mm0 ; Copy mm0 to mm2 + psrlq mm2, 32 + paddusw mm0, mm2 ; Add + movq mm3, mm0 + psrlq mm3, 16 + paddusw mm0, mm3 + movd eax, mm0 ; Store return value + and eax, 0xffff + + pop edi + pop esi + pop edx + pop ecx + pop ebx + + pop ebp ; restore stack pointer + + ;emms ; clear mmx registers + ret ; return + +global pix_abs16x16_sse + +; int pix_abs16x16_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h); +; esi = p1 (init: blk1) +; edi = p2 (init: blk2) +; ecx = rowsleft (init: h) +; edx = lx; + +; mm0 = distance accumulators (4 words) +; mm1 = distance accumulators (4 words) +; mm2 = temp +; mm3 = temp +; mm4 = temp +; mm5 = temp +; mm6 = temp +; mm7 = temp + + +align 32 +pix_abs16x16_sse: + push ebp ; save frame pointer + mov ebp, esp + + push ebx ; Saves registers (called saves convention in + push ecx ; x86 GCC it seems) + push edx ; + push esi + push edi + + pxor mm0, mm0 ; zero acculumators + pxor mm1, mm1 + mov esi, [ebp+8] ; get pix1 + mov edi, [ebp+12] ; get pix2 + mov edx, [ebp+16] ; get lx + mov ecx, [ebp+20] ; get rowsleft + jmp .next4row +align 32 + +.next4row: + ; First row + + movq mm4, [edi] ; load first 8 bytes of pix2 row + movq mm5, [edi+8] ; load last 8 bytes of pix2 row + psadbw mm4, [esi] ; SAD of first 8 bytes + psadbw mm5, [esi+8] ; SAD of last 8 bytes + paddw mm0, mm4 ; Add to acumulators + paddw mm1, mm5 + + ; Second row + + add edi, edx; + add esi, edx; + + movq mm6, [edi] ; load first 8 bytes of pix2 row + movq mm7, [edi+8] ; load last 8 bytes of pix2 row + psadbw mm6, [esi] ; SAD of first 8 bytes + psadbw mm7, [esi+8] ; SAD of last 8 bytes + paddw mm0, mm6 ; Add to acumulators + paddw mm1, mm7 + + ; Third row + + add edi, edx; + add esi, edx; + + movq mm4, [edi] ; load first 8 bytes of pix2 row + movq mm5, [edi+8] ; load last 8 bytes of pix2 row + psadbw mm4, [esi] ; SAD of first 8 bytes + psadbw mm5, [esi+8] ; SAD of last 8 bytes + paddw mm0, mm4 ; Add to acumulators + paddw mm1, mm5 + + ; Fourth row + + add edi, edx; + add esi, edx; + + movq mm6, [edi] ; load first 8 bytes of pix2 row + movq mm7, [edi+8] ; load last 8 bytes of pix2 row + psadbw mm6, [esi] ; SAD of first 8 bytes + psadbw mm7, [esi+8] ; SAD of last 8 bytes + paddw mm0, mm6 ; Add to acumulators + paddw mm1, mm7 + + ; Loop termination + + add esi, edx ; update pointers to next row + add edi, edx + sub ecx,4 + test ecx, ecx ; check rowsleft + jnz near .next4row + + paddd mm0, mm1 ; Sum acumulators + movd eax, mm0 ; Store return value + + pop edi + pop esi + pop edx + pop ecx + pop ebx + + pop ebp ; restore stack pointer + + ;emms ; clear mmx registers + ret ; return + +global pix_abs16x16_x2_mmx + +; int pix_abs16x16_x2_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h); +; esi = p1 (init: blk1) +; edi = p2 (init: blk2) +; ecx = rowsleft (init: h) +; edx = lx; + +; mm0 = distance accumulators (4 words) +; mm1 = distance accumulators (4 words) +; mm2 = temp +; mm3 = temp +; mm4 = temp +; mm5 = temp +; mm6 = 0 +; mm7 = temp + + +align 32 +pix_abs16x16_x2_mmx: + push ebp ; save frame pointer + mov ebp, esp + + push ebx ; Saves registers (called saves convention in + push ecx ; x86 GCC it seems) + push edx ; + push esi + push edi + + pxor mm0, mm0 ; zero acculumators + pxor mm1, mm1 + pxor mm6, mm6 + mov esi, [ebp+8] ; get pix1 + mov edi, [ebp+12] ; get pix2 + mov edx, [ebp+16] ; get lx + mov ecx, [ebp+20] ; get rowsleft + jmp .nextrow_x2 +align 32 + +.nextrow_x2: + ; First 8 bytes of the row + + movq mm4, [edi] ; load first 8 bytes of pix2 row + movq mm5, [edi+1] ; load bytes 1-8 of pix2 row + + movq mm2, mm4 ; copy mm4 on mm2 + movq mm3, mm5 ; copy mm5 on mm3 + punpcklbw mm4, mm6 ; first 4 bytes of [edi] on mm4 + punpcklbw mm5, mm6 ; first 4 bytes of [edi+1] on mm5 + paddusw mm4, mm5 ; mm4 := first 4 bytes interpolated in words + psrlw mm4, 1 + + punpckhbw mm2, mm6 ; last 4 bytes of [edi] on mm2 + punpckhbw mm3, mm6 ; last 4 bytes of [edi+1] on mm3 + paddusw mm2, mm3 ; mm2 := last 4 bytes interpolated in words + psrlw mm2, 1 + + packuswb mm4, mm2 ; pack 8 bytes interpolated on mm4 + movq mm5,[esi] ; load first 8 bytes of pix1 row + + movq mm3, mm4 ; mm4 := abs(mm4-mm5) + psubusb mm4, mm5 + psubusb mm5, mm3 + por mm4, mm5 + + ; Last 8 bytes of the row + + movq mm7, [edi+8] ; load last 8 bytes of pix2 row + movq mm5, [edi+9] ; load bytes 10-17 of pix2 row + + movq mm2, mm7 ; copy mm7 on mm2 + movq mm3, mm5 ; copy mm5 on mm3 + punpcklbw mm7, mm6 ; first 4 bytes of [edi+8] on mm7 + punpcklbw mm5, mm6 ; first 4 bytes of [edi+9] on mm5 + paddusw mm7, mm5 ; mm1 := first 4 bytes interpolated in words + psrlw mm7, 1 + + punpckhbw mm2, mm6 ; last 4 bytes of [edi] on mm2 + punpckhbw mm3, mm6 ; last 4 bytes of [edi+1] on mm3 + paddusw mm2, mm3 ; mm2 := last 4 bytes interpolated in words + psrlw mm2, 1 + + packuswb mm7, mm2 ; pack 8 bytes interpolated on mm1 + movq mm5,[esi+8] ; load last 8 bytes of pix1 row + + movq mm3, mm7 ; mm7 := abs(mm1-mm5) + psubusb mm7, mm5 + psubusb mm5, mm3 + por mm7, mm5 + + ; Now mm4 and mm7 have 16 absdiffs to add + + movq mm3, mm4 ; Make copies of these bytes + movq mm2, mm7 + + punpcklbw mm4, mm6 ; Unpack to words and add + punpcklbw mm7, mm6 + paddusw mm4, mm7 + paddusw mm0, mm4 ; Add to the acumulator (mm0) + + punpckhbw mm3, mm6 ; Unpack to words and add + punpckhbw mm2, mm6 + paddusw mm3, mm2 + paddusw mm1, mm3 ; Add to the acumulator (mm1) + + ; Loop termination + + add esi, edx ; update pointers to next row + add edi, edx + + sub ecx,1 + test ecx, ecx ; check rowsleft + jnz near .nextrow_x2 + + paddusw mm0, mm1 + + movq mm1, mm0 ; Copy mm0 to mm1 + psrlq mm1, 32 + paddusw mm0, mm1 ; Add + movq mm2, mm0 + psrlq mm2, 16 + paddusw mm0, mm2 + movd eax, mm0 ; Store return value + and eax, 0xffff + + pop edi + pop esi + pop edx + pop ecx + pop ebx + + pop ebp ; restore stack pointer + + emms ; clear mmx registers + ret ; return + +global pix_abs16x16_y2_mmx + +; int pix_abs16x16_y2_mmx(unsigned char *pix1,unsigned char *pix2, int lx, int h); +; esi = p1 (init: blk1) +; edi = p2 (init: blk2) +; ebx = p2 + lx +; ecx = rowsleft (init: h) +; edx = lx; + +; mm0 = distance accumulators (4 words) +; mm1 = distance accumulators (4 words) +; mm2 = temp +; mm3 = temp +; mm4 = temp +; mm5 = temp +; mm6 = 0 +; mm7 = temp + + +align 32 +pix_abs16x16_y2_mmx: + push ebp ; save frame pointer + mov ebp, esp + + push ebx ; Saves registers (called saves convention in + push ecx ; x86 GCC it seems) + push edx ; + push esi + push edi + + pxor mm0, mm0 ; zero acculumators + pxor mm1, mm1 + pxor mm6, mm6 + mov esi, [ebp+8] ; get pix1 + mov edi, [ebp+12] ; get pix2 + mov edx, [ebp+16] ; get lx + mov ecx, [ebp+20] ; get rowsleft + mov ebx, edi + add ebx, edx + jmp .nextrow_y2 +align 32 + +.nextrow_y2: + ; First 8 bytes of the row + + movq mm4, [edi] ; load first 8 bytes of pix2 row + movq mm5, [ebx] ; load bytes 1-8 of pix2 row + + movq mm2, mm4 ; copy mm4 on mm2 + movq mm3, mm5 ; copy mm5 on mm3 + punpcklbw mm4, mm6 ; first 4 bytes of [edi] on mm4 + punpcklbw mm5, mm6 ; first 4 bytes of [ebx] on mm5 + paddusw mm4, mm5 ; mm4 := first 4 bytes interpolated in words + psrlw mm4, 1 + + punpckhbw mm2, mm6 ; last 4 bytes of [edi] on mm2 + punpckhbw mm3, mm6 ; last 4 bytes of [edi+1] on mm3 + paddusw mm2, mm3 ; mm2 := last 4 bytes interpolated in words + psrlw mm2, 1 + + packuswb mm4, mm2 ; pack 8 bytes interpolated on mm4 + movq mm5,[esi] ; load first 8 bytes of pix1 row + + movq mm3, mm4 ; mm4 := abs(mm4-mm5) + psubusb mm4, mm5 + psubusb mm5, mm3 + por mm4, mm5 + + ; Last 8 bytes of the row + + movq mm7, [edi+8] ; load last 8 bytes of pix2 row + movq mm5, [ebx+8] ; load bytes 10-17 of pix2 row + + movq mm2, mm7 ; copy mm7 on mm2 + movq mm3, mm5 ; copy mm5 on mm3 + punpcklbw mm7, mm6 ; first 4 bytes of [edi+8] on mm7 + punpcklbw mm5, mm6 ; first 4 bytes of [ebx+8] on mm5 + paddusw mm7, mm5 ; mm1 := first 4 bytes interpolated in words + psrlw mm7, 1 + + punpckhbw mm2, mm6 ; last 4 bytes of [edi+8] on mm2 + punpckhbw mm3, mm6 ; last 4 bytes of [ebx+8] on mm3 + paddusw mm2, mm3 ; mm2 := last 4 bytes interpolated in words + psrlw mm2, 1 + + packuswb mm7, mm2 ; pack 8 bytes interpolated on mm1 + movq mm5,[esi+8] ; load last 8 bytes of pix1 row + + movq mm3, mm7 ; mm7 := abs(mm1-mm5) + psubusb mm7, mm5 + psubusb mm5, mm3 + por mm7, mm5 + + ; Now mm4 and mm7 have 16 absdiffs to add + + movq mm3, mm4 ; Make copies of these bytes + movq mm2, mm7 + + punpcklbw mm4, mm6 ; Unpack to words and add + punpcklbw mm7, mm6 + paddusw mm4, mm7 + paddusw mm0, mm4 ; Add to the acumulator (mm0) + + punpckhbw mm3, mm6 ; Unpack to words and add + punpckhbw mm2, mm6 + paddusw mm3, mm2 + paddusw mm1, mm3 ; Add to the acumulator (mm1) + + ; Loop termination + + add esi, edx ; update pointers to next row + add edi, edx + add ebx, edx + sub ecx,1 + test ecx, ecx ; check rowsleft + jnz near .nextrow_y2 + + paddusw mm0, mm1 + + movq mm1, mm0 ; Copy mm0 to mm1 + psrlq mm1, 32 + paddusw mm0, mm1 ; Add + movq mm2, mm0 + psrlq mm2, 16 + paddusw mm0, mm2 + movd eax, mm0 ; Store return value + and eax, 0xffff + + pop edi + pop esi + pop edx + pop ecx + pop ebx + + pop ebp ; restore stack pointer + + emms ; clear mmx registers + ret ; return + +global pix_abs16x16_xy2_mmx + +; int pix_abs16x16_xy2_mmx(unsigned char *p1,unsigned char *p2,int lx,int h); + +; esi = p1 (init: blk1) +; edi = p2 (init: blk2) +; ebx = p1+lx +; ecx = rowsleft (init: h) +; edx = lx; + +; mm0 = distance accumulators (4 words) +; mm1 = bytes p2 +; mm2 = bytes p1 +; mm3 = bytes p1+lx +; I'd love to find someplace to stash p1+1 and p1+lx+1's bytes +; but I don't think thats going to happen in iA32-land... +; mm4 = temp 4 bytes in words interpolating p1, p1+1 +; mm5 = temp 4 bytes in words from p2 +; mm6 = temp comparison bit mask p1,p2 +; mm7 = temp comparison bit mask p2,p1 + + +align 32 +pix_abs16x16_xy2_mmx: + push ebp ; save stack pointer + mov ebp, esp ; so that we can do this + + push ebx ; Saves registers (called saves convention in + push ecx ; x86 GCC it seems) + push edx ; + push esi + push edi + + pxor mm0, mm0 ; zero acculumators + + mov esi, [ebp+12] ; get p1 + mov edi, [ebp+8] ; get p2 + mov edx, [ebp+16] ; get lx + mov ecx, [ebp+20] ; rowsleft := h + mov ebx, esi + add ebx, edx + jmp .nextrowmm11 ; snap to it +align 32 +.nextrowmm11: + + ;; + ;; First 8 bytes of row + ;; + + ;; First 4 bytes of 8 + + movq mm4, [esi] ; mm4 := first 4 bytes p1 + pxor mm7, mm7 + movq mm2, mm4 ; mm2 records all 8 bytes + punpcklbw mm4, mm7 ; First 4 bytes p1 in Words... + + movq mm6, [ebx] ; mm6 := first 4 bytes p1+lx + movq mm3, mm6 ; mm3 records all 8 bytes + punpcklbw mm6, mm7 + paddw mm4, mm6 + + + movq mm5, [esi+1] ; mm5 := first 4 bytes p1+1 + punpcklbw mm5, mm7 ; First 4 bytes p1 in Words... + paddw mm4, mm5 + movq mm6, [ebx+1] ; mm6 := first 4 bytes p1+lx+1 + punpcklbw mm6, mm7 + paddw mm4, mm6 + + psrlw mm4, 2 ; mm4 := First 4 bytes interpolated in words + + movq mm5, [edi] ; mm5:=first 4 bytes of p2 in words + movq mm1, mm5 + punpcklbw mm5, mm7 + + movq mm7,mm4 + pcmpgtw mm7,mm5 ; mm7 := [i : W0..3,mm4>mm5] + + movq mm6,mm4 ; mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)] + psubw mm6,mm5 + pand mm6, mm7 + + paddw mm0, mm6 ; Add to accumulator + + movq mm6,mm5 ; mm6 := [i : W0..3,mm5>mm4] + pcmpgtw mm6,mm4 + psubw mm5,mm4 ; mm5 := [i : B0..7, (mm5-mm4)*(mm5-mm4 > 0)] + pand mm5, mm6 + + paddw mm0, mm5 ; Add to accumulator + + ;; Second 4 bytes of 8 + + movq mm4, mm2 ; mm4 := Second 4 bytes p1 in words + pxor mm7, mm7 + punpckhbw mm4, mm7 + movq mm6, mm3 ; mm6 := Second 4 bytes p1+1 in words + punpckhbw mm6, mm7 + paddw mm4, mm6 + + movq mm5, [esi+1] ; mm5 := first 4 bytes p1+1 + punpckhbw mm5, mm7 ; First 4 bytes p1 in Words... + paddw mm4, mm5 + movq mm6, [ebx+1] ; mm6 := first 4 bytes p1+lx+1 + punpckhbw mm6, mm7 + paddw mm4, mm6 + + psrlw mm4, 2 ; mm4 := First 4 bytes interpolated in words + + movq mm5, mm1 ; mm5:= second 4 bytes of p2 in words + punpckhbw mm5, mm7 + + movq mm7,mm4 + pcmpgtw mm7,mm5 ; mm7 := [i : W0..3,mm4>mm5] + + movq mm6,mm4 ; mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)] + psubw mm6,mm5 + pand mm6, mm7 + + paddw mm0, mm6 ; Add to accumulator + + movq mm6,mm5 ; mm6 := [i : W0..3,mm5>mm4] + pcmpgtw mm6,mm4 + psubw mm5,mm4 ; mm5 := [i : B0..7, (mm5-mm4)*(mm5-mm4 > 0)] + pand mm5, mm6 + + paddw mm0, mm5 ; Add to accumulator + + + ;; + ;; Second 8 bytes of row + ;; + ;; First 4 bytes of 8 + + movq mm4, [esi+8] ; mm4 := first 4 bytes p1+8 + pxor mm7, mm7 + movq mm2, mm4 ; mm2 records all 8 bytes + punpcklbw mm4, mm7 ; First 4 bytes p1 in Words... + + movq mm6, [ebx+8] ; mm6 := first 4 bytes p1+lx+8 + movq mm3, mm6 ; mm3 records all 8 bytes + punpcklbw mm6, mm7 + paddw mm4, mm6 + + + movq mm5, [esi+9] ; mm5 := first 4 bytes p1+9 + punpcklbw mm5, mm7 ; First 4 bytes p1 in Words... + paddw mm4, mm5 + movq mm6, [ebx+9] ; mm6 := first 4 bytes p1+lx+9 + punpcklbw mm6, mm7 + paddw mm4, mm6 + + psrlw mm4, 2 ; mm4 := First 4 bytes interpolated in words + + movq mm5, [edi+8] ; mm5:=first 4 bytes of p2+8 in words + movq mm1, mm5 + punpcklbw mm5, mm7 + + movq mm7,mm4 + pcmpgtw mm7,mm5 ; mm7 := [i : W0..3,mm4>mm5] + + movq mm6,mm4 ; mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)] + psubw mm6,mm5 + pand mm6, mm7 + + paddw mm0, mm6 ; Add to accumulator + + movq mm6,mm5 ; mm6 := [i : W0..3,mm5>mm4] + pcmpgtw mm6,mm4 + psubw mm5,mm4 ; mm5 := [i : B0..7, (mm5-mm4)*(mm5-mm4 > 0)] + pand mm5, mm6 + + paddw mm0, mm5 ; Add to accumulator + + ;; Second 4 bytes of 8 + + movq mm4, mm2 ; mm4 := Second 4 bytes p1 in words + pxor mm7, mm7 + punpckhbw mm4, mm7 + movq mm6, mm3 ; mm6 := Second 4 bytes p1+1 in words + punpckhbw mm6, mm7 + paddw mm4, mm6 + + movq mm5, [esi+9] ; mm5 := first 4 bytes p1+1 + punpckhbw mm5, mm7 ; First 4 bytes p1 in Words... + paddw mm4, mm5 + movq mm6, [ebx+9] ; mm6 := first 4 bytes p1+lx+1 + punpckhbw mm6, mm7 + paddw mm4, mm6 + + psrlw mm4, 2 ; mm4 := First 4 bytes interpolated in words + + movq mm5, mm1 ; mm5:= second 4 bytes of p2 in words + punpckhbw mm5, mm7 + + movq mm7,mm4 + pcmpgtw mm7,mm5 ; mm7 := [i : W0..3,mm4>mm5] + + movq mm6,mm4 ; mm6 := [i : W0..3, (mm4-mm5)*(mm4-mm5 > 0)] + psubw mm6,mm5 + pand mm6, mm7 + + paddw mm0, mm6 ; Add to accumulator + + movq mm6,mm5 ; mm6 := [i : W0..3,mm5>mm4] + pcmpgtw mm6,mm4 + psubw mm5,mm4 ; mm5 := [i : B0..7, (mm5-mm4)*(mm5-mm4 > 0)] + pand mm5, mm6 + + paddw mm0, mm5 ; Add to accumulator + + + ;; + ;; Loop termination condition... and stepping + ;; + + add esi, edx ; update pointer to next row + add edi, edx ; ditto + add ebx, edx + + sub ecx,1 + test ecx, ecx ; check rowsleft + jnz near .nextrowmm11 + + ;; Sum the Accumulators + movq mm4, mm0 + psrlq mm4, 32 + paddw mm0, mm4 + movq mm6, mm0 + psrlq mm6, 16 + paddw mm0, mm6 + movd eax, mm0 ; store return value + and eax, 0xffff + + pop edi + pop esi + pop edx + pop ecx + pop ebx + + pop ebp ; restore stack pointer + + emms ; clear mmx registers + ret ; we now return you to your regular programming + + diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c new file mode 100644 index 0000000000..01289d4d83 --- /dev/null +++ b/libavcodec/imgconvert.c @@ -0,0 +1,214 @@ +/* + * Misc image convertion routines + * Copyright (c) 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "avcodec.h" + +/* XXX: totally non optimized */ + +static void yuv422_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, + UINT8 *src, int width, int height) +{ + int x, y; + UINT8 *p = src; + + for(y=0;y<height;y+=2) { + for(x=0;x<width;x+=2) { + lum[0] = p[0]; + cb[0] = p[1]; + lum[1] = p[2]; + cr[0] = p[3]; + p += 4; + lum += 2; + cb++; + cr++; + } + for(x=0;x<width;x+=2) { + lum[0] = p[0]; + lum[1] = p[2]; + p += 4; + lum += 2; + } + } +} + +#define SCALEBITS 8 +#define ONE_HALF (1 << (SCALEBITS - 1)) +#define FIX(x) ((int) ((x) * (1L<<SCALEBITS) + 0.5)) + +static void rgb24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, + UINT8 *src, int width, int height) +{ + int wrap, wrap3, x, y; + int r, g, b, r1, g1, b1; + UINT8 *p; + + wrap = width; + wrap3 = width * 3; + p = src; + for(y=0;y<height;y+=2) { + for(x=0;x<width;x+=2) { + r = p[0]; + g = p[1]; + b = p[2]; + r1 = r; + g1 = g; + b1 = b; + lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; + r = p[3]; + g = p[4]; + b = p[5]; + r1 += r; + g1 += g; + b1 += b; + lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; + p += wrap3; + lum += wrap; + + r = p[0]; + g = p[1]; + b = p[2]; + r1 += r; + g1 += g; + b1 += b; + lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; + r = p[3]; + g = p[4]; + b = p[5]; + r1 += r; + g1 += g; + b1 += b; + lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; + + cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + + FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; + cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - + FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; + + cb++; + cr++; + p += -wrap3 + 2 * 3; + lum += -wrap + 2; + } + p += wrap3; + lum += wrap; + } +} + +static void bgr24_to_yuv420p(UINT8 *lum, UINT8 *cb, UINT8 *cr, + UINT8 *src, int width, int height) +{ + int wrap, wrap3, x, y; + int r, g, b, r1, g1, b1; + UINT8 *p; + + wrap = width; + wrap3 = width * 3; + p = src; + for(y=0;y<height;y+=2) { + for(x=0;x<width;x+=2) { + b = p[0]; + g = p[1]; + r = p[2]; + r1 = r; + g1 = g; + b1 = b; + lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; + b = p[3]; + g = p[4]; + r = p[5]; + r1 += r; + g1 += g; + b1 += b; + lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; + p += wrap3; + lum += wrap; + + b = p[0]; + g = p[1]; + r = p[2]; + r1 += r; + g1 += g; + b1 += b; + lum[0] = (FIX(0.29900) * r + FIX(0.58700) * g + + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; + b = p[3]; + g = p[4]; + r = p[5]; + r1 += r; + g1 += g; + b1 += b; + lum[1] = (FIX(0.29900) * r + FIX(0.58700) * g + + FIX(0.11400) * b + ONE_HALF) >> SCALEBITS; + + cb[0] = ((- FIX(0.16874) * r1 - FIX(0.33126) * g1 + + FIX(0.50000) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; + cr[0] = ((FIX(0.50000) * r1 - FIX(0.41869) * g1 - + FIX(0.08131) * b1 + 4 * ONE_HALF - 1) >> (SCALEBITS + 2)) + 128; + + cb++; + cr++; + p += -wrap3 + 2 * 3; + lum += -wrap + 2; + } + p += wrap3; + lum += wrap; + } +} + +int img_convert_to_yuv420(UINT8 *img_out, UINT8 *img, + int pix_fmt, int width, int height) +{ + UINT8 *pict; + int size, size_out; + UINT8 *picture[3]; + + pict = img_out; + size = width * height; + size_out = (size * 3) / 2; + picture[0] = pict; + picture[1] = pict + size; + picture[2] = picture[1] + (size / 4); + + switch(pix_fmt) { + case PIX_FMT_YUV420P: + memcpy(pict, img, size_out); + break; + case PIX_FMT_YUV422: + yuv422_to_yuv420p(picture[0], picture[1], picture[2], + img, width, height); + break; + case PIX_FMT_RGB24: + rgb24_to_yuv420p(picture[0], picture[1], picture[2], + img, width, height); + break; + case PIX_FMT_BGR24: + bgr24_to_yuv420p(picture[0], picture[1], picture[2], + img, width, height); + break; + } + return size_out; +} diff --git a/libavcodec/imgresample.c b/libavcodec/imgresample.c new file mode 100644 index 0000000000..99153013cf --- /dev/null +++ b/libavcodec/imgresample.c @@ -0,0 +1,614 @@ +/* + * High quality image resampling with polyphase filters + * Copyright (c) 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include "dsputil.h" +#include "avcodec.h" + +#define NB_COMPONENTS 3 + +#define PHASE_BITS 4 +#define NB_PHASES (1 << PHASE_BITS) +#define NB_TAPS 4 +#define FCENTER 1 /* index of the center of the filter */ + +#define POS_FRAC_BITS 16 +#define POS_FRAC (1 << POS_FRAC_BITS) +/* 6 bits precision is needed for MMX */ +#define FILTER_BITS 8 + +#define LINE_BUF_HEIGHT (NB_TAPS * 4) + +struct ImgReSampleContext { + int iwidth, iheight, owidth, oheight; + int h_incr, v_incr; + INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */ + INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */ + UINT8 *line_buf; +}; + +static inline int get_phase(int pos) +{ + return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); +} + +/* This function must be optimized */ +static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width, + int src_start, int src_incr, INT16 *filters) +{ + int src_pos, phase, sum, i; + UINT8 *s; + INT16 *filter; + + src_pos = src_start; + for(i=0;i<dst_width;i++) { +#ifdef TEST + /* test */ + if ((src_pos >> POS_FRAC_BITS) < 0 || + (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS)) + abort(); +#endif + s = src + (src_pos >> POS_FRAC_BITS); + phase = get_phase(src_pos); + filter = filters + phase * NB_TAPS; +#if NB_TAPS == 4 + sum = s[0] * filter[0] + + s[1] * filter[1] + + s[2] * filter[2] + + s[3] * filter[3]; +#else + { + int j; + sum = 0; + for(j=0;j<NB_TAPS;j++) + sum += s[j] * filter[j]; + } +#endif + sum = sum >> FILTER_BITS; + if (sum < 0) + sum = 0; + else if (sum > 255) + sum = 255; + dst[0] = sum; + src_pos += src_incr; + dst++; + } +} + +/* This function must be optimized */ +static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap, + INT16 *filter) +{ + int sum, i; + UINT8 *s; + + s = src; + for(i=0;i<dst_width;i++) { +#if NB_TAPS == 4 + sum = s[0 * wrap] * filter[0] + + s[1 * wrap] * filter[1] + + s[2 * wrap] * filter[2] + + s[3 * wrap] * filter[3]; +#else + { + int j; + UINT8 *s1 = s; + + sum = 0; + for(j=0;j<NB_TAPS;j++) { + sum += s1[0] * filter[j]; + s1 += wrap; + } + } +#endif + sum = sum >> FILTER_BITS; + if (sum < 0) + sum = 0; + else if (sum > 255) + sum = 255; + dst[0] = sum; + dst++; + s++; + } +} + +#ifdef CONFIG_MMX + +#include "i386/mmx.h" + +#define FILTER4(reg) \ +{\ + s = src + (src_pos >> POS_FRAC_BITS);\ + phase = get_phase(src_pos);\ + filter = filters + phase * NB_TAPS;\ + movq_m2r(*s, reg);\ + punpcklbw_r2r(mm7, reg);\ + movq_m2r(*filter, mm6);\ + pmaddwd_r2r(reg, mm6);\ + movq_r2r(mm6, reg);\ + psrlq_i2r(32, reg);\ + paddd_r2r(mm6, reg);\ + psrad_i2r(FILTER_BITS, reg);\ + src_pos += src_incr;\ +} + +#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq); + +/* XXX: do four pixels at a time */ +static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width, + int src_start, int src_incr, INT16 *filters) +{ + int src_pos, phase; + UINT8 *s; + INT16 *filter; + mmx_t tmp; + + src_pos = src_start; + pxor_r2r(mm7, mm7); + + while (dst_width >= 4) { + + FILTER4(mm0); + FILTER4(mm1); + FILTER4(mm2); + FILTER4(mm3); + + packuswb_r2r(mm7, mm0); + packuswb_r2r(mm7, mm1); + packuswb_r2r(mm7, mm3); + packuswb_r2r(mm7, mm2); + movq_r2m(mm0, tmp); + dst[0] = tmp.ub[0]; + movq_r2m(mm1, tmp); + dst[1] = tmp.ub[0]; + movq_r2m(mm2, tmp); + dst[2] = tmp.ub[0]; + movq_r2m(mm3, tmp); + dst[3] = tmp.ub[0]; + dst += 4; + dst_width -= 4; + } + while (dst_width > 0) { + FILTER4(mm0); + packuswb_r2r(mm7, mm0); + movq_r2m(mm0, tmp); + dst[0] = tmp.ub[0]; + dst++; + dst_width--; + } + emms(); +} + +static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap, + INT16 *filter) +{ + int sum, i, v; + UINT8 *s; + mmx_t tmp; + mmx_t coefs[4]; + + for(i=0;i<4;i++) { + v = filter[i]; + coefs[i].uw[0] = v; + coefs[i].uw[1] = v; + coefs[i].uw[2] = v; + coefs[i].uw[3] = v; + } + + pxor_r2r(mm7, mm7); + s = src; + while (dst_width >= 4) { + movq_m2r(s[0 * wrap], mm0); + punpcklbw_r2r(mm7, mm0); + movq_m2r(s[1 * wrap], mm1); + punpcklbw_r2r(mm7, mm1); + movq_m2r(s[2 * wrap], mm2); + punpcklbw_r2r(mm7, mm2); + movq_m2r(s[3 * wrap], mm3); + punpcklbw_r2r(mm7, mm3); + + pmullw_m2r(coefs[0], mm0); + pmullw_m2r(coefs[1], mm1); + pmullw_m2r(coefs[2], mm2); + pmullw_m2r(coefs[3], mm3); + + paddw_r2r(mm1, mm0); + paddw_r2r(mm3, mm2); + paddw_r2r(mm2, mm0); + psraw_i2r(FILTER_BITS, mm0); + + packuswb_r2r(mm7, mm0); + movq_r2m(mm0, tmp); + + *(UINT32 *)dst = tmp.ud[0]; + dst += 4; + s += 4; + dst_width -= 4; + } + while (dst_width > 0) { + sum = s[0 * wrap] * filter[0] + + s[1 * wrap] * filter[1] + + s[2 * wrap] * filter[2] + + s[3 * wrap] * filter[3]; + sum = sum >> FILTER_BITS; + if (sum < 0) + sum = 0; + else if (sum > 255) + sum = 255; + dst[0] = sum; + dst++; + s++; + dst_width--; + } + emms(); +} +#endif + +/* slow version to handle limit cases. Does not need optimisation */ +static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width, + int src_start, int src_incr, INT16 *filters) +{ + int src_pos, phase, sum, j, v, i; + UINT8 *s, *src_end; + INT16 *filter; + + src_end = src + src_width; + src_pos = src_start; + for(i=0;i<dst_width;i++) { + s = src + (src_pos >> POS_FRAC_BITS); + phase = get_phase(src_pos); + filter = filters + phase * NB_TAPS; + sum = 0; + for(j=0;j<NB_TAPS;j++) { + if (s < src) + v = src[0]; + else if (s >= src_end) + v = src_end[-1]; + else + v = s[0]; + sum += v * filter[j]; + s++; + } + sum = sum >> FILTER_BITS; + if (sum < 0) + sum = 0; + else if (sum > 255) + sum = 255; + dst[0] = sum; + src_pos += src_incr; + dst++; + } +} + +static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width, + int src_start, int src_incr, INT16 *filters) +{ + int n, src_end; + + if (src_start < 0) { + n = (0 - src_start + src_incr - 1) / src_incr; + h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters); + dst += n; + dst_width -= n; + src_start += n * src_incr; + } + src_end = src_start + dst_width * src_incr; + if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) { + n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / + src_incr; + } else { + n = dst_width; + } +#ifdef CONFIG_MMX + if ((mm_flags & MM_MMX) && NB_TAPS == 4) + h_resample_fast4_mmx(dst, n, + src, src_width, src_start, src_incr, filters); + else +#endif + h_resample_fast(dst, n, + src, src_width, src_start, src_incr, filters); + if (n < dst_width) { + dst += n; + dst_width -= n; + src_start += n * src_incr; + h_resample_slow(dst, dst_width, + src, src_width, src_start, src_incr, filters); + } +} + +static void component_resample(ImgReSampleContext *s, + UINT8 *output, int owrap, int owidth, int oheight, + UINT8 *input, int iwrap, int iwidth, int iheight) +{ + int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y; + UINT8 *new_line, *src_line; + + last_src_y = - FCENTER - 1; + /* position of the bottom of the filter in the source image */ + src_y = (last_src_y + NB_TAPS) * POS_FRAC; + ring_y = NB_TAPS; /* position in ring buffer */ + for(y=0;y<oheight;y++) { + /* apply horizontal filter on new lines from input if needed */ + src_y1 = src_y >> POS_FRAC_BITS; + while (last_src_y < src_y1) { + if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS) + ring_y = NB_TAPS; + last_src_y++; + /* handle limit conditions : replicate line (slighly + inefficient because we filter multiple times */ + y1 = last_src_y; + if (y1 < 0) { + y1 = 0; + } else if (y1 >= iheight) { + y1 = iheight - 1; + } + src_line = input + y1 * iwrap; + new_line = s->line_buf + ring_y * owidth; + /* apply filter and handle limit cases correctly */ + h_resample(new_line, owidth, + src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, + &s->h_filters[0][0]); + /* handle ring buffer wraping */ + if (ring_y >= LINE_BUF_HEIGHT) { + memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth, + new_line, owidth); + } + } + /* apply vertical filter */ + phase_y = get_phase(src_y); +#ifdef CONFIG_MMX + /* desactivated MMX because loss of precision */ + if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0) + v_resample4_mmx(output, owidth, + s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, + &s->v_filters[phase_y][0]); + else +#endif + v_resample(output, owidth, + s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, + &s->v_filters[phase_y][0]); + + src_y += s->v_incr; + output += owrap; + } +} + +/* XXX: the following filter is quite naive, but it seems to suffice + for 4 taps */ +static void build_filter(INT16 *filter, float factor) +{ + int ph, i, v; + float x, y, tab[NB_TAPS], norm, mult; + + /* if upsampling, only need to interpolate, no filter */ + if (factor > 1.0) + factor = 1.0; + + for(ph=0;ph<NB_PHASES;ph++) { + norm = 0; + for(i=0;i<NB_TAPS;i++) { + + x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor; + if (x == 0) + y = 1.0; + else + y = sin(x) / x; + tab[i] = y; + norm += y; + } + + /* normalize so that an uniform color remains the same */ + mult = (float)(1 << FILTER_BITS) / norm; + for(i=0;i<NB_TAPS;i++) { + v = (int)(tab[i] * mult); + filter[ph * NB_TAPS + i] = v; + } + } +} + +ImgReSampleContext *img_resample_init(int owidth, int oheight, + int iwidth, int iheight) +{ + ImgReSampleContext *s; + + s = av_mallocz(sizeof(ImgReSampleContext)); + if (!s) + return NULL; + s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); + if (!s->line_buf) + goto fail; + + s->owidth = owidth; + s->oheight = oheight; + s->iwidth = iwidth; + s->iheight = iheight; + + s->h_incr = (iwidth * POS_FRAC) / owidth; + s->v_incr = (iheight * POS_FRAC) / oheight; + + build_filter(&s->h_filters[0][0], (float)owidth / (float)iwidth); + build_filter(&s->v_filters[0][0], (float)oheight / (float)iheight); + + return s; + fail: + free(s); + return NULL; +} + +void img_resample(ImgReSampleContext *s, + AVPicture *output, AVPicture *input) +{ + int i, shift; + + for(i=0;i<3;i++) { + shift = (i == 0) ? 0 : 1; + component_resample(s, output->data[i], output->linesize[i], + s->owidth >> shift, s->oheight >> shift, + input->data[i], input->linesize[i], + s->iwidth >> shift, s->iheight >> shift); + } +} + +void img_resample_close(ImgReSampleContext *s) +{ + free(s->line_buf); + free(s); +} + +#ifdef TEST + +void *av_mallocz(int size) +{ + void *ptr; + ptr = malloc(size); + memset(ptr, 0, size); + return ptr; +} + +/* input */ +#define XSIZE 256 +#define YSIZE 256 +UINT8 img[XSIZE * YSIZE]; + +/* output */ +#define XSIZE1 512 +#define YSIZE1 512 +UINT8 img1[XSIZE1 * YSIZE1]; +UINT8 img2[XSIZE1 * YSIZE1]; + +void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize) +{ + FILE *f; + f=fopen(filename,"w"); + fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255); + fwrite(img,1, xsize * ysize,f); + fclose(f); +} + +static void dump_filter(INT16 *filter) +{ + int i, ph; + + for(ph=0;ph<NB_PHASES;ph++) { + printf("%2d: ", ph); + for(i=0;i<NB_TAPS;i++) { + printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0); + } + printf("\n"); + } +} + +#ifdef CONFIG_MMX +int mm_flags; +#endif + +int main(int argc, char **argv) +{ + int x, y, v, i, xsize, ysize; + ImgReSampleContext *s; + float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 }; + char buf[256]; + + /* build test image */ + for(y=0;y<YSIZE;y++) { + for(x=0;x<XSIZE;x++) { + if (x < XSIZE/2 && y < YSIZE/2) { + if (x < XSIZE/4 && y < YSIZE/4) { + if ((x % 10) <= 6 && + (y % 10) <= 6) + v = 0xff; + else + v = 0x00; + } else if (x < XSIZE/4) { + if (x & 1) + v = 0xff; + else + v = 0; + } else if (y < XSIZE/4) { + if (y & 1) + v = 0xff; + else + v = 0; + } else { + if (y < YSIZE*3/8) { + if ((y+x) & 1) + v = 0xff; + else + v = 0; + } else { + if (((x+3) % 4) <= 1 && + ((y+3) % 4) <= 1) + v = 0xff; + else + v = 0x00; + } + } + } else if (x < XSIZE/2) { + v = ((x - (XSIZE/2)) * 255) / (XSIZE/2); + } else if (y < XSIZE/2) { + v = ((y - (XSIZE/2)) * 255) / (XSIZE/2); + } else { + v = ((x + y - XSIZE) * 255) / XSIZE; + } + img[y * XSIZE + x] = v; + } + } + save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE); + for(i=0;i<sizeof(factors)/sizeof(float);i++) { + fact = factors[i]; + xsize = (int)(XSIZE * fact); + ysize = (int)(YSIZE * fact); + s = img_resample_init(xsize, ysize, XSIZE, YSIZE); + printf("Factor=%0.2f\n", fact); + dump_filter(&s->h_filters[0][0]); + component_resample(s, img1, xsize, xsize, ysize, + img, XSIZE, XSIZE, YSIZE); + img_resample_close(s); + + sprintf(buf, "/tmp/out%d.pgm", i); + save_pgm(buf, img1, xsize, ysize); + } + + /* mmx test */ +#ifdef CONFIG_MMX + printf("MMX test\n"); + fact = 0.72; + xsize = (int)(XSIZE * fact); + ysize = (int)(YSIZE * fact); + mm_flags = MM_MMX; + s = img_resample_init(xsize, ysize, XSIZE, YSIZE); + component_resample(s, img1, xsize, xsize, ysize, + img, XSIZE, XSIZE, YSIZE); + + mm_flags = 0; + s = img_resample_init(xsize, ysize, XSIZE, YSIZE); + component_resample(s, img2, xsize, xsize, ysize, + img, XSIZE, XSIZE, YSIZE); + if (memcmp(img1, img2, xsize * ysize) != 0) { + fprintf(stderr, "mmx error\n"); + exit(1); + } + printf("MMX OK\n"); +#endif + return 0; +} + +#endif diff --git a/libavcodec/jfdctfst.c b/libavcodec/jfdctfst.c new file mode 100644 index 0000000000..cdc3b47f99 --- /dev/null +++ b/libavcodec/jfdctfst.c @@ -0,0 +1,224 @@ +/* + * jfdctfst.c + * + * Copyright (C) 1994-1996, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains a fast, not so accurate integer implementation of the + * forward DCT (Discrete Cosine Transform). + * + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * This implementation is based on Arai, Agui, and Nakajima's algorithm for + * scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in + * Japanese, but the algorithm is described in the Pennebaker & Mitchell + * JPEG textbook (see REFERENCES section in file README). The following code + * is based directly on figure 4-8 in P&M. + * While an 8-point DCT cannot be done in less than 11 multiplies, it is + * possible to arrange the computation so that many of the multiplies are + * simple scalings of the final outputs. These multiplies can then be + * folded into the multiplications or divisions by the JPEG quantization + * table entries. The AA&N method leaves only 5 multiplies and 29 adds + * to be done in the DCT itself. + * The primary disadvantage of this method is that with fixed-point math, + * accuracy is lost due to imprecise representation of the scaled + * quantization values. The smaller the quantization table entry, the less + * precise the scaled value, so this implementation does worse with high- + * quality-setting files than with low-quality ones. + */ + +#include <stdlib.h> +#include <stdio.h> +#include "common.h" +#include "dsputil.h" + +#define DCTSIZE 8 +#define GLOBAL(x) x +#define RIGHT_SHIFT(x, n) ((x) >> (n)) +#define SHIFT_TEMPS + +/* + * This module is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ +#endif + + +/* Scaling decisions are generally the same as in the LL&M algorithm; + * see jfdctint.c for more details. However, we choose to descale + * (right shift) multiplication products as soon as they are formed, + * rather than carrying additional fractional bits into subsequent additions. + * This compromises accuracy slightly, but it lets us save a few shifts. + * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples) + * everywhere except in the multiplications proper; this saves a good deal + * of work on 16-bit-int machines. + * + * Again to save a few shifts, the intermediate results between pass 1 and + * pass 2 are not upscaled, but are represented only to integral precision. + * + * A final compromise is to represent the multiplicative constants to only + * 8 fractional bits, rather than 13. This saves some shifting work on some + * machines, and may also reduce the cost of multiplication (since there + * are fewer one-bits in the constants). + */ + +#define CONST_BITS 8 + + +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus + * causing a lot of useless floating-point operations at run time. + * To get around this we use the following pre-calculated constants. + * If you change CONST_BITS you may want to add appropriate values. + * (With a reasonable C compiler, you can just rely on the FIX() macro...) + */ + +#if CONST_BITS == 8 +#define FIX_0_382683433 ((INT32) 98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((INT32) 139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((INT32) 181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((INT32) 334) /* FIX(1.306562965) */ +#else +#define FIX_0_382683433 FIX(0.382683433) +#define FIX_0_541196100 FIX(0.541196100) +#define FIX_0_707106781 FIX(0.707106781) +#define FIX_1_306562965 FIX(1.306562965) +#endif + + +/* We can gain a little more speed, with a further compromise in accuracy, + * by omitting the addition in a descaling shift. This yields an incorrectly + * rounded result half the time... + */ + +#ifndef USE_ACCURATE_ROUNDING +#undef DESCALE +#define DESCALE(x,n) RIGHT_SHIFT(x, n) +#endif + + +/* Multiply a DCTELEM variable by an INT32 constant, and immediately + * descale to yield a DCTELEM result. + */ + +#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) + + +/* + * Perform the forward DCT on one block of samples. + */ + +GLOBAL(void) +jpeg_fdct_ifast (DCTELEM * data) +{ + DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + DCTELEM tmp10, tmp11, tmp12, tmp13; + DCTELEM z1, z2, z3, z4, z5, z11, z13; + DCTELEM *dataptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[0] + dataptr[7]; + tmp7 = dataptr[0] - dataptr[7]; + tmp1 = dataptr[1] + dataptr[6]; + tmp6 = dataptr[1] - dataptr[6]; + tmp2 = dataptr[2] + dataptr[5]; + tmp5 = dataptr[2] - dataptr[5]; + tmp3 = dataptr[3] + dataptr[4]; + tmp4 = dataptr[3] - dataptr[4]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[0] = tmp10 + tmp11; /* phase 3 */ + dataptr[4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[6] = tmp13 - z1; + + /* Odd part */ + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + /* The rotator is modified from fig 4-8 to avoid extra negations. */ + z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = tmp7 + z3; /* phase 5 */ + z13 = tmp7 - z3; + + dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[3] = z13 - z2; + dataptr[1] = z11 + z4; + dataptr[7] = z11 - z4; + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ + dataptr[DCTSIZE*4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ + dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ + dataptr[DCTSIZE*6] = tmp13 - z1; + + /* Odd part */ + + tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp11 = tmp5 + tmp6; + tmp12 = tmp6 + tmp7; + + /* The rotator is modified from fig 4-8 to avoid extra negations. */ + z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + + z11 = tmp7 + z3; /* phase 5 */ + z13 = tmp7 - z3; + + dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ + dataptr[DCTSIZE*3] = z13 - z2; + dataptr[DCTSIZE*1] = z11 + z4; + dataptr[DCTSIZE*7] = z11 - z4; + + dataptr++; /* advance pointer to next column */ + } +} diff --git a/libavcodec/jrevdct.c b/libavcodec/jrevdct.c new file mode 100644 index 0000000000..2ef40f38e5 --- /dev/null +++ b/libavcodec/jrevdct.c @@ -0,0 +1,1167 @@ +/* + * jrevdct.c + * + * Copyright (C) 1991, 1992, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains the basic inverse-DCT transformation subroutine. + * + * This implementation is based on an algorithm described in + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. + * The primary algorithm described there uses 11 multiplies and 29 adds. + * We use their alternate method with 12 multiplies and 32 adds. + * The advantage of this method is that no data path contains more than one + * multiplication; this allows a very simple and accurate implementation in + * scaled fixed-point arithmetic, with a minimal number of shifts. + * + * I've made lots of modifications to attempt to take advantage of the + * sparse nature of the DCT matrices we're getting. Although the logic + * is cumbersome, it's straightforward and the resulting code is much + * faster. + * + * A better way to do this would be to pass in the DCT block as a sparse + * matrix, perhaps with the difference cases encoded. + */ +#include "common.h" +#include "dsputil.h" + +#define EIGHT_BIT_SAMPLES + +#define DCTSIZE 8 +#define DCTSIZE2 64 + +#define GLOBAL + +#define RIGHT_SHIFT(x, n) ((x) >> (n)) + +typedef DCTELEM DCTBLOCK[DCTSIZE2]; + +#define CONST_BITS 13 + +/* + * This routine is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ +#endif + + +/* + * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * The poop on this scaling stuff is as follows: + * + * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) + * larger than the true IDCT outputs. The final outputs are therefore + * a factor of N larger than desired; since N=8 this can be cured by + * a simple right shift at the end of the algorithm. The advantage of + * this arrangement is that we save two multiplications per 1-D IDCT, + * because the y0 and y4 inputs need not be divided by sqrt(N). + * + * We have to do addition and subtraction of the integer inputs, which + * is no problem, and multiplication by fractional constants, which is + * a problem to do in integer arithmetic. We multiply all the constants + * by CONST_SCALE and convert them to integer constants (thus retaining + * CONST_BITS bits of precision in the constants). After doing a + * multiplication we have to divide the product by CONST_SCALE, with proper + * rounding, to produce the correct output. This division can be done + * cheaply as a right shift of CONST_BITS bits. We postpone shifting + * as long as possible so that partial sums can be added together with + * full fractional precision. + * + * The outputs of the first pass are scaled up by PASS1_BITS bits so that + * they are represented to better-than-integral precision. These outputs + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word + * with the recommended scaling. (To scale up 12-bit sample data further, an + * intermediate int32 array would be needed.) + * + * To avoid overflow of the 32-bit intermediate results in pass 2, we must + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis + * shows that the values given below are the most effective. + */ + +#ifdef EIGHT_BIT_SAMPLES +#define PASS1_BITS 2 +#else +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#endif + +#define ONE ((INT32) 1) + +#define CONST_SCALE (ONE << CONST_BITS) + +/* Convert a positive real constant to an integer scaled by CONST_SCALE. + * IMPORTANT: if your compiler doesn't do this arithmetic at compile time, + * you will pay a significant penalty in run time. In that case, figure + * the correct integer constant values and insert them by hand. + */ + +/* Actually FIX is no longer used, we precomputed them all */ +#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) + +/* Descale and correctly round an INT32 value that's scaled by N bits. + * We assume RIGHT_SHIFT rounds towards minus infinity, so adding + * the fudge factor is correct for either sign of X. + */ + +#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) + +/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. + * For 8-bit samples with the recommended scaling, all the variable + * and constant values involved are no more than 16 bits wide, so a + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply; + * this provides a useful speedup on many machines. + * There is no way to specify a 16x16->32 multiply in portable C, but + * some C compilers will do the right thing if you provide the correct + * combination of casts. + * NB: for 12-bit samples, a full 32-bit multiplication will be needed. + */ + +#ifdef EIGHT_BIT_SAMPLES +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) +#endif +#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ +#define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const))) +#endif +#endif + +#ifndef MULTIPLY /* default definition */ +#define MULTIPLY(var,const) ((var) * (const)) +#endif + + +/* + Unlike our decoder where we approximate the FIXes, we need to use exact +ones here or successive P-frames will drift too much with Reference frame coding +*/ +#define FIX_0_211164243 1730 +#define FIX_0_275899380 2260 +#define FIX_0_298631336 2446 +#define FIX_0_390180644 3196 +#define FIX_0_509795579 4176 +#define FIX_0_541196100 4433 +#define FIX_0_601344887 4926 +#define FIX_0_765366865 6270 +#define FIX_0_785694958 6436 +#define FIX_0_899976223 7373 +#define FIX_1_061594337 8697 +#define FIX_1_111140466 9102 +#define FIX_1_175875602 9633 +#define FIX_1_306562965 10703 +#define FIX_1_387039845 11363 +#define FIX_1_451774981 11893 +#define FIX_1_501321110 12299 +#define FIX_1_662939225 13623 +#define FIX_1_847759065 15137 +#define FIX_1_961570560 16069 +#define FIX_2_053119869 16819 +#define FIX_2_172734803 17799 +#define FIX_2_562915447 20995 +#define FIX_3_072711026 25172 + +/* + * Perform the inverse DCT on one block of coefficients. + */ + +void j_rev_dct(DCTBLOCK data) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1, z2, z3, z4, z5; + INT32 d0, d1, d2, d3, d4, d5, d6, d7; + register DCTELEM *dataptr; + int rowctr; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + dataptr = data; + + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any row in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * row DCT calculations can be simplified this way. + */ + + register int *idataptr = (int*)dataptr; + + d0 = dataptr[0]; + d1 = dataptr[1]; + d2 = dataptr[2]; + d3 = dataptr[3]; + d4 = dataptr[4]; + d5 = dataptr[5]; + d6 = dataptr[6]; + d7 = dataptr[7]; + + if ((d1 == 0) && (idataptr[1] | idataptr[2] | idataptr[3]) == 0) { + /* AC terms all zero */ + if (d0) { + /* Compute a 32 bit value to assign. */ + DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); + register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); + + idataptr[0] = v; + idataptr[1] = v; + idataptr[2] = v; + idataptr[3] = v; + } + + dataptr += DCTSIZE; /* advance pointer to next row */ + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ +{ + if (d6) { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } + } + } else { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = d4 << CONST_BITS; + tmp11 = tmp12 = -tmp10; + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = 0; + } + } + } + } + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + if (d7) { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5 + d3; + z3 = d7 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ + z2 = d5 + d3; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d5, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + z1 = MULTIPLY(-d7, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-d5, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 = z1 + z4; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z4 = d5 + d1; + z5 = MULTIPLY(d7 + z4, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z3 = MULTIPLY(-d7, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 = z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ + tmp0 = MULTIPLY(-d7, FIX_0_601344887); + z1 = MULTIPLY(-d7, FIX_0_899976223); + z3 = MULTIPLY(-d7, FIX_1_961570560); + tmp1 = MULTIPLY(-d5, FIX_0_509795579); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z4 = MULTIPLY(-d5, FIX_0_390180644); + z5 = MULTIPLY(d5 + d7, FIX_1_175875602); + + z3 += z5; + z4 += z5; + + tmp0 += z3; + tmp1 += z4; + tmp2 = z2 + z3; + tmp3 = z1 + z4; + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d1, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-d3, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-d1, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 = z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ + z3 = d7 + d3; + + tmp0 = MULTIPLY(-d7, FIX_0_601344887); + z1 = MULTIPLY(-d7, FIX_0_899976223); + tmp2 = MULTIPLY(d3, FIX_0_509795579); + z2 = MULTIPLY(-d3, FIX_2_562915447); + z5 = MULTIPLY(z3, FIX_1_175875602); + z3 = MULTIPLY(-z3, FIX_0_785694958); + + tmp0 += z3; + tmp1 = z2 + z5; + tmp2 += z3; + tmp3 = z1 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z5 = MULTIPLY(z1, FIX_1_175875602); + + z1 = MULTIPLY(z1, FIX_0_275899380); + z3 = MULTIPLY(-d7, FIX_1_961570560); + tmp0 = MULTIPLY(-d7, FIX_1_662939225); + z4 = MULTIPLY(-d1, FIX_0_390180644); + tmp3 = MULTIPLY(d1, FIX_1_111140466); + + tmp0 += z1; + tmp1 = z4 + z5; + tmp2 = z3 + z5; + tmp3 += z1; + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ + tmp0 = MULTIPLY(-d7, FIX_1_387039845); + tmp1 = MULTIPLY(d7, FIX_1_175875602); + tmp2 = MULTIPLY(-d7, FIX_0_785694958); + tmp3 = MULTIPLY(d7, FIX_0_275899380); + } + } + } + } else { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(d3 + z4, FIX_1_175875602); + + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-d1, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-d3, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 = z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + + z5 = MULTIPLY(z2, FIX_1_175875602); + tmp1 = MULTIPLY(d5, FIX_1_662939225); + z4 = MULTIPLY(-d5, FIX_0_390180644); + z2 = MULTIPLY(-z2, FIX_1_387039845); + tmp2 = MULTIPLY(d3, FIX_1_111140466); + z3 = MULTIPLY(-d3, FIX_1_961570560); + + tmp0 = z3 + z5; + tmp1 += z2; + tmp2 += z2; + tmp3 = z4 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ + z4 = d5 + d1; + + z5 = MULTIPLY(z4, FIX_1_175875602); + z1 = MULTIPLY(-d1, FIX_0_899976223); + tmp3 = MULTIPLY(d1, FIX_0_601344887); + tmp1 = MULTIPLY(-d5, FIX_0_509795579); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z4 = MULTIPLY(z4, FIX_0_785694958); + + tmp0 = z1 + z5; + tmp1 += z4; + tmp2 = z2 + z5; + tmp3 += z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ + tmp0 = MULTIPLY(d5, FIX_1_175875602); + tmp1 = MULTIPLY(d5, FIX_0_275899380); + tmp2 = MULTIPLY(-d5, FIX_1_387039845); + tmp3 = MULTIPLY(d5, FIX_0_785694958); + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ + z5 = d1 + d3; + tmp3 = MULTIPLY(d1, FIX_0_211164243); + tmp2 = MULTIPLY(-d3, FIX_1_451774981); + z1 = MULTIPLY(d1, FIX_1_061594337); + z2 = MULTIPLY(-d3, FIX_2_172734803); + z4 = MULTIPLY(z5, FIX_0_785694958); + z5 = MULTIPLY(z5, FIX_1_175875602); + + tmp0 = z1 - z4; + tmp1 = z2 + z4; + tmp2 += z5; + tmp3 += z5; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(-d3, FIX_0_785694958); + tmp1 = MULTIPLY(-d3, FIX_1_387039845); + tmp2 = MULTIPLY(-d3, FIX_0_275899380); + tmp3 = MULTIPLY(d3, FIX_1_175875602); + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(d1, FIX_0_275899380); + tmp1 = MULTIPLY(d1, FIX_0_785694958); + tmp2 = MULTIPLY(d1, FIX_1_175875602); + tmp3 = MULTIPLY(d1, FIX_1_387039845); + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = tmp1 = tmp2 = tmp3 = 0; + } + } + } + } +} + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); + dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + dataptr = data; + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Columns of zeroes can be exploited in the same way as we did with rows. + * However, the row calculation has created many nonzero AC terms, so the + * simplification applies less often (typically 5% to 10% of the time). + * On machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. + */ + + d0 = dataptr[DCTSIZE*0]; + d1 = dataptr[DCTSIZE*1]; + d2 = dataptr[DCTSIZE*2]; + d3 = dataptr[DCTSIZE*3]; + d4 = dataptr[DCTSIZE*4]; + d5 = dataptr[DCTSIZE*5]; + d6 = dataptr[DCTSIZE*6]; + d7 = dataptr[DCTSIZE*7]; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } + } + } else { + if (d4) { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = d4 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp2 - tmp0; + tmp12 = -(tmp0 + tmp2); + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } else { + /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = d4 << CONST_BITS; + tmp11 = tmp12 = -tmp10; + } + } + } else { + if (d2) { + if (d0) { + /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = d0 << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + } else { + /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp10 = tmp3; + tmp13 = -tmp3; + tmp11 = tmp2; + tmp12 = -tmp2; + } + } else { + if (d0) { + /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; + } else { + /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ + tmp10 = tmp13 = tmp11 = tmp12 = 0; + } + } + } + } + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + if (d7) { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5 + d3; + z3 = d7 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */ + z1 = d7; + z2 = d5 + d3; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d5, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + z1 = MULTIPLY(-d7, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-d5, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 = z1 + z4; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */ + z1 = d7 + d1; + z2 = d5; + z3 = d7; + z4 = d5 + d1; + z5 = MULTIPLY(z3 + z4, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z3 = MULTIPLY(-d7, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 += z2 + z4; + tmp2 = z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */ + tmp0 = MULTIPLY(-d7, FIX_0_601344887); + z1 = MULTIPLY(-d7, FIX_0_899976223); + z3 = MULTIPLY(-d7, FIX_1_961570560); + tmp1 = MULTIPLY(-d5, FIX_0_509795579); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z4 = MULTIPLY(-d5, FIX_0_390180644); + z5 = MULTIPLY(d5 + d7, FIX_1_175875602); + + z3 += z5; + z4 += z5; + + tmp0 += z3; + tmp1 += z4; + tmp2 = z2 + z3; + tmp3 = z1 + z4; + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z3 = d7 + d3; + z5 = MULTIPLY(z3 + d1, FIX_1_175875602); + + tmp0 = MULTIPLY(d7, FIX_0_298631336); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-z1, FIX_0_899976223); + z2 = MULTIPLY(-d3, FIX_2_562915447); + z3 = MULTIPLY(-z3, FIX_1_961570560); + z4 = MULTIPLY(-d1, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 += z1 + z3; + tmp1 = z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */ + z3 = d7 + d3; + + tmp0 = MULTIPLY(-d7, FIX_0_601344887); + z1 = MULTIPLY(-d7, FIX_0_899976223); + tmp2 = MULTIPLY(d3, FIX_0_509795579); + z2 = MULTIPLY(-d3, FIX_2_562915447); + z5 = MULTIPLY(z3, FIX_1_175875602); + z3 = MULTIPLY(-z3, FIX_0_785694958); + + tmp0 += z3; + tmp1 = z2 + z5; + tmp2 += z3; + tmp3 = z1 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */ + z1 = d7 + d1; + z5 = MULTIPLY(z1, FIX_1_175875602); + + z1 = MULTIPLY(z1, FIX_0_275899380); + z3 = MULTIPLY(-d7, FIX_1_961570560); + tmp0 = MULTIPLY(-d7, FIX_1_662939225); + z4 = MULTIPLY(-d1, FIX_0_390180644); + tmp3 = MULTIPLY(d1, FIX_1_111140466); + + tmp0 += z1; + tmp1 = z4 + z5; + tmp2 = z3 + z5; + tmp3 += z1; + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */ + tmp0 = MULTIPLY(-d7, FIX_1_387039845); + tmp1 = MULTIPLY(d7, FIX_1_175875602); + tmp2 = MULTIPLY(-d7, FIX_0_785694958); + tmp3 = MULTIPLY(d7, FIX_0_275899380); + } + } + } + } else { + if (d5) { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + z4 = d5 + d1; + z5 = MULTIPLY(d3 + z4, FIX_1_175875602); + + tmp1 = MULTIPLY(d5, FIX_2_053119869); + tmp2 = MULTIPLY(d3, FIX_3_072711026); + tmp3 = MULTIPLY(d1, FIX_1_501321110); + z1 = MULTIPLY(-d1, FIX_0_899976223); + z2 = MULTIPLY(-z2, FIX_2_562915447); + z3 = MULTIPLY(-d3, FIX_1_961570560); + z4 = MULTIPLY(-z4, FIX_0_390180644); + + z3 += z5; + z4 += z5; + + tmp0 = z1 + z3; + tmp1 += z2 + z4; + tmp2 += z2 + z3; + tmp3 += z1 + z4; + } else { + /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */ + z2 = d5 + d3; + + z5 = MULTIPLY(z2, FIX_1_175875602); + tmp1 = MULTIPLY(d5, FIX_1_662939225); + z4 = MULTIPLY(-d5, FIX_0_390180644); + z2 = MULTIPLY(-z2, FIX_1_387039845); + tmp2 = MULTIPLY(d3, FIX_1_111140466); + z3 = MULTIPLY(-d3, FIX_1_961570560); + + tmp0 = z3 + z5; + tmp1 += z2; + tmp2 += z2; + tmp3 = z4 + z5; + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */ + z4 = d5 + d1; + + z5 = MULTIPLY(z4, FIX_1_175875602); + z1 = MULTIPLY(-d1, FIX_0_899976223); + tmp3 = MULTIPLY(d1, FIX_0_601344887); + tmp1 = MULTIPLY(-d5, FIX_0_509795579); + z2 = MULTIPLY(-d5, FIX_2_562915447); + z4 = MULTIPLY(z4, FIX_0_785694958); + + tmp0 = z1 + z5; + tmp1 += z4; + tmp2 = z2 + z5; + tmp3 += z4; + } else { + /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */ + tmp0 = MULTIPLY(d5, FIX_1_175875602); + tmp1 = MULTIPLY(d5, FIX_0_275899380); + tmp2 = MULTIPLY(-d5, FIX_1_387039845); + tmp3 = MULTIPLY(d5, FIX_0_785694958); + } + } + } else { + if (d3) { + if (d1) { + /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */ + z5 = d1 + d3; + tmp3 = MULTIPLY(d1, FIX_0_211164243); + tmp2 = MULTIPLY(-d3, FIX_1_451774981); + z1 = MULTIPLY(d1, FIX_1_061594337); + z2 = MULTIPLY(-d3, FIX_2_172734803); + z4 = MULTIPLY(z5, FIX_0_785694958); + z5 = MULTIPLY(z5, FIX_1_175875602); + + tmp0 = z1 - z4; + tmp1 = z2 + z4; + tmp2 += z5; + tmp3 += z5; + } else { + /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(-d3, FIX_0_785694958); + tmp1 = MULTIPLY(-d3, FIX_1_387039845); + tmp2 = MULTIPLY(-d3, FIX_0_275899380); + tmp3 = MULTIPLY(d3, FIX_1_175875602); + } + } else { + if (d1) { + /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = MULTIPLY(d1, FIX_0_275899380); + tmp1 = MULTIPLY(d1, FIX_0_785694958); + tmp2 = MULTIPLY(d1, FIX_1_175875602); + tmp3 = MULTIPLY(d1, FIX_1_387039845); + } else { + /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */ + tmp0 = tmp1 = tmp2 = tmp3 = 0; + } + } + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0, + CONST_BITS+PASS1_BITS+3); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0, + CONST_BITS+PASS1_BITS+3); + + dataptr++; /* advance pointer to next column */ + } +} + + diff --git a/libavcodec/libac3/ac3.h b/libavcodec/libac3/ac3.h new file mode 100644 index 0000000000..117071e7a1 --- /dev/null +++ b/libavcodec/libac3/ac3.h @@ -0,0 +1,108 @@ +/* + * ac3.h + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + */ + +typedef struct ac3_ba_s { + uint16_t fsnroffst; // fine SNR offset + uint16_t fgaincod; // fast gain + uint16_t deltbae; // delta bit allocation exists + int8_t deltba[50]; // per-band delta bit allocation +} ac3_ba_t; + +typedef struct ac3_state_s { + uint8_t fscod; // sample rate + uint8_t halfrate; // halfrate factor + uint8_t acmod; // coded channels + float clev; // centre channel mix level + float slev; // surround channels mix level + uint8_t lfeon; // coded lfe channel + + int output; // type of output + float level; // output level + float bias; // output bias + + uint16_t cplinu; // coupling in use + uint16_t chincpl[5]; // channel coupled + uint16_t phsflginu; // phase flags in use (stereo only) + uint16_t cplbndstrc[18]; // coupling band structure + uint16_t cplstrtmant; // coupling channel start mantissa + uint16_t cplendmant; // coupling channel end mantissa + float cplco[5][18]; // coupling coordinates + + // derived information + uint16_t cplstrtbnd; // coupling start band (for bit allocation) + uint16_t ncplbnd; // number of coupling bands + + uint16_t rematflg[4]; // stereo rematrixing + + uint16_t endmant[5]; // channel end mantissa + + uint8_t cpl_exp[256]; // decoded coupling channel exponents + uint8_t fbw_exp[5][256]; // decoded channel exponents + uint8_t lfe_exp[7]; // decoded lfe channel exponents + + uint16_t sdcycod; // slow decay + uint16_t fdcycod; // fast decay + uint16_t sgaincod; // slow gain + uint16_t dbpbcod; // dB per bit - encodes the dbknee value + uint16_t floorcod; // masking floor + + uint16_t csnroffst; // coarse SNR offset + ac3_ba_t cplba; // coupling bit allocation parameters + ac3_ba_t ba[5]; // channel bit allocation parameters + ac3_ba_t lfeba; // lfe bit allocation parameters + + uint16_t cplfleak; // coupling fast leak init + uint16_t cplsleak; // coupling slow leak init + + // derived bit allocation information + int8_t fbw_bap[5][256]; + int8_t cpl_bap[256]; + int8_t lfe_bap[7]; +} ac3_state_t; + +/* samples work structure */ +typedef float stream_samples_t[6][256]; + +#define AC3_CHANNEL 0 +#define AC3_MONO 1 +#define AC3_STEREO 2 +#define AC3_3F 3 +#define AC3_2F1R 4 +#define AC3_3F1R 5 +#define AC3_2F2R 6 +#define AC3_3F2R 7 +#define AC3_CHANNEL1 8 +#define AC3_CHANNEL2 9 +#define AC3_DOLBY 10 +#define AC3_CHANNEL_MASK 15 + +#define AC3_LFE 16 +#define AC3_ADJUST_LEVEL 32 + +void ac3_init (void); +int ac3_syncinfo (uint8_t * buf, int * flags, + int * sample_rate, int * bit_rate); +int ac3_frame (ac3_state_t * state, uint8_t * buf, int * flags, + float * level, float bias); +int ac3_block (ac3_state_t * state); diff --git a/libavcodec/libac3/ac3_internal.h b/libavcodec/libac3/ac3_internal.h new file mode 100644 index 0000000000..1dce513f4c --- /dev/null +++ b/libavcodec/libac3/ac3_internal.h @@ -0,0 +1,51 @@ +/* + * ac3_internal.h + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#define LEVEL_PLUS3DB 1.4142135623730951 +#define LEVEL_3DB 0.7071067811865476 +#define LEVEL_45DB 0.5946035575013605 +#define LEVEL_6DB 0.5 + +/* Exponent strategy constants */ +#define EXP_REUSE (0) +#define EXP_D15 (1) +#define EXP_D25 (2) +#define EXP_D45 (3) + +/* Delta bit allocation constants */ +#define DELTA_BIT_REUSE (0) +#define DELTA_BIT_NEW (1) +#define DELTA_BIT_NONE (2) +#define DELTA_BIT_RESERVED (3) + +void bit_allocate (ac3_state_t * state, ac3_ba_t * ba, int bndstart, + int start, int end, int fastleak, int slowleak, + uint8_t * exp, int8_t * bap); + +int downmix_init (int input, int flags, float * level, float clev, float slev); +void downmix (float * samples, int acmod, int output, float level, float bias, + float clev, float slev); + +void imdct_init (void); +extern void (* imdct_256) (float data[], float delay[]); +extern void (* imdct_512) (float data[], float delay[]); diff --git a/libavcodec/libac3/bit_allocate.c b/libavcodec/libac3/bit_allocate.c new file mode 100644 index 0000000000..307c0074c6 --- /dev/null +++ b/libavcodec/libac3/bit_allocate.c @@ -0,0 +1,255 @@ +/* + * bit_allocate.c + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include <inttypes.h> +#include <stdlib.h> +#include <string.h> +#include "ac3.h" +#include "ac3_internal.h" + +static int hthtab[3][50] = { + {0x730, 0x730, 0x7c0, 0x800, 0x820, 0x840, 0x850, 0x850, 0x860, 0x860, + 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x890, 0x890, + 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900, + 0x910, 0x910, 0x910, 0x910, 0x900, 0x8f0, 0x8c0, 0x870, 0x820, 0x7e0, + 0x7a0, 0x770, 0x760, 0x7a0, 0x7c0, 0x7c0, 0x6e0, 0x400, 0x3c0, 0x3c0}, + {0x710, 0x710, 0x7a0, 0x7f0, 0x820, 0x830, 0x840, 0x850, 0x850, 0x860, + 0x860, 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880, + 0x890, 0x890, 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8e0, 0x8f0, + 0x900, 0x910, 0x910, 0x910, 0x910, 0x900, 0x8e0, 0x8b0, 0x870, 0x820, + 0x7e0, 0x7b0, 0x760, 0x770, 0x7a0, 0x7c0, 0x780, 0x5d0, 0x3c0, 0x3c0}, + {0x680, 0x680, 0x750, 0x7b0, 0x7e0, 0x810, 0x820, 0x830, 0x840, 0x850, + 0x850, 0x850, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, + 0x870, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880, 0x890, 0x8a0, 0x8b0, + 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900, 0x910, 0x910, 0x910, 0x900, 0x8f0, + 0x8d0, 0x8b0, 0x840, 0x7f0, 0x790, 0x760, 0x7a0, 0x7c0, 0x7b0, 0x720} +}; + +static int8_t baptab[305] = { + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, // 93 padding entries + + 16, 16, 16, 16, 16, 16, 16, 16, 16, 14, 14, 14, 14, 14, 14, 14, + 14, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9, + 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, + 5, 4, 4, -3, -3, 3, 3, 3, -2, -2, -1, -1, -1, -1, -1, 0, + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 // 148 padding entries +}; + +static int bndtab[30] = {21, 22, 23, 24, 25, 26, 27, 28, 31, 34, + 37, 40, 43, 46, 49, 55, 61, 67, 73, 79, + 85, 97, 109, 121, 133, 157, 181, 205, 229, 253}; + +static int8_t latab[256] = { + -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53, + -52, -52, -51, -50, -49, -48, -47, -47, -46, -45, -44, -44, + -43, -42, -41, -41, -40, -39, -38, -38, -37, -36, -36, -35, + -35, -34, -33, -33, -32, -32, -31, -30, -30, -29, -29, -28, + -28, -27, -27, -26, -26, -25, -25, -24, -24, -23, -23, -22, + -22, -21, -21, -21, -20, -20, -19, -19, -19, -18, -18, -18, + -17, -17, -17, -16, -16, -16, -15, -15, -15, -14, -14, -14, + -13, -13, -13, -13, -12, -12, -12, -12, -11, -11, -11, -11, + -10, -10, -10, -10, -10, -9, -9, -9, -9, -9, -8, -8, + -8, -8, -8, -8, -7, -7, -7, -7, -7, -7, -6, -6, + -6, -6, -6, -6, -6, -6, -5, -5, -5, -5, -5, -5, + -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, + -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, + -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, + -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 +}; + +#define UPDATE_LEAK() \ +do { \ + fastleak += fdecay; \ + if (fastleak > psd + fgain) \ + fastleak = psd + fgain; \ + slowleak += sdecay; \ + if (slowleak > psd + sgain) \ + slowleak = psd + sgain; \ +} while (0) + +#define COMPUTE_MASK() \ +do { \ + if (psd > dbknee) \ + mask -= (psd - dbknee) >> 2; \ + if (mask > hth [i >> halfrate]) \ + mask = hth [i >> halfrate]; \ + mask -= snroffset + 128 * deltba[i]; \ + mask = (mask > 0) ? 0 : ((-mask) >> 5); \ + mask -= floor; \ +} while (0) + +void bit_allocate (ac3_state_t * state, ac3_ba_t * ba, int bndstart, + int start, int end, int fastleak, int slowleak, + uint8_t * exp, int8_t * bap) +{ + static int slowgain[4] = {0x540, 0x4d8, 0x478, 0x410}; + static int dbpbtab[4] = {0xc00, 0x500, 0x300, 0x100}; + static int floortab[8] = {0x910, 0x950, 0x990, 0x9d0, + 0xa10, 0xa90, 0xb10, 0x1400}; + + int i, j; + int fdecay, fgain, sdecay, sgain, dbknee, floor, snroffset; + int psd, mask; + int8_t * deltba; + int * hth; + int halfrate; + + halfrate = state->halfrate; + fdecay = (63 + 20 * state->fdcycod) >> halfrate; + fgain = 128 + 128 * ba->fgaincod; + sdecay = (15 + 2 * state->sdcycod) >> halfrate; + sgain = slowgain[state->sgaincod]; + dbknee = dbpbtab[state->dbpbcod]; + hth = hthtab[state->fscod]; + /* + * if there is no delta bit allocation, make deltba point to an area + * known to contain zeroes. baptab+156 here. + */ + deltba = (ba->deltbae == DELTA_BIT_NONE) ? baptab + 156 : ba->deltba; + floor = floortab[state->floorcod]; + snroffset = 960 - 64 * state->csnroffst - 4 * ba->fsnroffst + floor; + floor >>= 5; + + i = bndstart; + j = start; + if (start == 0) { // not the coupling channel + int lowcomp; + + lowcomp = 0; + j = end - 1; + do { + if (i < j) { + if (exp[i+1] == exp[i] - 2) + lowcomp = 384; + else if (lowcomp && (exp[i+1] > exp[i])) + lowcomp -= 64; + } + psd = 128 * exp[i]; + mask = psd + fgain + lowcomp; + COMPUTE_MASK (); + bap[i++] = (baptab+156)[mask + 4 * exp[i]]; + } while ((i < 3) || ((i < 7) && (exp[i] > exp[i-1]))); + fastleak = psd + fgain; + slowleak = psd + sgain; + + while (i < 7) { + if (i < j) { + if (exp[i+1] == exp[i] - 2) + lowcomp = 384; + else if (lowcomp && (exp[i+1] > exp[i])) + lowcomp -= 64; + } + psd = 128 * exp[i]; + UPDATE_LEAK (); + mask = ((fastleak + lowcomp < slowleak) ? + fastleak + lowcomp : slowleak); + COMPUTE_MASK (); + bap[i++] = (baptab+156)[mask + 4 * exp[i]]; + } + + if (end == 7) // lfe channel + return; + + do { + if (exp[i+1] == exp[i] - 2) + lowcomp = 320; + else if (lowcomp && (exp[i+1] > exp[i])) + lowcomp -= 64; + psd = 128 * exp[i]; + UPDATE_LEAK (); + mask = ((fastleak + lowcomp < slowleak) ? + fastleak + lowcomp : slowleak); + COMPUTE_MASK (); + bap[i++] = (baptab+156)[mask + 4 * exp[i]]; + } while (i < 20); + + while (lowcomp > 128) { // two iterations maximum + lowcomp -= 128; + psd = 128 * exp[i]; + UPDATE_LEAK (); + mask = ((fastleak + lowcomp < slowleak) ? + fastleak + lowcomp : slowleak); + COMPUTE_MASK (); + bap[i++] = (baptab+156)[mask + 4 * exp[i]]; + } + j = i; + } + + do { + int startband, endband; + + startband = j; + endband = ((bndtab-20)[i] < end) ? (bndtab-20)[i] : end; + psd = 128 * exp[j++]; + while (j < endband) { + int next, delta; + + next = 128 * exp[j++]; + delta = next - psd; + switch (delta >> 9) { + case -6: case -5: case -4: case -3: case -2: + psd = next; + break; + case -1: + psd = next + latab[(-delta) >> 1]; + break; + case 0: + psd += latab[delta >> 1]; + break; + } + } + // minpsd = -289 + UPDATE_LEAK (); + mask = (fastleak < slowleak) ? fastleak : slowleak; + COMPUTE_MASK (); + i++; + j = startband; + do { + // max(mask+4*exp)=147=-(minpsd+fgain-deltba-snroffset)>>5+4*exp + // min(mask+4*exp)=-156=-(sgain-deltba-snroffset)>>5 + bap[j++] = (baptab+156)[mask + 4 * exp[j]]; + } while (j < endband); + } while (j < end); +} diff --git a/libavcodec/libac3/bitstream.c b/libavcodec/libac3/bitstream.c new file mode 100644 index 0000000000..7e9bd1f66a --- /dev/null +++ b/libavcodec/libac3/bitstream.c @@ -0,0 +1,77 @@ +/* + * bitstream.c + * + * Copyright (C) Aaron Holtzman - Dec 1999 + * + * This file is part of ac3dec, a free AC-3 audio decoder + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include <inttypes.h> +#include <stdlib.h> +#include <stdio.h> + +#include "ac3.h" +#include "ac3_internal.h" +#include "bitstream.h" + +#define BUFFER_SIZE 4096 + +static uint8_t *buffer_start; + +uint32_t bits_left; +uint32_t current_word; + +void bitstream_set_ptr (uint8_t * buf) +{ + buffer_start = buf; + bits_left = 0; +} + +static inline void +bitstream_fill_current() +{ + current_word = *((uint32_t*)buffer_start)++; + current_word = swab32(current_word); +} + +// +// The fast paths for _get is in the +// bitstream.h header file so it can be inlined. +// +// The "bottom half" of this routine is suffixed _bh +// +// -ah +// + +uint32_t +bitstream_get_bh(uint32_t num_bits) +{ + uint32_t result; + + num_bits -= bits_left; + result = (current_word << (32 - bits_left)) >> (32 - bits_left); + + bitstream_fill_current(); + + if(num_bits != 0) + result = (result << num_bits) | (current_word >> (32 - num_bits)); + + bits_left = 32 - num_bits; + + return result; +} diff --git a/libavcodec/libac3/bitstream.h b/libavcodec/libac3/bitstream.h new file mode 100644 index 0000000000..84f3287c87 --- /dev/null +++ b/libavcodec/libac3/bitstream.h @@ -0,0 +1,68 @@ +/* + * bitstream.h + * + * Copyright (C) Aaron Holtzman - Dec 1999 + * + * This file is part of ac3dec, a free AC-3 audio decoder + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +//My new and improved vego-matic endian swapping routine +//(stolen from the kernel) +#ifdef WORDS_BIGENDIAN + +# define swab32(x) (x) + +#else + +# if defined (__i386__) + +# define swab32(x) __i386_swab32(x) + static inline const uint32_t __i386_swab32(uint32_t x) + { + __asm__("bswap %0" : "=r" (x) : "0" (x)); + return x; + } + +# else + +# define swab32(x)\ +((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \ + (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3])) + +# endif +#endif + +extern uint32_t bits_left; +extern uint32_t current_word; + +void bitstream_set_ptr (uint8_t * buf); +uint32_t bitstream_get_bh(uint32_t num_bits); + +static inline uint32_t +bitstream_get(uint32_t num_bits) +{ + uint32_t result; + + if(num_bits < bits_left) { + result = (current_word << (32 - bits_left)) >> (32 - num_bits); + bits_left -= num_bits; + return result; + } + + return bitstream_get_bh(num_bits); +} diff --git a/libavcodec/libac3/downmix.c b/libavcodec/libac3/downmix.c new file mode 100644 index 0000000000..9e7fbfb8b6 --- /dev/null +++ b/libavcodec/libac3/downmix.c @@ -0,0 +1,533 @@ +/* + * + * downmix.c + * + * Copyright (C) Aaron Holtzman - Sept 1999 + * + * Originally based on code by Yuqing Deng. + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + */ + +#include <inttypes.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> +#include "ac3.h" +#include "ac3_internal.h" + + +#define CONVERT(acmod,output) (((output) << 3) + (acmod)) + +int downmix_init (int input, int flags, float * level, float clev, float slev) +{ + static uint8_t table[11][8] = { + {AC3_CHANNEL, AC3_DOLBY, AC3_STEREO, AC3_STEREO, + AC3_STEREO, AC3_STEREO, AC3_STEREO, AC3_STEREO}, + {AC3_MONO, AC3_MONO, AC3_MONO, AC3_MONO, + AC3_MONO, AC3_MONO, AC3_MONO, AC3_MONO}, + {AC3_CHANNEL, AC3_DOLBY, AC3_STEREO, AC3_STEREO, + AC3_STEREO, AC3_STEREO, AC3_STEREO, AC3_STEREO}, + {AC3_CHANNEL, AC3_DOLBY, AC3_STEREO, AC3_3F, + AC3_STEREO, AC3_3F, AC3_STEREO, AC3_3F}, + {AC3_CHANNEL, AC3_DOLBY, AC3_STEREO, AC3_STEREO, + AC3_2F1R, AC3_2F1R, AC3_2F1R, AC3_2F1R}, + {AC3_CHANNEL, AC3_DOLBY, AC3_STEREO, AC3_STEREO, + AC3_2F1R, AC3_3F1R, AC3_2F1R, AC3_3F1R}, + {AC3_CHANNEL, AC3_DOLBY, AC3_STEREO, AC3_3F, + AC3_2F2R, AC3_2F2R, AC3_2F2R, AC3_2F2R}, + {AC3_CHANNEL, AC3_DOLBY, AC3_STEREO, AC3_3F, + AC3_2F2R, AC3_3F2R, AC3_2F2R, AC3_3F2R}, + {AC3_CHANNEL1, AC3_MONO, AC3_MONO, AC3_MONO, + AC3_MONO, AC3_MONO, AC3_MONO, AC3_MONO}, + {AC3_CHANNEL2, AC3_MONO, AC3_MONO, AC3_MONO, + AC3_MONO, AC3_MONO, AC3_MONO, AC3_MONO}, + {AC3_CHANNEL, AC3_DOLBY, AC3_STEREO, AC3_DOLBY, + AC3_DOLBY, AC3_DOLBY, AC3_DOLBY, AC3_DOLBY} + }; + int output; + + output = flags & AC3_CHANNEL_MASK; + if (output > AC3_DOLBY) + return -1; + + output = table[output][input & 7]; + + if ((output == AC3_STEREO) && + ((input == AC3_DOLBY) || ((input == AC3_3F) && (clev == LEVEL_3DB)))) + output = AC3_DOLBY; + + if (flags & AC3_ADJUST_LEVEL) + switch (CONVERT (input & 7, output)) { + + case CONVERT (AC3_3F, AC3_MONO): + *level *= LEVEL_3DB / (1 + clev); + break; + + case CONVERT (AC3_STEREO, AC3_MONO): + case CONVERT (AC3_2F2R, AC3_2F1R): + case CONVERT (AC3_3F2R, AC3_3F1R): + level_3db: + *level *= LEVEL_3DB; + break; + + case CONVERT (AC3_3F2R, AC3_2F1R): + if (clev < LEVEL_PLUS3DB - 1) + goto level_3db; + // break thru + case CONVERT (AC3_3F, AC3_STEREO): + case CONVERT (AC3_3F1R, AC3_2F1R): + case CONVERT (AC3_3F1R, AC3_2F2R): + case CONVERT (AC3_3F2R, AC3_2F2R): + *level /= 1 + clev; + break; + + case CONVERT (AC3_2F1R, AC3_MONO): + *level *= LEVEL_PLUS3DB / (2 + slev); + break; + + case CONVERT (AC3_2F1R, AC3_STEREO): + case CONVERT (AC3_3F1R, AC3_3F): + *level /= 1 + slev * LEVEL_3DB; + break; + + case CONVERT (AC3_3F1R, AC3_MONO): + *level *= LEVEL_3DB / (1 + clev + 0.5 * slev); + break; + + case CONVERT (AC3_3F1R, AC3_STEREO): + *level /= 1 + clev + slev * LEVEL_3DB; + break; + + case CONVERT (AC3_2F2R, AC3_MONO): + *level *= LEVEL_3DB / (1 + slev); + break; + + case CONVERT (AC3_2F2R, AC3_STEREO): + case CONVERT (AC3_3F2R, AC3_3F): + *level /= (1 + slev); + break; + + case CONVERT (AC3_3F2R, AC3_MONO): + *level *= LEVEL_3DB / (1 + clev + slev); + break; + + case CONVERT (AC3_3F2R, AC3_STEREO): + *level /= 1 + clev + slev; + break; + + case CONVERT (AC3_MONO, AC3_DOLBY): + *level *= LEVEL_PLUS3DB; + break; + + case CONVERT (AC3_3F, AC3_DOLBY): + case CONVERT (AC3_2F1R, AC3_DOLBY): + *level *= 1 / (1 + LEVEL_3DB); + break; + + case CONVERT (AC3_3F1R, AC3_DOLBY): + case CONVERT (AC3_2F2R, AC3_DOLBY): + *level *= 1 / (1 + 2 * LEVEL_3DB); + break; + + case CONVERT (AC3_3F2R, AC3_DOLBY): + *level *= 1 / (1 + 3 * LEVEL_3DB); + break; + } + + return output; +} + +static void mix1to1 (float * samples, float level, float bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] = samples[i] * level + bias; +} + +static void move1to1 (float * src, float * dest, float level, float bias) +{ + int i; + + for (i = 0; i < 256; i++) + dest[i] = src[i] * level + bias; +} + +static void mix2to1 (float * samples, float level, float bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] = (samples[i] + samples[i + 256]) * level + bias; +} + +static void move2to1 (float * src, float * dest, float level, float bias) +{ + int i; + + for (i = 0; i < 256; i++) + dest[i] = (src[i] + src[i + 256]) * level + bias; +} + +static void mix3to1 (float * samples, float level, float clev, float bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] = ((samples[i] + samples[i + 512]) * level + + samples[i + 256] * clev + bias); +} + +static void mix21to1 (float * samples, float level, float slev, float bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] = ((samples[i] + samples[i + 256]) * level + + samples[i + 512] * slev + bias); +} + +static void mix31to1 (float * samples, float level, float clev, float slev, + float bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] = ((samples[i] + samples[i + 512]) * level + + samples[i + 256] * clev + samples[i + 768] * slev + + bias); +} + +static void mix22to1 (float * samples, float level, float slev, float bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] = ((samples[i] + samples[i + 256]) * level + + (samples[i + 512] + samples[i + 768]) * slev + bias); +} + +static void mix32to1 (float * samples, float level, float clev, float slev, + float bias) +{ + int i; + + for (i = 0; i < 256; i++) + samples[i] = ((samples[i] + samples[i + 512]) * level + + samples[i + 256] * clev + + (samples[i + 768] + samples[i + 1024]) * slev + bias); +} + +static void mix1to2 (float * src, float * dest, float level, float bias) +{ + int i; + + for (i = 0; i < 256; i++) + dest[i] = src[i] = src[i] * level + bias; +} + +static void mix3to2 (float * samples, float level, float clev, float bias) +{ + int i; + float common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] * clev + bias; + samples[i] = samples[i] * level + common; + samples[i + 256] = samples[i + 512] * level + common; + } +} + +static void mix21to2 (float * left, float * right, float level, float slev, + float bias) +{ + int i; + float common; + + for (i = 0; i < 256; i++) { + common = right[i + 256] * slev + bias; + left[i] = left[i] * level + common; + right[i] = right[i] * level + common; + } +} + +static void mix11to1 (float * front, float * rear, float level, float slev, + float bias) +{ + int i; + + for (i = 0; i < 256; i++) + front[i] = front[i] * level + rear[i] * slev + bias; +} + +static void mix31to2 (float * samples, float level, float clev, float slev, + float bias) +{ + int i; + float common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] * clev + samples[i + 768] * slev + bias; + samples[i] = samples[i] * level + common; + samples[i + 256] = samples[i + 512] * level + common; + } +} + +static void mix32to2 (float * samples, float level, float clev, float slev, + float bias) +{ + int i; + float common; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] * clev + bias; + samples[i] = samples[i] * level + common + samples[i + 768] * slev; + samples[i + 256] = (samples[i + 512] * level + common + + samples[i + 1024] * slev); + } +} + +static void mix21toS (float * samples, float level, float level3db, float bias) +{ + int i; + float surround; + + for (i = 0; i < 256; i++) { + surround = samples[i + 512] * level3db; + samples[i] = samples[i] * level - surround + bias; + samples[i + 256] = samples[i + 256] * level + surround + bias; + } +} + +static void mix22toS (float * samples, float level, float level3db, float bias) +{ + int i; + float surround; + + for (i = 0; i < 256; i++) { + surround = (samples[i + 512] + samples[i + 768]) * level3db; + samples[i] = samples[i] * level - surround + bias; + samples[i + 256] = samples[i + 256] * level + surround + bias; + } +} + +static void mix31toS (float * samples, float level, float level3db, float bias) +{ + int i; + float common, surround; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] * level3db + bias; + surround = samples[i + 768] * level3db; + samples[i] = samples[i] * level + common - surround; + samples[i + 256] = samples[i + 512] * level + common + surround; + } +} + +static void mix32toS (float * samples, float level, float level3db, float bias) +{ + int i; + float common, surround; + + for (i = 0; i < 256; i++) { + common = samples[i + 256] * level3db + bias; + surround = (samples[i + 768] + samples[i + 1024]) * level3db; + samples[i] = samples[i] * level + common - surround; + samples[i + 256] = samples[i + 512] * level + common + surround; + } +} + +void downmix (float * samples, int acmod, int output, float level, float bias, + float clev, float slev) +{ + switch (CONVERT (acmod, output & AC3_CHANNEL_MASK)) { + + case CONVERT (AC3_3F2R, AC3_3F2R): + mix1to1 (samples + 1024, level, bias); + case CONVERT (AC3_3F1R, AC3_3F1R): + case CONVERT (AC3_2F2R, AC3_2F2R): + mix1to1 (samples + 768, level, bias); + case CONVERT (AC3_3F, AC3_3F): + case CONVERT (AC3_2F1R, AC3_2F1R): + mix_3to3: + mix1to1 (samples + 512, level, bias); + case CONVERT (AC3_CHANNEL, AC3_CHANNEL): + case CONVERT (AC3_STEREO, AC3_STEREO): + case CONVERT (AC3_STEREO, AC3_DOLBY): + mix_2to2: + mix1to1 (samples + 256, level, bias); + case CONVERT (AC3_CHANNEL, AC3_CHANNEL1): + case CONVERT (AC3_MONO, AC3_MONO): + mix1to1 (samples, level, bias); + break; + + case CONVERT (AC3_CHANNEL, AC3_CHANNEL2): + mix_1to1_b: + mix1to1 (samples + 256, level, bias); + break; + + case CONVERT (AC3_STEREO, AC3_MONO): + mix_2to1: + mix2to1 (samples, level * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_2F1R, AC3_MONO): + if (slev == 0) + goto mix_2to1; + mix21to1 (samples, level * LEVEL_3DB, level * slev * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_2F2R, AC3_MONO): + if (slev == 0) + goto mix_2to1; + mix22to1 (samples, level * LEVEL_3DB, level * slev * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_3F, AC3_MONO): + mix_3to1: + mix3to1 (samples, level * LEVEL_3DB, level * clev * LEVEL_PLUS3DB, + bias); + break; + + case CONVERT (AC3_3F1R, AC3_MONO): + if (slev == 0) + goto mix_3to1; + mix31to1 (samples, level * LEVEL_3DB, level * clev * LEVEL_PLUS3DB, + level * slev * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_3F2R, AC3_MONO): + if (slev == 0) + goto mix_3to1; + mix32to1 (samples, level * LEVEL_3DB, level * clev * LEVEL_PLUS3DB, + level * slev * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_CHANNEL, AC3_MONO): + mix2to1 (samples, level * LEVEL_6DB, bias); + break; + + case CONVERT (AC3_MONO, AC3_DOLBY): + mix1to2 (samples, samples + 256, level * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_3F, AC3_DOLBY): + clev = LEVEL_3DB; + case CONVERT (AC3_3F, AC3_STEREO): + mix_3to2: + mix3to2 (samples, level, level * clev, bias); + break; + + case CONVERT (AC3_2F1R, AC3_DOLBY): + mix21toS (samples, level, level * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_3F1R, AC3_DOLBY): + mix31toS (samples, level, level * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_2F2R, AC3_DOLBY): + mix22toS (samples, level, level * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_3F2R, AC3_DOLBY): + mix32toS (samples, level, level * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_2F1R, AC3_STEREO): + if (slev == 0) + goto mix_2to2; + mix21to2 (samples, samples + 256, level, level * slev * LEVEL_3DB, + bias); + break; + + case CONVERT (AC3_3F1R, AC3_STEREO): + if (slev == 0) + goto mix_3to2; + mix31to2 (samples, level, level * clev, level * slev * LEVEL_3DB, + bias); + break; + + case CONVERT (AC3_2F2R, AC3_STEREO): + if (slev == 0) + goto mix_2to2; + mix11to1 (samples, samples + 512, level, level * slev, bias); + mix11to1 (samples + 256, samples + 768, level, level * slev, bias); + break; + + case CONVERT (AC3_3F2R, AC3_STEREO): + if (slev == 0) + goto mix_3to2; + mix32to2 (samples, level, level * clev, level * slev, bias); + break; + + case CONVERT (AC3_3F1R, AC3_3F): + if (slev == 0) + goto mix_3to3; + mix21to2 (samples, samples + 512, level, level * slev * LEVEL_3DB, + bias); + + case CONVERT (AC3_3F2R, AC3_3F): + if (slev == 0) + goto mix_3to3; + mix11to1 (samples, samples + 768, level, level * slev, bias); + mix11to1 (samples + 512, samples + 1024, level, level * slev, bias); + goto mix_1to1_b; + + case CONVERT (AC3_2F1R, AC3_2F2R): + mix1to2 (samples + 512, samples + 768, level * LEVEL_3DB, bias); + goto mix_2to2; + + case CONVERT (AC3_3F1R, AC3_3F2R): + mix1to2 (samples + 768, samples + 1024, level * LEVEL_3DB, bias); + goto mix_3to3; + + case CONVERT (AC3_2F2R, AC3_2F1R): + mix2to1 (samples + 512, level * LEVEL_3DB, bias); + goto mix_2to2; + + case CONVERT (AC3_3F2R, AC3_3F1R): + mix2to1 (samples + 768, level * LEVEL_3DB, bias); + goto mix_3to3; + + case CONVERT (AC3_3F1R, AC3_2F2R): + mix3to2 (samples, level, level * clev, bias); + mix1to2 (samples + 768, samples + 512, level * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_3F1R, AC3_2F1R): + mix3to2 (samples, level, level * clev, bias); + move1to1 (samples + 768, samples + 512, level, bias); + break; + + case CONVERT (AC3_3F2R, AC3_2F1R): + mix3to2 (samples, level, level * clev, bias); + move2to1 (samples + 768, samples + 512, level * LEVEL_3DB, bias); + break; + + case CONVERT (AC3_3F2R, AC3_2F2R): + mix3to2 (samples, level, level * clev, bias); + move1to1 (samples + 768, samples + 512, level, bias); + move1to1 (samples + 1024, samples + 768, level, bias); + break; + + } +} diff --git a/libavcodec/libac3/imdct.c b/libavcodec/libac3/imdct.c new file mode 100644 index 0000000000..7be33897f5 --- /dev/null +++ b/libavcodec/libac3/imdct.c @@ -0,0 +1,408 @@ +/* + * imdct.c + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + */ + +//#include "config.h" + +#include <inttypes.h> +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include "ac3.h" +#include "ac3_internal.h" + +void (* imdct_256) (float data[], float delay[]); +void (* imdct_512) (float data[], float delay[]); + +typedef struct complex_s +{ + float real; + float imag; +} complex_t; + + +/* 128 point bit-reverse LUT */ +static uint8_t bit_reverse_512[] = { + 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, + 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, + 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, + 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, + 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, + 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, + 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, + 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, + 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, + 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, + 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, + 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, + 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, + 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, + 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, + 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; + +static uint8_t bit_reverse_256[] = { + 0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38, + 0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c, + 0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a, + 0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e, + 0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39, + 0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d, + 0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b, + 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f}; + +static complex_t buf[128]; + +/* Twiddle factor LUT */ +static complex_t w_1[1]; +static complex_t w_2[2]; +static complex_t w_4[4]; +static complex_t w_8[8]; +static complex_t w_16[16]; +static complex_t w_32[32]; +static complex_t w_64[64]; +static complex_t * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64}; + +/* Twiddle factors for IMDCT */ +static float xcos1[128]; +static float xsin1[128]; +static float xcos2[64]; +static float xsin2[64]; + +/* Windowing function for Modified DCT - Thank you acroread */ +float imdct_window[] = { + 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, + 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, + 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, + 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121, + 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770, + 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153, + 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389, + 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563, + 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699, + 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757, + 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626, + 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126, + 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019, + 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031, + 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873, + 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269, + 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981, + 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831, + 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716, + 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610, + 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560, + 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674, + 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099, + 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994, + 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513, + 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788, + 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919, + 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974, + 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993, + 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999, + 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, + 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 }; + + +static inline void swap_cmplx(complex_t *a, complex_t *b) +{ + complex_t tmp; + + tmp = *a; + *a = *b; + *b = tmp; +} + + + +static inline complex_t cmplx_mult(complex_t a, complex_t b) +{ + complex_t ret; + + ret.real = a.real * b.real - a.imag * b.imag; + ret.imag = a.real * b.imag + a.imag * b.real; + + return ret; +} + +void +imdct_do_512(float data[],float delay[]) +{ + int i,k; + int p,q; + int m; + int two_m; + int two_m_plus_one; + + float tmp_a_i; + float tmp_a_r; + float tmp_b_i; + float tmp_b_r; + + float *data_ptr; + float *delay_ptr; + float *window_ptr; + + // + // 512 IMDCT with source and dest data in 'data' + // + + // Pre IFFT complex multiply plus IFFT cmplx conjugate + for( i=0; i < 128; i++) { + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ + buf[i].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]); + buf[i].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i])); + } + + //Bit reversed shuffling + for(i=0; i<128; i++) { + k = bit_reverse_512[i]; + if (k < i) + swap_cmplx(&buf[i],&buf[k]); + } + + /* FFT Merge */ + for (m=0; m < 7; m++) { + if(m) + two_m = (1 << m); + else + two_m = 1; + + two_m_plus_one = (1 << (m+1)); + + for(k = 0; k < two_m; k++) { + for(i = 0; i < 128; i += two_m_plus_one) { + p = k + i; + q = p + two_m; + tmp_a_r = buf[p].real; + tmp_a_i = buf[p].imag; + tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag; + tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag; + buf[p].real = tmp_a_r + tmp_b_r; + buf[p].imag = tmp_a_i + tmp_b_i; + buf[q].real = tmp_a_r - tmp_b_r; + buf[q].imag = tmp_a_i - tmp_b_i; + } + } + } + + /* Post IFFT complex multiply plus IFFT complex conjugate*/ + for( i=0; i < 128; i++) { + /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ + tmp_a_r = buf[i].real; + tmp_a_i = -1.0 * buf[i].imag; + buf[i].real =(tmp_a_r * xcos1[i]) - (tmp_a_i * xsin1[i]); + buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]); + } + + data_ptr = data; + delay_ptr = delay; + window_ptr = imdct_window; + + /* Window and convert to real valued signal */ + for(i=0; i< 64; i++) { + *data_ptr++ = 2.0f * (-buf[64+i].imag * *window_ptr++ + *delay_ptr++); + *data_ptr++ = 2.0f * ( buf[64-i-1].real * *window_ptr++ + *delay_ptr++); + } + + for(i=0; i< 64; i++) { + *data_ptr++ = 2.0f * (-buf[i].real * *window_ptr++ + *delay_ptr++); + *data_ptr++ = 2.0f * ( buf[128-i-1].imag * *window_ptr++ + *delay_ptr++); + } + + /* The trailing edge of the window goes into the delay line */ + delay_ptr = delay; + + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf[64+i].real * *--window_ptr; + *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; + } + + for(i=0; i<64; i++) { + *delay_ptr++ = buf[i].imag * *--window_ptr; + *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; + } +} + +void +imdct_do_256(float data[],float delay[]) +{ + int i,k; + int p,q; + int m; + int two_m; + int two_m_plus_one; + + float tmp_a_i; + float tmp_a_r; + float tmp_b_i; + float tmp_b_r; + + float *data_ptr; + float *delay_ptr; + float *window_ptr; + + complex_t *buf_1, *buf_2; + + buf_1 = &buf[0]; + buf_2 = &buf[64]; + + /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ + for(k=0; k<64; k++) { + /* X1[k] = X[2*k] */ + /* X2[k] = X[2*k+1] */ + + p = 2 * (128-2*k-1); + q = 2 * (2 * k); + + /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf_1[k].real = data[p] * xcos2[k] - data[q] * xsin2[k]; + buf_1[k].imag = -1.0f * (data[q] * xcos2[k] + data[p] * xsin2[k]); + /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf_2[k].real = data[p + 1] * xcos2[k] - data[q + 1] * xsin2[k]; + buf_2[k].imag = -1.0f * ( data[q + 1] * xcos2[k] + data[p + 1] * xsin2[k]); + } + + //IFFT Bit reversed shuffling + for(i=0; i<64; i++) { + k = bit_reverse_256[i]; + if (k < i) { + swap_cmplx(&buf_1[i],&buf_1[k]); + swap_cmplx(&buf_2[i],&buf_2[k]); + } + } + + /* FFT Merge */ + for (m=0; m < 6; m++) { + two_m = (1 << m); + two_m_plus_one = (1 << (m+1)); + + //FIXME + if(m) + two_m = (1 << m); + else + two_m = 1; + + for(k = 0; k < two_m; k++) { + for(i = 0; i < 64; i += two_m_plus_one) { + p = k + i; + q = p + two_m; + //Do block 1 + tmp_a_r = buf_1[p].real; + tmp_a_i = buf_1[p].imag; + tmp_b_r = buf_1[q].real * w[m][k].real - buf_1[q].imag * w[m][k].imag; + tmp_b_i = buf_1[q].imag * w[m][k].real + buf_1[q].real * w[m][k].imag; + buf_1[p].real = tmp_a_r + tmp_b_r; + buf_1[p].imag = tmp_a_i + tmp_b_i; + buf_1[q].real = tmp_a_r - tmp_b_r; + buf_1[q].imag = tmp_a_i - tmp_b_i; + + //Do block 2 + tmp_a_r = buf_2[p].real; + tmp_a_i = buf_2[p].imag; + tmp_b_r = buf_2[q].real * w[m][k].real - buf_2[q].imag * w[m][k].imag; + tmp_b_i = buf_2[q].imag * w[m][k].real + buf_2[q].real * w[m][k].imag; + buf_2[p].real = tmp_a_r + tmp_b_r; + buf_2[p].imag = tmp_a_i + tmp_b_i; + buf_2[q].real = tmp_a_r - tmp_b_r; + buf_2[q].imag = tmp_a_i - tmp_b_i; + } + } + } + + /* Post IFFT complex multiply */ + for( i=0; i < 64; i++) { + /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ + tmp_a_r = buf_1[i].real; + tmp_a_i = -buf_1[i].imag; + buf_1[i].real =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]); + buf_1[i].imag =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]); + /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */ + tmp_a_r = buf_2[i].real; + tmp_a_i = -buf_2[i].imag; + buf_2[i].real =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]); + buf_2[i].imag =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]); + } + + data_ptr = data; + delay_ptr = delay; + window_ptr = imdct_window; + + /* Window and convert to real valued signal */ + for(i=0; i< 64; i++) { + *data_ptr++ = 2.0f * (-buf_1[i].imag * *window_ptr++ + *delay_ptr++); + *data_ptr++ = 2.0f * ( buf_1[64-i-1].real * *window_ptr++ + *delay_ptr++); + } + + for(i=0; i< 64; i++) { + *data_ptr++ = 2.0f * (-buf_1[i].real * *window_ptr++ + *delay_ptr++); + *data_ptr++ = 2.0f * ( buf_1[64-i-1].imag * *window_ptr++ + *delay_ptr++); + } + + delay_ptr = delay; + + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf_2[i].real * *--window_ptr; + *delay_ptr++ = buf_2[64-i-1].imag * *--window_ptr; + } + + for(i=0; i< 64; i++) { + *delay_ptr++ = buf_2[i].imag * *--window_ptr; + *delay_ptr++ = -buf_2[64-i-1].real * *--window_ptr; + } +} + +void imdct_init (void) +{ +#ifdef LIBAC3_MLIB + void imdct_do_256_mlib(float data[],float delay[]); + void imdct_do_512_mlib(float data[],float delay[]); + + imdct_512 = imdct_do_512_mlib; + imdct_256 = imdct_do_256_mlib; +#else + int i, j, k; + + /* Twiddle factors to turn IFFT into IMDCT */ + for (i = 0; i < 128; i++) { + xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); + xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); + } + + /* More twiddle factors to turn IFFT into IMDCT */ + for (i = 0; i < 64; i++) { + xcos2[i] = -cos ((M_PI / 1024) * (8 * i + 1)); + xsin2[i] = -sin ((M_PI / 1024) * (8 * i + 1)); + } + + for (i = 0; i < 7; i++) { + j = 1 << i; + for (k = 0; k < j; k++) { + w[i][k].real = cos (-M_PI * k / j); + w[i][k].imag = sin (-M_PI * k / j); + } + } + imdct_512 = imdct_do_512; + imdct_256 = imdct_do_256; +#endif +} diff --git a/libavcodec/libac3/parse.c b/libavcodec/libac3/parse.c new file mode 100644 index 0000000000..c6eae00ba5 --- /dev/null +++ b/libavcodec/libac3/parse.c @@ -0,0 +1,684 @@ +/* + * parse.c + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + */ + +#include <inttypes.h> +#include <string.h> + +#include "ac3.h" +#include "ac3_internal.h" + +#include "bitstream.h" +#include "tables.h" + +extern stream_samples_t samples; // FIXME +static float delay[6][256]; + +void ac3_init (void) +{ + imdct_init (); +} + +static uint8_t halfrate[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3}; + +int ac3_syncinfo (uint8_t * buf, int * flags, + int * sample_rate, int * bit_rate) +{ + static int rate[] = { 32, 40, 48, 56, 64, 80, 96, 112, + 128, 160, 192, 224, 256, 320, 384, 448, + 512, 576, 640}; + static uint8_t lfeon[8] = {0x10, 0x10, 0x04, 0x04, 0x04, 0x01, 0x04, 0x01}; + int frmsizecod; + int bitrate; + int half; + int acmod; + + if ((buf[0] != 0x0b) || (buf[1] != 0x77)) // syncword + return 0; + + if (buf[5] >= 0x60) // bsid >= 12 + return 0; + half = halfrate[buf[5] >> 3]; + + // acmod, dsurmod and lfeon + acmod = buf[6] >> 5; + *flags = (((buf[6] & 0xf8) == 0x50) ? AC3_DOLBY : acmod) | + ((buf[6] & lfeon[acmod]) ? AC3_LFE : 0); + + frmsizecod = buf[4] & 63; + if (frmsizecod >= 38) + return 0; + bitrate = rate [frmsizecod >> 1]; + *bit_rate = (bitrate * 1000) >> half; + + switch (buf[4] & 0xc0) { + case 0: // 48 KHz + *sample_rate = 48000 >> half; + return 4 * bitrate; + case 0x40: + *sample_rate = 44100 >> half; + return 2 * (320 * bitrate / 147 + (frmsizecod & 1)); + case 0x80: + *sample_rate = 32000 >> half; + return 6 * bitrate; + default: + return 0; + } +} + +int ac3_frame (ac3_state_t * state, uint8_t * buf, int * flags, float * level, + float bias) +{ + static float clev[4] = {LEVEL_3DB, LEVEL_45DB, LEVEL_6DB, LEVEL_45DB}; + static float slev[4] = {LEVEL_3DB, LEVEL_6DB, 0, LEVEL_6DB}; + int chaninfo; + int acmod; + + state->fscod = buf[4] >> 6; + state->halfrate = halfrate[buf[5] >> 3]; + state->acmod = acmod = buf[6] >> 5; + + bitstream_set_ptr (buf + 6); + bitstream_get (3); // skip acmod we already parsed + + if ((acmod == 2) && (bitstream_get (2) == 2)) // dsurmod + acmod = AC3_DOLBY; + + if ((acmod & 1) && (acmod != 1)) + state->clev = clev[bitstream_get (2)]; // cmixlev + + if (acmod & 4) + state->slev = slev[bitstream_get (2)]; // surmixlev + + state->lfeon = bitstream_get (1); + + state->output = downmix_init (acmod, *flags, level, + state->clev, state->slev); + if (state->output < 0) + return 1; + *flags = state->output; + state->level = *level; + state->bias = bias; + + chaninfo = !acmod; + do { + bitstream_get (5); // dialnorm + if (bitstream_get (1)) // compre + bitstream_get (8); // compr + if (bitstream_get (1)) // langcode + bitstream_get (8); // langcod + if (bitstream_get (1)) // audprodie + bitstream_get (7); // mixlevel + roomtyp + } while (chaninfo--); + + bitstream_get (2); // copyrightb + origbs + + if (bitstream_get (1)) // timecod1e + bitstream_get (14); // timecod1 + if (bitstream_get (1)) // timecod2e + bitstream_get (14); // timecod2 + + if (bitstream_get (1)) { // addbsie + int addbsil; + + addbsil = bitstream_get (6); + do { + bitstream_get (8); // addbsi + } while (addbsil--); + } + + return 0; +} + +static int parse_exponents (int expstr, int ngrps, uint8_t exponent, + uint8_t * dest) +{ + int exps; + + while (ngrps--) { + exps = bitstream_get (7); + + exponent += exp_1[exps]; + if (exponent > 24) + return 1; + + switch (expstr) { + case EXP_D45: + *(dest++) = exponent; + *(dest++) = exponent; + case EXP_D25: + *(dest++) = exponent; + case EXP_D15: + *(dest++) = exponent; + } + + exponent += exp_2[exps]; + if (exponent > 24) + return 1; + + switch (expstr) { + case EXP_D45: + *(dest++) = exponent; + *(dest++) = exponent; + case EXP_D25: + *(dest++) = exponent; + case EXP_D15: + *(dest++) = exponent; + } + + exponent += exp_3[exps]; + if (exponent > 24) + return 1; + + switch (expstr) { + case EXP_D45: + *(dest++) = exponent; + *(dest++) = exponent; + case EXP_D25: + *(dest++) = exponent; + case EXP_D15: + *(dest++) = exponent; + } + } + + return 0; +} + +static int parse_deltba (int8_t * deltba) +{ + int deltnseg, deltlen, delta, j; + + memset (deltba, 0, 50); + + deltnseg = bitstream_get (3); + j = 0; + do { + j += bitstream_get (5); + deltlen = bitstream_get (4); + delta = bitstream_get (3); + delta -= (delta >= 4) ? 3 : 4; + if (!deltlen) + continue; + if (j + deltlen >= 50) + return 1; + while (deltlen--) + deltba[j++] = delta; + } while (deltnseg--); + + return 0; +} + +static inline int zero_snr_offsets (int nfchans, ac3_state_t * state) +{ + int i; + + if ((state->csnroffst) || (state->cplinu && state->cplba.fsnroffst) || + (state->lfeon && state->lfeba.fsnroffst)) + return 0; + for (i = 0; i < nfchans; i++) + if (state->ba[i].fsnroffst) + return 0; + return 1; +} + +static float q_1[2]; +static float q_2[2]; +static float q_4; +static int q_1_pointer; +static int q_2_pointer; +static int q_4_pointer; + +#define GET_COEFF(COEFF,DITHER) \ + switch (bap[i]) { \ + case 0: \ + DITHER (scale_factor[exp[i]]); \ + \ + case -1: \ + if (q_1_pointer >= 0) { \ + COEFF (q_1[q_1_pointer--] * scale_factor[exp[i]]); \ + } else { \ + int code; \ + \ + code = bitstream_get (5); \ + \ + q_1_pointer = 1; \ + q_1[0] = q_1_2[code]; \ + q_1[1] = q_1_1[code]; \ + COEFF (q_1_0[code] * scale_factor[exp[i]]); \ + } \ + \ + case -2: \ + if (q_2_pointer >= 0) { \ + COEFF (q_2[q_2_pointer--] * scale_factor[exp[i]]); \ + } else { \ + int code; \ + \ + code = bitstream_get (7); \ + \ + q_2_pointer = 1; \ + q_2[0] = q_2_2[code]; \ + q_2[1] = q_2_1[code]; \ + COEFF (q_2_0[code] * scale_factor[exp[i]]); \ + } \ + \ + case 3: \ + COEFF (q_3[bitstream_get (3)] * scale_factor[exp[i]]); \ + \ + case -3: \ + if (q_4_pointer == 0) { \ + q_4_pointer = -1; \ + COEFF (q_4 * scale_factor[exp[i]]); \ + } else { \ + int code; \ + \ + code = bitstream_get (7); \ + \ + q_4_pointer = 0; \ + q_4 = q_4_1[code]; \ + COEFF (q_4_0[code] * scale_factor[exp[i]]); \ + } \ + \ + case 4: \ + COEFF (q_5[bitstream_get (4)] * scale_factor[exp[i]]); \ + \ + default: \ + COEFF (((int16_t)(bitstream_get(bap[i]) << (16 - bap[i]))) * \ + scale_factor[exp[i]]); \ + } + +#define CHANNEL_COEFF(val) \ + coeff[i++] = val; \ + continue; + +#define CHANNEL_DITHER(val) \ + if (dither) { \ + coeff[i++] = dither_gen () * val; \ + continue; \ + } else { \ + coeff[i++] = 0; \ + continue; \ + } + +static uint16_t lfsr_state = 1; + +static inline int16_t dither_gen(void) +{ + int16_t state; + + state = dither_lut[lfsr_state >> 8] ^ (lfsr_state << 8); + + lfsr_state = (uint16_t) state; + + return ((state * (int) (LEVEL_3DB * 256)) >> 8); +} + +static void coeff_get (float * coeff, uint8_t * exp, int8_t * bap, + int dither, int end) +{ + int i; + + i = 0; + while (i < end) + GET_COEFF (CHANNEL_COEFF, CHANNEL_DITHER); +} + +#define COUPLING_COEFF(val) \ + cplcoeff = val; \ + break; + +#define COUPLING_DITHER(val) \ + cplcoeff = val; \ + for (ch = 0; ch < nfchans; ch++) \ + if (state->chincpl[ch]) { \ + if (dithflag[ch]) \ + samples[ch][i] = \ + state->cplco[ch][bnd] * dither_gen () * cplcoeff; \ + else \ + samples[ch][i] = 0; \ + } \ + i++; \ + continue; + +int ac3_block (ac3_state_t * state) +{ + static const uint8_t nfchans_tbl[8] = {2, 1, 2, 3, 3, 4, 4, 5}; + static int rematrix_band[4] = {25, 37, 61, 253}; + int i, nfchans, chaninfo; + uint8_t cplexpstr, chexpstr[5], lfeexpstr, do_bit_alloc, done_cpl; + uint8_t blksw[5], dithflag[5]; + + nfchans = nfchans_tbl[state->acmod]; + + for (i = 0; i < nfchans; i++) + blksw[i] = bitstream_get (1); + + for (i = 0; i < nfchans; i++) + dithflag[i] = bitstream_get (1); + + chaninfo = !(state->acmod); + do { + if (bitstream_get (1)) // dynrnge + bitstream_get (8); // dynrng + } while (chaninfo--); + + if (bitstream_get (1)) { // cplstre + state->cplinu = bitstream_get (1); + if (state->cplinu) { + static int bndtab[16] = {31, 35, 37, 39, 41, 42, 43, 44, + 45, 45, 46, 46, 47, 47, 48, 48}; + int cplbegf; + int cplendf; + int ncplsubnd; + + for (i = 0; i < nfchans; i++) + state->chincpl[i] = bitstream_get (1); + switch (state->acmod) { + case 0: case 1: + return 1; + case 2: + state->phsflginu = bitstream_get (1); + } + cplbegf = bitstream_get (4); + cplendf = bitstream_get (4); + + if (cplendf + 3 - cplbegf < 0) + return 1; + state->ncplbnd = ncplsubnd = cplendf + 3 - cplbegf; + state->cplstrtbnd = bndtab[cplbegf]; + state->cplstrtmant = cplbegf * 12 + 37; + state->cplendmant = cplendf * 12 + 73; + + for (i = 0; i < ncplsubnd - 1; i++) { + state->cplbndstrc[i] = bitstream_get (1); + state->ncplbnd -= state->cplbndstrc[i]; + } + state->cplbndstrc[i] = 0; // last value is a sentinel + } + } + + if (state->cplinu) { + int j, cplcoe; + + cplcoe = 0; + for (i = 0; i < nfchans; i++) + if (state->chincpl[i]) + if (bitstream_get (1)) { // cplcoe + int mstrcplco, cplcoexp, cplcomant; + + cplcoe = 1; + mstrcplco = 3 * bitstream_get (2); + for (j = 0; j < state->ncplbnd; j++) { + cplcoexp = bitstream_get (4); + cplcomant = bitstream_get (4); + if (cplcoexp == 15) + cplcomant <<= 14; + else + cplcomant = (cplcomant | 0x10) << 13; + state->cplco[i][j] = + cplcomant * scale_factor[cplcoexp + mstrcplco]; + } + } + if ((state->acmod == 2) && state->phsflginu && cplcoe) + for (j = 0; j < state->ncplbnd; j++) + if (bitstream_get (1)) // phsflg + state->cplco[1][j] = -state->cplco[1][j]; + } + + if ((state->acmod == 2) && (bitstream_get (1))) { // rematstr + int end; + + end = (state->cplinu) ? state->cplstrtmant : 253; + i = 0; + do + state->rematflg[i] = bitstream_get (1); + while (rematrix_band[i++] < end); + } + + cplexpstr = EXP_REUSE; + lfeexpstr = EXP_REUSE; + if (state->cplinu) + cplexpstr = bitstream_get (2); + for (i = 0; i < nfchans; i++) + chexpstr[i] = bitstream_get (2); + if (state->lfeon) + lfeexpstr = bitstream_get (1); + + for (i = 0; i < nfchans; i++) + if (chexpstr[i] != EXP_REUSE) { + if (state->cplinu && state->chincpl[i]) + state->endmant[i] = state->cplstrtmant; + else { + int chbwcod; + + chbwcod = bitstream_get (6); + if (chbwcod > 60) + return 1; + state->endmant[i] = chbwcod * 3 + 73; + } + } + + do_bit_alloc = 0; + + if (cplexpstr != EXP_REUSE) { + int cplabsexp, ncplgrps; + + do_bit_alloc = 1; + ncplgrps = ((state->cplendmant - state->cplstrtmant) / + (3 << (cplexpstr - 1))); + cplabsexp = bitstream_get (4) << 1; + if (parse_exponents (cplexpstr, ncplgrps, cplabsexp, + state->cpl_exp + state->cplstrtmant)) + return 1; + } + for (i = 0; i < nfchans; i++) + if (chexpstr[i] != EXP_REUSE) { + int grp_size, nchgrps; + + do_bit_alloc = 1; + grp_size = 3 << (chexpstr[i] - 1); + nchgrps = (state->endmant[i] + grp_size - 4) / grp_size; + state->fbw_exp[i][0] = bitstream_get (4); + if (parse_exponents (chexpstr[i], nchgrps, state->fbw_exp[i][0], + state->fbw_exp[i] + 1)) + return 1; + bitstream_get (2); // gainrng + } + if (lfeexpstr != EXP_REUSE) { + do_bit_alloc = 1; + state->lfe_exp[0] = bitstream_get (4); + if (parse_exponents (lfeexpstr, 2, state->lfe_exp[0], + state->lfe_exp + 1)) + return 1; + } + + if (bitstream_get (1)) { // baie + do_bit_alloc = 1; + state->sdcycod = bitstream_get (2); + state->fdcycod = bitstream_get (2); + state->sgaincod = bitstream_get (2); + state->dbpbcod = bitstream_get (2); + state->floorcod = bitstream_get (3); + } + if (bitstream_get (1)) { //snroffste + do_bit_alloc = 1; + state->csnroffst = bitstream_get (6); + if (state->cplinu) { + state->cplba.fsnroffst = bitstream_get (4); + state->cplba.fgaincod = bitstream_get (3); + } + for (i = 0; i < nfchans; i++) { + state->ba[i].fsnroffst = bitstream_get (4); + state->ba[i].fgaincod = bitstream_get (3); + } + if (state->lfeon) { + state->lfeba.fsnroffst = bitstream_get (4); + state->lfeba.fgaincod = bitstream_get (3); + } + } + if ((state->cplinu) && (bitstream_get (1))) { // cplleake + do_bit_alloc = 1; + state->cplfleak = 2304 - (bitstream_get (3) << 8); + state->cplsleak = 2304 - (bitstream_get (3) << 8); + } + + if (bitstream_get (1)) { // deltbaie + do_bit_alloc = 1; + if (state->cplinu) + state->cplba.deltbae = bitstream_get (2); + for (i = 0; i < nfchans; i++) + state->ba[i].deltbae = bitstream_get (2); + if (state->cplinu && (state->cplba.deltbae == DELTA_BIT_NEW) && + parse_deltba (state->cplba.deltba)) + return 1; + for (i = 0; i < nfchans; i++) + if ((state->ba[i].deltbae == DELTA_BIT_NEW) && + parse_deltba (state->ba[i].deltba)) + return 1; + } + + if (do_bit_alloc) { + if (zero_snr_offsets (nfchans, state)) { + memset (state->cpl_bap, 0, sizeof (state->cpl_bap)); + memset (state->fbw_bap, 0, sizeof (state->fbw_bap)); + memset (state->lfe_bap, 0, sizeof (state->lfe_bap)); + } else { + if (state->cplinu) + bit_allocate (state, &state->cplba, state->cplstrtbnd, + state->cplstrtmant, state->cplendmant, + state->cplfleak, state->cplsleak, + state->cpl_exp, state->cpl_bap); + for (i = 0; i < nfchans; i++) + bit_allocate (state, state->ba + i, 0, 0, state->endmant[i], + 0, 0, state->fbw_exp[i], state->fbw_bap[i]); + if (state->lfeon) { + state->lfeba.deltbae = DELTA_BIT_NONE; + bit_allocate (state, &state->lfeba, 0, 0, 7, 0, 0, + state->lfe_exp, state->lfe_bap); + } + } + } + + if (bitstream_get (1)) { // skiple + i = bitstream_get (9); // skipl + while (i--) + bitstream_get (8); + } + + q_1_pointer = q_2_pointer = q_4_pointer = -1; + done_cpl = 0; + + for (i = 0; i < nfchans; i++) { + int j; + + coeff_get (samples[i], state->fbw_exp[i], state->fbw_bap[i], + dithflag[i], state->endmant[i]); + + if (state->cplinu && state->chincpl[i]) { + if (!done_cpl) { + int i, i_end, bnd, sub_bnd, ch; + float cplcoeff; + + done_cpl = 1; + +#define bap state->cpl_bap +#define exp state->cpl_exp + + sub_bnd = bnd = 0; + i = state->cplstrtmant; + while (i < state->cplendmant) { + i_end = i + 12; + while (state->cplbndstrc[sub_bnd++]) + i_end += 12; + + while (i < i_end) { + GET_COEFF (COUPLING_COEFF, COUPLING_DITHER); + for (ch = 0; ch < nfchans; ch++) + if (state->chincpl[ch]) + samples[ch][i] = + state->cplco[ch][bnd] * cplcoeff; + i++; + } + bnd++; + } + +#undef bap +#undef exp + } + j = state->cplendmant; + } else + j = state->endmant[i]; + for (; j < 256; j++) + samples[i][j] = 0; + } + + if (state->acmod == 2) { + int j, end, band; + + end = ((state->endmant[0] < state->endmant[1]) ? + state->endmant[0] : state->endmant[1]); + + i = 0; + j = 13; + do { + if (!state->rematflg[i]) { + j = rematrix_band[i++]; + continue; + } + band = rematrix_band[i++]; + if (band > end) + band = end; + do { + float tmp0, tmp1; + + tmp0 = samples[0][j]; + tmp1 = samples[1][j]; + samples[0][j] = tmp0 + tmp1; + samples[1][j] = tmp0 - tmp1; + } while (++j < band); + } while (j < end); + } + + if (state->lfeon) { + coeff_get (samples[5], state->lfe_exp, state->lfe_bap, 0, 7); +#if 0 + for (i = 7; i < 256; i++) + samples[5][i] = 0; +#endif + } + + for (i = 0; i < nfchans; i++) + if (blksw[i]) + imdct_256 (samples[i], delay[i]); + else + imdct_512 (samples[i], delay[i]); + +#if 0 + if (state->lfeon) + imdct_512 (samples[5], delay[5]); +#endif + + downmix (*samples, state->acmod, state->output, state->level, state->bias, + state->clev, state->slev); + + return 0; +} diff --git a/libavcodec/libac3/tables.h b/libavcodec/libac3/tables.h new file mode 100644 index 0000000000..5825f9326a --- /dev/null +++ b/libavcodec/libac3/tables.h @@ -0,0 +1,250 @@ +/* + * parse.c + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + */ + +static int8_t exp_1[128] = { + -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 25,25,25 +}; +static int8_t exp_2[128] = { + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 25,25,25 +}; +static int8_t exp_3[128] = { + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + 25,25,25 +}; + +#define Q0 ((-2 << 15) / 3.0) +#define Q1 (0) +#define Q2 ((2 << 15) / 3.0) + +static const float q_1_0[ 32 ] = { + Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0, + Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2, + 0,0,0,0,0 +}; + +static const float q_1_1[ 32 ] = { + Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2, + Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2, + Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2, + 0,0,0,0,0 +}; + +static const float q_1_2[ 32 ] = { + Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2, + Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2, + Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2, + 0,0,0,0,0 +}; + +#undef Q0 +#undef Q1 +#undef Q2 + +#define Q0 ((-4 << 15) / 5.0) +#define Q1 ((-2 << 15) / 5.0) +#define Q2 (0) +#define Q3 ((2 << 15) / 5.0) +#define Q4 ((4 << 15) / 5.0) + +static const float q_2_0[ 128 ] = { + Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0, + Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2, + Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3, + Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4, + 0,0,0 +}; + +static const float q_2_1[ 128 ] = { + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + 0,0,0 +}; + +static const float q_2_2[ 128 ] = { + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + 0,0,0 +}; + +#undef Q0 +#undef Q1 +#undef Q2 +#undef Q3 +#undef Q4 + +static const float q_3[8] = { + (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0, 0, + ( 2 << 15)/7.0, ( 4 << 15)/7.0, ( 6 << 15)/7.0, 0 +}; + +#define Q0 ((-10 << 15) / 11.0) +#define Q1 ((-8 << 15) / 11.0) +#define Q2 ((-6 << 15) / 11.0) +#define Q3 ((-4 << 15) / 11.0) +#define Q4 ((-2 << 15) / 11.0) +#define Q5 (0) +#define Q6 ((2 << 15) / 11.0) +#define Q7 ((4 << 15) / 11.0) +#define Q8 ((6 << 15) / 11.0) +#define Q9 ((8 << 15) / 11.0) +#define QA ((10 << 15) / 11.0) + +static const float q_4_0[ 128 ] = { + Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, + Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, + Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, + Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, + Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, + Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, + Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, + Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, + Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, + Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, + QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, + 0, 0, 0, 0, 0, 0, 0 +}; + +static const float q_4_1[ 128 ] = { + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + 0, 0, 0, 0, 0, 0, 0 +}; + +#undef Q0 +#undef Q1 +#undef Q2 +#undef Q3 +#undef Q4 +#undef Q5 +#undef Q6 +#undef Q7 +#undef Q8 +#undef Q9 +#undef QA + +static const float q_5[16] = { + (-14 << 15)/15.0,(-12 << 15)/15.0,(-10 << 15)/15.0, + ( -8 << 15)/15.0,( -6 << 15)/15.0,( -4 << 15)/15.0, + ( -2 << 15)/15.0, 0 ,( 2 << 15)/15.0, + ( 4 << 15)/15.0,( 6 << 15)/15.0,( 8 << 15)/15.0, + ( 10 << 15)/15.0,( 12 << 15)/15.0,( 14 << 15)/15.0, + 0 +}; + +static const uint32_t u32_scale_factors[25] = +{ + 0x38000000, //2 ^ -(0 + 15) + 0x37800000, //2 ^ -(1 + 15) + 0x37000000, //2 ^ -(2 + 15) + 0x36800000, //2 ^ -(3 + 15) + 0x36000000, //2 ^ -(4 + 15) + 0x35800000, //2 ^ -(5 + 15) + 0x35000000, //2 ^ -(6 + 15) + 0x34800000, //2 ^ -(7 + 15) + 0x34000000, //2 ^ -(8 + 15) + 0x33800000, //2 ^ -(9 + 15) + 0x33000000, //2 ^ -(10 + 15) + 0x32800000, //2 ^ -(11 + 15) + 0x32000000, //2 ^ -(12 + 15) + 0x31800000, //2 ^ -(13 + 15) + 0x31000000, //2 ^ -(14 + 15) + 0x30800000, //2 ^ -(15 + 15) + 0x30000000, //2 ^ -(16 + 15) + 0x2f800000, //2 ^ -(17 + 15) + 0x2f000000, //2 ^ -(18 + 15) + 0x2e800000, //2 ^ -(19 + 15) + 0x2e000000, //2 ^ -(20 + 15) + 0x2d800000, //2 ^ -(21 + 15) + 0x2d000000, //2 ^ -(22 + 15) + 0x2c800000, //2 ^ -(23 + 15) + 0x2c000000 //2 ^ -(24 + 15) +}; + +static float * scale_factor = (float *) u32_scale_factors; + +static const uint16_t dither_lut[256] = { + 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055, + 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb, + 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198, + 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176, + 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf, + 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321, + 0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202, + 0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec, + 0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761, + 0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f, + 0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac, + 0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642, + 0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb, + 0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415, + 0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536, + 0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8, + 0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c, + 0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2, + 0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1, + 0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f, + 0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6, + 0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58, + 0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b, + 0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95, + 0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918, + 0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6, + 0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5, + 0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b, + 0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82, + 0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c, + 0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f, + 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1 +}; diff --git a/libavcodec/mjpegenc.c b/libavcodec/mjpegenc.c new file mode 100644 index 0000000000..ead72605e6 --- /dev/null +++ b/libavcodec/mjpegenc.c @@ -0,0 +1,417 @@ +/* + * MJPEG encoder + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + +typedef struct MJpegContext { + UINT8 huff_size_dc_luminance[12]; + UINT16 huff_code_dc_luminance[12]; + UINT8 huff_size_dc_chrominance[12]; + UINT16 huff_code_dc_chrominance[12]; + + UINT8 huff_size_ac_luminance[256]; + UINT16 huff_code_ac_luminance[256]; + UINT8 huff_size_ac_chrominance[256]; + UINT16 huff_code_ac_chrominance[256]; +} MJpegContext; + +#define SOF0 0xc0 +#define SOI 0xd8 +#define EOI 0xd9 +#define DQT 0xdb +#define DHT 0xc4 +#define SOS 0xda + +#if 0 +/* These are the sample quantization tables given in JPEG spec section K.1. + * The spec says that the values given produce "good" quality, and + * when divided by 2, "very good" quality. + */ +static const unsigned char std_luminance_quant_tbl[64] = { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 56, 68, 109, 103, 77, + 24, 35, 55, 64, 81, 104, 113, 92, + 49, 64, 78, 87, 103, 121, 120, 101, + 72, 92, 95, 98, 112, 100, 103, 99 +}; +static const unsigned char std_chrominance_quant_tbl[64] = { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 +}; +#endif + +/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */ +/* IMPORTANT: these are only valid for 8-bit data precision! */ +static const UINT8 bits_dc_luminance[17] = +{ /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0 }; +static const UINT8 val_dc_luminance[] = +{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + +static const UINT8 bits_dc_chrominance[17] = +{ /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 }; +static const UINT8 val_dc_chrominance[] = +{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; + +static const UINT8 bits_ac_luminance[17] = +{ /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d }; +static const UINT8 val_ac_luminance[] = +{ 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, + 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, + 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, + 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, + 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, + 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa +}; + +static const UINT8 bits_ac_chrominance[17] = +{ /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77 }; + +static const UINT8 val_ac_chrominance[] = +{ 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, + 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, + 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, + 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, + 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, + 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, + 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, + 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, + 0xf9, 0xfa +}; + + +/* isn't this function nicer than the one in the libjpeg ? */ +static void build_huffman_codes(UINT8 *huff_size, UINT16 *huff_code, + const UINT8 *bits_table, const UINT8 *val_table) +{ + int i, j, k,nb, code, sym; + + code = 0; + k = 0; + for(i=1;i<=16;i++) { + nb = bits_table[i]; + for(j=0;j<nb;j++) { + sym = val_table[k++]; + huff_size[sym] = i; + huff_code[sym] = code; + code++; + } + code <<= 1; + } +} + +int mjpeg_init(MpegEncContext *s) +{ + MJpegContext *m; + + m = malloc(sizeof(MJpegContext)); + if (!m) + return -1; + + /* build all the huffman tables */ + build_huffman_codes(m->huff_size_dc_luminance, + m->huff_code_dc_luminance, + bits_dc_luminance, + val_dc_luminance); + build_huffman_codes(m->huff_size_dc_chrominance, + m->huff_code_dc_chrominance, + bits_dc_chrominance, + val_dc_chrominance); + build_huffman_codes(m->huff_size_ac_luminance, + m->huff_code_ac_luminance, + bits_ac_luminance, + val_ac_luminance); + build_huffman_codes(m->huff_size_ac_chrominance, + m->huff_code_ac_chrominance, + bits_ac_chrominance, + val_ac_chrominance); + + s->mjpeg_ctx = m; + return 0; +} + +void mjpeg_close(MpegEncContext *s) +{ + free(s->mjpeg_ctx); +} + +static inline void put_marker(PutBitContext *p, int code) +{ + put_bits(p, 8, 0xff); + put_bits(p, 8, code); +} + +/* table_class: 0 = DC coef, 1 = AC coefs */ +static int put_huffman_table(MpegEncContext *s, int table_class, int table_id, + const UINT8 *bits_table, const UINT8 *value_table) +{ + PutBitContext *p = &s->pb; + int n, i; + + put_bits(p, 4, table_class); + put_bits(p, 4, table_id); + + n = 0; + for(i=1;i<=16;i++) { + n += bits_table[i]; + put_bits(p, 8, bits_table[i]); + } + + for(i=0;i<n;i++) + put_bits(p, 8, value_table[i]); + + return n + 17; +} + +static void jpeg_table_header(MpegEncContext *s) +{ + PutBitContext *p = &s->pb; + int i, size; + UINT8 *ptr; + + /* quant matrixes */ + put_marker(p, DQT); + put_bits(p, 16, 2 + 1 * (1 + 64)); + put_bits(p, 4, 0); /* 8 bit precision */ + put_bits(p, 4, 0); /* table 0 */ + for(i=0;i<64;i++) { + put_bits(p, 8, s->intra_matrix[i]); + } +#if 0 + put_bits(p, 4, 0); /* 8 bit precision */ + put_bits(p, 4, 1); /* table 1 */ + for(i=0;i<64;i++) { + put_bits(p, 8, s->chroma_intra_matrix[i]); + } +#endif + + /* huffman table */ + put_marker(p, DHT); + flush_put_bits(p); + ptr = p->buf_ptr; + put_bits(p, 16, 0); /* patched later */ + size = 2; + size += put_huffman_table(s, 0, 0, bits_dc_luminance, val_dc_luminance); + size += put_huffman_table(s, 0, 1, bits_dc_chrominance, val_dc_chrominance); + + size += put_huffman_table(s, 1, 0, bits_ac_luminance, val_ac_luminance); + size += put_huffman_table(s, 1, 1, bits_ac_chrominance, val_ac_chrominance); + ptr[0] = size >> 8; + ptr[1] = size; +} + +void mjpeg_picture_header(MpegEncContext *s) +{ + put_marker(&s->pb, SOI); + + jpeg_table_header(s); + + put_marker(&s->pb, SOF0); + + put_bits(&s->pb, 16, 17); + put_bits(&s->pb, 8, 8); /* 8 bits/component */ + put_bits(&s->pb, 16, s->height); + put_bits(&s->pb, 16, s->width); + put_bits(&s->pb, 8, 3); /* 3 components */ + + /* Y component */ + put_bits(&s->pb, 8, 1); /* component number */ + put_bits(&s->pb, 4, 2); /* H factor */ + put_bits(&s->pb, 4, 2); /* V factor */ + put_bits(&s->pb, 8, 0); /* select matrix */ + + /* Cb component */ + put_bits(&s->pb, 8, 2); /* component number */ + put_bits(&s->pb, 4, 1); /* H factor */ + put_bits(&s->pb, 4, 1); /* V factor */ + put_bits(&s->pb, 8, 0); /* select matrix */ + + /* Cr component */ + put_bits(&s->pb, 8, 3); /* component number */ + put_bits(&s->pb, 4, 1); /* H factor */ + put_bits(&s->pb, 4, 1); /* V factor */ + put_bits(&s->pb, 8, 0); /* select matrix */ + + /* scan header */ + put_marker(&s->pb, SOS); + put_bits(&s->pb, 16, 12); /* length */ + put_bits(&s->pb, 8, 3); /* 3 components */ + + /* Y component */ + put_bits(&s->pb, 8, 1); /* index */ + put_bits(&s->pb, 4, 0); /* DC huffman table index */ + put_bits(&s->pb, 4, 0); /* AC huffman table index */ + + /* Cb component */ + put_bits(&s->pb, 8, 2); /* index */ + put_bits(&s->pb, 4, 1); /* DC huffman table index */ + put_bits(&s->pb, 4, 1); /* AC huffman table index */ + + /* Cr component */ + put_bits(&s->pb, 8, 3); /* index */ + put_bits(&s->pb, 4, 1); /* DC huffman table index */ + put_bits(&s->pb, 4, 1); /* AC huffman table index */ + + put_bits(&s->pb, 8, 0); /* Ss (not used) */ + put_bits(&s->pb, 8, 63); /* Se (not used) */ + put_bits(&s->pb, 8, 0); /* (not used) */ +} + +void mjpeg_picture_trailer(MpegEncContext *s) +{ + jflush_put_bits(&s->pb); + put_marker(&s->pb, EOI); +} + +static inline void encode_dc(MpegEncContext *s, int val, + UINT8 *huff_size, UINT16 *huff_code) +{ + int mant, nbits; + + if (val == 0) { + jput_bits(&s->pb, huff_size[0], huff_code[0]); + } else { + mant = val; + if (val < 0) { + val = -val; + mant--; + } + + /* compute the log (XXX: optimize) */ + nbits = 0; + while (val != 0) { + val = val >> 1; + nbits++; + } + + jput_bits(&s->pb, huff_size[nbits], huff_code[nbits]); + + jput_bits(&s->pb, nbits, mant & ((1 << nbits) - 1)); + } +} + +static void encode_block(MpegEncContext *s, DCTELEM *block, int n) +{ + int mant, nbits, code, i, j; + int component, dc, run, last_index, val; + MJpegContext *m = s->mjpeg_ctx; + UINT8 *huff_size_ac; + UINT16 *huff_code_ac; + + /* DC coef */ + component = (n <= 3 ? 0 : n - 4 + 1); + dc = block[0]; /* overflow is impossible */ + val = dc - s->last_dc[component]; + if (n < 4) { + encode_dc(s, val, m->huff_size_dc_luminance, m->huff_code_dc_luminance); + huff_size_ac = m->huff_size_ac_luminance; + huff_code_ac = m->huff_code_ac_luminance; + } else { + encode_dc(s, val, m->huff_size_dc_chrominance, m->huff_code_dc_chrominance); + huff_size_ac = m->huff_size_ac_chrominance; + huff_code_ac = m->huff_code_ac_chrominance; + } + s->last_dc[component] = dc; + + /* AC coefs */ + + run = 0; + last_index = s->block_last_index[n]; + for(i=1;i<=last_index;i++) { + j = zigzag_direct[i]; + val = block[j]; + if (val == 0) { + run++; + } else { + while (run >= 16) { + jput_bits(&s->pb, huff_size_ac[0xf0], huff_code_ac[0xf0]); + run -= 16; + } + mant = val; + if (val < 0) { + val = -val; + mant--; + } + + /* compute the log (XXX: optimize) */ + nbits = 0; + while (val != 0) { + val = val >> 1; + nbits++; + } + code = (run << 4) | nbits; + + jput_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]); + + jput_bits(&s->pb, nbits, mant & ((1 << nbits) - 1)); + run = 0; + } + } + + /* output EOB only if not already 64 values */ + if (last_index < 63 || run != 0) + jput_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]); +} + +void mjpeg_encode_mb(MpegEncContext *s, + DCTELEM block[6][64]) +{ + int i; + for(i=0;i<6;i++) { + encode_block(s, block[i], i); + } +} diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c new file mode 100644 index 0000000000..09fe662613 --- /dev/null +++ b/libavcodec/motion_est.c @@ -0,0 +1,517 @@ +/* + * Motion estimation + * Copyright (c) 2000,2001 Gerard Lantau. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + +static void halfpel_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int dmin, + int xmin, int ymin, int xmax, int ymax); + +/* config it to test motion vector encoding (send random vectors) */ +//#define CONFIG_TEST_MV_ENCODE + +static int pix_sum(UINT8 * pix, int line_size) +{ + int s, i, j; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { + s += pix[0]; + s += pix[1]; + s += pix[2]; + s += pix[3]; + s += pix[4]; + s += pix[5]; + s += pix[6]; + s += pix[7]; + pix += 8; + } + pix += line_size - 16; + } + return s; +} + +static int pix_norm1(UINT8 * pix, int line_size) +{ + int s, i, j; + UINT32 *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { + s += sq[pix[0]]; + s += sq[pix[1]]; + s += sq[pix[2]]; + s += sq[pix[3]]; + s += sq[pix[4]]; + s += sq[pix[5]]; + s += sq[pix[6]]; + s += sq[pix[7]]; + pix += 8; + } + pix += line_size - 16; + } + return s; +} + +static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size) +{ + int s, i, j; + UINT32 *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { + s += sq[pix1[0] - pix2[0]]; + s += sq[pix1[1] - pix2[1]]; + s += sq[pix1[2] - pix2[2]]; + s += sq[pix1[3] - pix2[3]]; + s += sq[pix1[4] - pix2[4]]; + s += sq[pix1[5] - pix2[5]]; + s += sq[pix1[6] - pix2[6]]; + s += sq[pix1[7] - pix2[7]]; + pix1 += 8; + pix2 += 8; + } + pix1 += line_size - 16; + pix2 += line_size - 16; + } + return s; +} + +static void no_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr) +{ + *mx_ptr = 16 * s->mb_x; + *my_ptr = 16 * s->mb_y; +} + +static int full_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int range, + int xmin, int ymin, int xmax, int ymax) +{ + int x1, y1, x2, y2, xx, yy, x, y; + int mx, my, dmin, d; + UINT8 *pix; + + xx = 16 * s->mb_x; + yy = 16 * s->mb_y; + x1 = xx - range + 1; /* we loose one pixel to avoid boundary pb with half pixel pred */ + if (x1 < xmin) + x1 = xmin; + x2 = xx + range - 1; + if (x2 > xmax) + x2 = xmax; + y1 = yy - range + 1; + if (y1 < ymin) + y1 = ymin; + y2 = yy + range - 1; + if (y2 > ymax) + y2 = ymax; + pix = s->new_picture[0] + (yy * s->linesize) + xx; + dmin = 0x7fffffff; + mx = 0; + my = 0; + for (y = y1; y <= y2; y++) { + for (x = x1; x <= x2; x++) { + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, + s->linesize, 16); + if (d < dmin || + (d == dmin && + (abs(x - xx) + abs(y - yy)) < + (abs(mx - xx) + abs(my - yy)))) { + dmin = d; + mx = x; + my = y; + } + } + } + + *mx_ptr = mx; + *my_ptr = my; + +#if 0 + if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) || + *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) { + fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr); + } +#endif + return dmin; +} + + +static int log_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int range, + int xmin, int ymin, int xmax, int ymax) +{ + int x1, y1, x2, y2, xx, yy, x, y; + int mx, my, dmin, d; + UINT8 *pix; + + xx = s->mb_x << 4; + yy = s->mb_y << 4; + + /* Left limit */ + x1 = xx - range; + if (x1 < xmin) + x1 = xmin; + + /* Right limit */ + x2 = xx + range; + if (x2 > xmax) + x2 = xmax; + + /* Upper limit */ + y1 = yy - range; + if (y1 < ymin) + y1 = ymin; + + /* Lower limit */ + y2 = yy + range; + if (y2 > ymax) + y2 = ymax; + + pix = s->new_picture[0] + (yy * s->linesize) + xx; + dmin = 0x7fffffff; + mx = 0; + my = 0; + + do { + for (y = y1; y <= y2; y += range) { + for (x = x1; x <= x2; x += range) { + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { + dmin = d; + mx = x; + my = y; + } + } + } + + range = range >> 1; + + x1 = mx - range; + if (x1 < xmin) + x1 = xmin; + + x2 = mx + range; + if (x2 > xmax) + x2 = xmax; + + y1 = my - range; + if (y1 < ymin) + y1 = ymin; + + y2 = my + range; + if (y2 > ymax) + y2 = ymax; + + } while (range >= 1); + +#ifdef DEBUG + fprintf(stderr, "log - MX: %d\tMY: %d\n", mx, my); +#endif + *mx_ptr = mx; + *my_ptr = my; + return dmin; +} + +static int phods_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int range, + int xmin, int ymin, int xmax, int ymax) +{ + int x1, y1, x2, y2, xx, yy, x, y, lastx, d; + int mx, my, dminx, dminy; + UINT8 *pix; + + xx = s->mb_x << 4; + yy = s->mb_y << 4; + + /* Left limit */ + x1 = xx - range; + if (x1 < xmin) + x1 = xmin; + + /* Right limit */ + x2 = xx + range; + if (x2 > xmax) + x2 = xmax; + + /* Upper limit */ + y1 = yy - range; + if (y1 < ymin) + y1 = ymin; + + /* Lower limit */ + y2 = yy + range; + if (y2 > ymax) + y2 = ymax; + + pix = s->new_picture[0] + (yy * s->linesize) + xx; + mx = 0; + my = 0; + + x = xx; + y = yy; + do { + dminx = 0x7fffffff; + dminy = 0x7fffffff; + + lastx = x; + for (x = x1; x <= x2; x += range) { + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { + dminx = d; + mx = x; + } + } + + x = lastx; + for (y = y1; y <= y2; y += range) { + d = pix_abs16x16(pix, s->last_picture[0] + (y * s->linesize) + x, s->linesize, 16); + if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { + dminy = d; + my = y; + } + } + + range = range >> 1; + + x = mx; + y = my; + x1 = mx - range; + if (x1 < xmin) + x1 = xmin; + + x2 = mx + range; + if (x2 > xmax) + x2 = xmax; + + y1 = my - range; + if (y1 < ymin) + y1 = ymin; + + y2 = my + range; + if (y2 > ymax) + y2 = ymax; + + } while (range >= 1); + +#ifdef DEBUG + fprintf(stderr, "phods - MX: %d\tMY: %d\n", mx, my); +#endif + + /* half pixel search */ + *mx_ptr = mx; + *my_ptr = my; + return dminy; +} + +/* The idea would be to make half pel ME after Inter/Intra decision to + save time. */ +static void halfpel_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int dmin, + int xmin, int ymin, int xmax, int ymax) +{ + int mx, my, mx1, my1, d, xx, yy, dminh; + UINT8 *pix; + + mx = *mx_ptr << 1; + my = *my_ptr << 1; + + xx = 16 * s->mb_x; + yy = 16 * s->mb_y; + + dminh = dmin; + + /* Half pixel search */ + mx1 = mx; + my1 = my; + + pix = s->new_picture[0] + (yy * s->linesize) + xx; + + if ((mx > (xmin << 1)) && mx < (xmax << 1) && + (my > (ymin << 1)) && my < (ymax << 1)) { + int dx, dy, px, py; + UINT8 *ptr; + for (dy = -1; dy <= 1; dy++) { + for (dx = -1; dx <= 1; dx++) { + if (dx != 0 || dy != 0) { + px = mx1 + dx; + py = my1 + dy; + ptr = s->last_picture[0] + ((py >> 1) * s->linesize) + (px >> 1); + switch (((py & 1) << 1) | (px & 1)) { + default: + case 0: + d = pix_abs16x16(pix, ptr, s->linesize, 16); + break; + case 1: + d = pix_abs16x16_x2(pix, ptr, s->linesize, 16); + break; + case 2: + d = pix_abs16x16_y2(pix, ptr, s->linesize, 16); + break; + case 3: + d = pix_abs16x16_xy2(pix, ptr, s->linesize, 16); + break; + } + if (d < dminh) { + dminh = d; + mx = px; + my = py; + } + } + } + } + } + + *mx_ptr = mx - (xx << 1); + *my_ptr = my - (yy << 1); + //fprintf(stderr,"half - MX: %d\tMY: %d\n",*mx_ptr ,*my_ptr); +} + +#ifndef CONFIG_TEST_MV_ENCODE + +int estimate_motion(MpegEncContext * s, + int mb_x, int mb_y, + int *mx_ptr, int *my_ptr) +{ + UINT8 *pix, *ppix; + int sum, varc, vard, mx, my, range, dmin, xx, yy; + int xmin, ymin, xmax, ymax; + + range = 8 * (1 << (s->f_code - 1)); + /* XXX: temporary kludge to avoid overflow for msmpeg4 */ + if (s->out_format == FMT_H263 && !s->h263_msmpeg4) + range = range * 2; + + if (s->unrestricted_mv) { + xmin = -16; + ymin = -16; + xmax = s->width; + ymax = s->height; + } else { + xmin = 0; + ymin = 0; + xmax = s->width - 16; + ymax = s->height - 16; + } + + switch(s->full_search) { + case ME_ZERO: + default: + no_motion_search(s, &mx, &my); + dmin = 0; + break; + case ME_FULL: + dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax); + break; + case ME_LOG: + dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax); + break; + case ME_PHODS: + dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax); + break; + } +#ifdef CONFIG_MMX + if (mm_flags & MM_MMX) + emms(); +#endif + + /* intra / predictive decision */ + xx = mb_x * 16; + yy = mb_y * 16; + + pix = s->new_picture[0] + (yy * s->linesize) + xx; + /* At this point (mx,my) are full-pell and the absolute displacement */ + ppix = s->last_picture[0] + (my * s->linesize) + mx; + + sum = pix_sum(pix, s->linesize); + varc = pix_norm1(pix, s->linesize); + vard = pix_norm(pix, ppix, s->linesize); + + vard = vard >> 8; + sum = sum >> 8; + varc = (varc >> 8) - (sum * sum); +#if 0 + printf("varc=%d (sum=%d) vard=%d mx=%d my=%d\n", + varc, sum, vard, mx - xx, my - yy); +#endif + if (vard <= 64 || vard < varc) { + if (s->full_search != ME_ZERO) { + halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax); + } else { + mx -= 16 * s->mb_x; + my -= 16 * s->mb_y; + } + *mx_ptr = mx; + *my_ptr = my; + return 0; + } else { + *mx_ptr = 0; + *my_ptr = 0; + return 1; + } +} + +#else + +/* test version which generates valid random vectors */ +int estimate_motion(MpegEncContext * s, + int mb_x, int mb_y, + int *mx_ptr, int *my_ptr) +{ + int xx, yy, x1, y1, x2, y2, range; + + if ((random() % 10) >= 5) { + range = 8 * (1 << (s->f_code - 1)); + if (s->out_format == FMT_H263 && !s->h263_msmpeg4) + range = range * 2; + + xx = 16 * s->mb_x; + yy = 16 * s->mb_y; + x1 = xx - range; + if (x1 < 0) + x1 = 0; + x2 = xx + range - 1; + if (x2 > (s->width - 16)) + x2 = s->width - 16; + y1 = yy - range; + if (y1 < 0) + y1 = 0; + y2 = yy + range - 1; + if (y2 > (s->height - 16)) + y2 = s->height - 16; + + *mx_ptr = (random() % (2 * (x2 - x1 + 1))) + 2 * (x1 - xx); + *my_ptr = (random() % (2 * (y2 - y1 + 1))) + 2 * (y1 - yy); + return 0; + } else { + *mx_ptr = 0; + *my_ptr = 0; + return 1; + } +} + +#endif diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c new file mode 100644 index 0000000000..3e2e523560 --- /dev/null +++ b/libavcodec/mpeg12.c @@ -0,0 +1,1530 @@ +/* + * MPEG1 encoder / MPEG2 decoder + * Copyright (c) 2000,2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + +#include "mpeg12data.h" + +//#define DEBUG + +#ifdef DEBUG +#define dprintf(fmt,args...) printf(fmt, ## args) +#else +#define dprintf(fmt,args...) +#endif + +/* Start codes. */ +#define SEQ_END_CODE 0x000001b7 +#define SEQ_START_CODE 0x000001b3 +#define GOP_START_CODE 0x000001b8 +#define PICTURE_START_CODE 0x00000100 +#define SLICE_MIN_START_CODE 0x00000101 +#define SLICE_MAX_START_CODE 0x000001af +#define EXT_START_CODE 0x000001b5 +#define USER_START_CODE 0x000001b2 + +static void mpeg1_encode_block(MpegEncContext *s, + DCTELEM *block, + int component); +static void mpeg1_encode_motion(MpegEncContext *s, int val); +static void mpeg1_skip_picture(MpegEncContext *s, int pict_num); +static int mpeg1_decode_block(MpegEncContext *s, + DCTELEM *block, + int n); +static int mpeg2_decode_block_non_intra(MpegEncContext *s, + DCTELEM *block, + int n); +static int mpeg2_decode_block_intra(MpegEncContext *s, + DCTELEM *block, + int n); +static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred); + +static void put_header(MpegEncContext *s, int header) +{ + align_put_bits(&s->pb); + put_bits(&s->pb, 32, header); +} + +/* put sequence header if needed */ +static void mpeg1_encode_sequence_header(MpegEncContext *s) +{ + unsigned int vbv_buffer_size; + unsigned int fps, n, v; + UINT64 time_code; + + if ((s->picture_number % s->gop_size) == 0) { + /* mpeg1 header repeated every gop */ + put_header(s, SEQ_START_CODE); + + /* search closest frame rate */ + { + int i, dmin, d; + s->frame_rate_index = 0; + dmin = 0x7fffffff; + for(i=1;i<9;i++) { + d = abs(s->frame_rate - frame_rate_tab[i]); + if (d < dmin) { + dmin = d; + s->frame_rate_index = i; + } + } + } + + put_bits(&s->pb, 12, s->width); + put_bits(&s->pb, 12, s->height); + put_bits(&s->pb, 4, 1); /* 1/1 aspect ratio */ + put_bits(&s->pb, 4, s->frame_rate_index); + v = s->bit_rate / 400; + if (v > 0x3ffff) + v = 0x3ffff; + put_bits(&s->pb, 18, v); + put_bits(&s->pb, 1, 1); /* marker */ + /* vbv buffer size: slightly greater than an I frame. We add + some margin just in case */ + vbv_buffer_size = (3 * s->I_frame_bits) / (2 * 8); + put_bits(&s->pb, 10, (vbv_buffer_size + 16383) / 16384); + put_bits(&s->pb, 1, 1); /* constrained parameter flag */ + put_bits(&s->pb, 1, 0); /* no custom intra matrix */ + put_bits(&s->pb, 1, 0); /* no custom non intra matrix */ + + put_header(s, GOP_START_CODE); + put_bits(&s->pb, 1, 0); /* do drop frame */ + /* time code : we must convert from the real frame rate to a + fake mpeg frame rate in case of low frame rate */ + fps = frame_rate_tab[s->frame_rate_index]; + time_code = s->fake_picture_number * FRAME_RATE_BASE; + s->gop_picture_number = s->fake_picture_number; + put_bits(&s->pb, 5, (time_code / (fps * 3600)) % 24); + put_bits(&s->pb, 6, (time_code / (fps * 60)) % 60); + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 6, (time_code / fps) % 60); + put_bits(&s->pb, 6, (time_code % fps) / FRAME_RATE_BASE); + put_bits(&s->pb, 1, 1); /* closed gop */ + put_bits(&s->pb, 1, 0); /* broken link */ + } + + if (s->frame_rate < (24 * FRAME_RATE_BASE) && s->picture_number > 0) { + /* insert empty P pictures to slow down to the desired + frame rate. Each fake pictures takes about 20 bytes */ + fps = frame_rate_tab[s->frame_rate_index]; + n = ((s->picture_number * fps) / s->frame_rate) - 1; + while (s->fake_picture_number < n) { + mpeg1_skip_picture(s, s->fake_picture_number - + s->gop_picture_number); + s->fake_picture_number++; + } + + } + s->fake_picture_number++; +} + + +/* insert a fake P picture */ +static void mpeg1_skip_picture(MpegEncContext *s, int pict_num) +{ + unsigned int mb_incr; + + /* mpeg1 picture header */ + put_header(s, PICTURE_START_CODE); + /* temporal reference */ + put_bits(&s->pb, 10, pict_num & 0x3ff); + + put_bits(&s->pb, 3, P_TYPE); + put_bits(&s->pb, 16, 0xffff); /* non constant bit rate */ + + put_bits(&s->pb, 1, 1); /* integer coordinates */ + put_bits(&s->pb, 3, 1); /* forward_f_code */ + + put_bits(&s->pb, 1, 0); /* extra bit picture */ + + /* only one slice */ + put_header(s, SLICE_MIN_START_CODE); + put_bits(&s->pb, 5, 1); /* quantizer scale */ + put_bits(&s->pb, 1, 0); /* slice extra information */ + + mb_incr = 1; + put_bits(&s->pb, mbAddrIncrTable[mb_incr - 1][1], + mbAddrIncrTable[mb_incr - 1][0]); + + /* empty macroblock */ + put_bits(&s->pb, 3, 1); /* motion only */ + + /* zero motion x & y */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, 1); + + /* output a number of empty slice */ + mb_incr = s->mb_width * s->mb_height - 1; + while (mb_incr > 33) { + put_bits(&s->pb, 11, 0x008); + mb_incr -= 33; + } + put_bits(&s->pb, mbAddrIncrTable[mb_incr - 1][1], + mbAddrIncrTable[mb_incr - 1][0]); + + /* empty macroblock */ + put_bits(&s->pb, 3, 1); /* motion only */ + + /* zero motion x & y */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, 1, 1); +} + +void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number) +{ + static int done; + + if (!done) { + done = 1; + init_rl(&rl_mpeg1); + } + mpeg1_encode_sequence_header(s); + + /* mpeg1 picture header */ + put_header(s, PICTURE_START_CODE); + /* temporal reference */ + put_bits(&s->pb, 10, (s->fake_picture_number - + s->gop_picture_number) & 0x3ff); + + put_bits(&s->pb, 3, s->pict_type); + put_bits(&s->pb, 16, 0xffff); /* non constant bit rate */ + + if (s->pict_type == P_TYPE) { + put_bits(&s->pb, 1, 0); /* half pel coordinates */ + put_bits(&s->pb, 3, s->f_code); /* forward_f_code */ + } + + put_bits(&s->pb, 1, 0); /* extra bit picture */ + + /* only one slice */ + put_header(s, SLICE_MIN_START_CODE); + put_bits(&s->pb, 5, s->qscale); /* quantizer scale */ + put_bits(&s->pb, 1, 0); /* slice extra information */ +} + +void mpeg1_encode_mb(MpegEncContext *s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + int mb_incr, i, cbp, mb_x, mb_y; + + mb_x = s->mb_x; + mb_y = s->mb_y; + + /* compute cbp */ + cbp = 0; + for(i=0;i<6;i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + + /* skip macroblock, except if first or last macroblock of a slice */ + if ((cbp | motion_x | motion_y) == 0 && + (!((mb_x | mb_y) == 0 || + (mb_x == s->mb_width - 1 && mb_y == s->mb_height - 1)))) { + s->mb_incr++; + } else { + /* output mb incr */ + mb_incr = s->mb_incr; + + while (mb_incr > 33) { + put_bits(&s->pb, 11, 0x008); + mb_incr -= 33; + } + put_bits(&s->pb, mbAddrIncrTable[mb_incr - 1][1], + mbAddrIncrTable[mb_incr - 1][0]); + + if (s->pict_type == I_TYPE) { + put_bits(&s->pb, 1, 1); /* macroblock_type : macroblock_quant = 0 */ + } else { + if (s->mb_intra) { + put_bits(&s->pb, 5, 0x03); + } else { + if (cbp != 0) { + if (motion_x == 0 && motion_y == 0) { + put_bits(&s->pb, 2, 1); /* macroblock_pattern only */ + put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); + } else { + put_bits(&s->pb, 1, 1); /* motion + cbp */ + mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0]); + mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1]); + put_bits(&s->pb, mbPatTable[cbp - 1][1], mbPatTable[cbp - 1][0]); + } + } else { + put_bits(&s->pb, 3, 1); /* motion only */ + mpeg1_encode_motion(s, motion_x - s->last_mv[0][0][0]); + mpeg1_encode_motion(s, motion_y - s->last_mv[0][0][1]); + } + } + } + for(i=0;i<6;i++) { + if (cbp & (1 << (5 - i))) { + mpeg1_encode_block(s, block[i], i); + } + } + s->mb_incr = 1; + } + s->last_mv[0][0][0] = motion_x; + s->last_mv[0][0][1] = motion_y; +} + +static void mpeg1_encode_motion(MpegEncContext *s, int val) +{ + int code, bit_size, l, m, bits, range, sign; + + if (val == 0) { + /* zero vector */ + code = 0; + put_bits(&s->pb, + mbMotionVectorTable[0][1], + mbMotionVectorTable[0][0]); + } else { + bit_size = s->f_code - 1; + range = 1 << bit_size; + /* modulo encoding */ + l = 16 * range; + m = 2 * l; + if (val < -l) { + val += m; + } else if (val >= l) { + val -= m; + } + + if (val >= 0) { + val--; + code = (val >> bit_size) + 1; + bits = val & (range - 1); + sign = 0; + } else { + val = -val; + val--; + code = (val >> bit_size) + 1; + bits = val & (range - 1); + sign = 1; + } + put_bits(&s->pb, + mbMotionVectorTable[code][1], + mbMotionVectorTable[code][0]); + put_bits(&s->pb, 1, sign); + if (bit_size > 0) { + put_bits(&s->pb, bit_size, bits); + } + } +} + +static inline void encode_dc(MpegEncContext *s, int diff, int component) +{ + int adiff, index; + + adiff = abs(diff); + index = vlc_dc_table[adiff]; + if (component == 0) { + put_bits(&s->pb, vlc_dc_lum_bits[index], vlc_dc_lum_code[index]); + } else { + put_bits(&s->pb, vlc_dc_chroma_bits[index], vlc_dc_chroma_code[index]); + } + if (diff > 0) { + put_bits(&s->pb, index, (diff & ((1 << index) - 1))); + } else if (diff < 0) { + put_bits(&s->pb, index, ((diff - 1) & ((1 << index) - 1))); + } +} + +static void mpeg1_encode_block(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int alevel, level, last_non_zero, dc, diff, i, j, run, last_index, sign; + int code, component; + RLTable *rl = &rl_mpeg1; + + last_index = s->block_last_index[n]; + + /* DC coef */ + if (s->mb_intra) { + component = (n <= 3 ? 0 : n - 4 + 1); + dc = block[0]; /* overflow is impossible */ + diff = dc - s->last_dc[component]; + encode_dc(s, diff, component); + s->last_dc[component] = dc; + i = 1; + } else { + /* encode the first coefficient : needs to be done here because + it is handled slightly differently */ + level = block[0]; + if (abs(level) == 1) { + code = ((UINT32)level >> 31); /* the sign bit */ + put_bits(&s->pb, 2, code | 0x02); + i = 1; + } else { + i = 0; + last_non_zero = -1; + goto next_coef; + } + } + + /* now quantify & encode AC coefs */ + last_non_zero = i - 1; + for(;i<=last_index;i++) { + j = zigzag_direct[i]; + level = block[j]; + next_coef: +#if 0 + if (level != 0) + dprintf("level[%d]=%d\n", i, level); +#endif + /* encode using VLC */ + if (level != 0) { + run = i - last_non_zero - 1; + sign = 0; + alevel = level; + if (alevel < 0) { + sign = 1; + alevel = -alevel; + } + code = get_rl_index(rl, 0, run, alevel); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code != rl->n) { + put_bits(&s->pb, 1, sign); + } else { + /* escape: only clip in this case */ + put_bits(&s->pb, 6, run); + if (alevel < 128) { + put_bits(&s->pb, 8, level & 0xff); + } else { + if (level < 0) { + put_bits(&s->pb, 16, 0x8001 + level + 255); + } else { + put_bits(&s->pb, 16, level & 0xffff); + } + } + } + last_non_zero = i; + } + } + /* end of block */ + put_bits(&s->pb, 2, 0x2); +} + +/******************************************/ +/* decoding */ + +static VLC dc_lum_vlc; +static VLC dc_chroma_vlc; +static VLC mv_vlc; +static VLC mbincr_vlc; +static VLC mb_ptype_vlc; +static VLC mb_btype_vlc; +static VLC mb_pat_vlc; + +void mpeg1_init_vlc(MpegEncContext *s) +{ + static int done = 0; + + if (!done) { + + init_vlc(&dc_lum_vlc, 9, 12, + vlc_dc_lum_bits, 1, 1, + vlc_dc_lum_code, 2, 2); + init_vlc(&dc_chroma_vlc, 9, 12, + vlc_dc_chroma_bits, 1, 1, + vlc_dc_chroma_code, 2, 2); + init_vlc(&mv_vlc, 9, 17, + &mbMotionVectorTable[0][1], 2, 1, + &mbMotionVectorTable[0][0], 2, 1); + init_vlc(&mbincr_vlc, 9, 34, + &mbAddrIncrTable[0][1], 2, 1, + &mbAddrIncrTable[0][0], 2, 1); + init_vlc(&mb_pat_vlc, 9, 63, + &mbPatTable[0][1], 2, 1, + &mbPatTable[0][0], 2, 1); + + init_vlc(&mb_ptype_vlc, 6, 32, + &table_mb_ptype[0][1], 2, 1, + &table_mb_ptype[0][0], 2, 1); + init_vlc(&mb_btype_vlc, 6, 32, + &table_mb_btype[0][1], 2, 1, + &table_mb_btype[0][0], 2, 1); + init_rl(&rl_mpeg1); + init_rl(&rl_mpeg2); + /* cannot use generic init because we must add the EOB code */ + init_vlc(&rl_mpeg1.vlc, 9, rl_mpeg1.n + 2, + &rl_mpeg1.table_vlc[0][1], 4, 2, + &rl_mpeg1.table_vlc[0][0], 4, 2); + init_vlc(&rl_mpeg2.vlc, 9, rl_mpeg2.n + 2, + &rl_mpeg2.table_vlc[0][1], 4, 2, + &rl_mpeg2.table_vlc[0][0], 4, 2); + } +} + +static inline int get_dmv(MpegEncContext *s) +{ + if(get_bits(&s->gb, 1)) + return 1 - (get_bits(&s->gb, 1) << 1); + else + return 0; +} + +/* motion type (for mpeg2) */ +#define MT_FIELD 1 +#define MT_FRAME 2 +#define MT_16X8 2 +#define MT_DMV 3 + +static int mpeg_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]) +{ + int i, j, k, cbp, val, code, mb_type, motion_type; + + /* skip mb handling */ + if (s->mb_incr == 0) { + /* read again increment */ + s->mb_incr = 1; + for(;;) { + code = get_vlc(&s->gb, &mbincr_vlc); + if (code < 0) + return 1; /* error = end of slice */ + if (code >= 33) { + if (code == 33) { + s->mb_incr += 33; + } + /* otherwise, stuffing, nothing to do */ + } else { + s->mb_incr += code; + break; + } + } + } + if (++s->mb_x >= s->mb_width) { + s->mb_x = 0; + if (s->mb_y >= (s->mb_height - 1)) + return -1; + s->mb_y++; + } + dprintf("decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y); + + if (--s->mb_incr != 0) { + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_type = MV_TYPE_16X16; + if (s->pict_type == P_TYPE) { + /* if P type, zero motion vector is implied */ + s->mv_dir = MV_DIR_FORWARD; + s->mv[0][0][0] = s->mv[0][0][1] = 0; + s->last_mv[0][0][0] = s->last_mv[0][0][1] = 0; + } else { + /* if B type, reuse previous vectors and directions */ + s->mv[0][0][0] = s->last_mv[0][0][0]; + s->mv[0][0][1] = s->last_mv[0][0][1]; + s->mv[1][0][0] = s->last_mv[1][0][0]; + s->mv[1][0][1] = s->last_mv[1][0][1]; + } + return 0; + } + + switch(s->pict_type) { + default: + case I_TYPE: + if (get_bits(&s->gb, 1) == 0) { + if (get_bits(&s->gb, 1) == 0) + return -1; + mb_type = MB_QUANT | MB_INTRA; + } else { + mb_type = MB_INTRA; + } + break; + case P_TYPE: + mb_type = get_vlc(&s->gb, &mb_ptype_vlc); + if (mb_type < 0) + return -1; + break; + case B_TYPE: + mb_type = get_vlc(&s->gb, &mb_btype_vlc); + if (mb_type < 0) + return -1; + break; + } + dprintf("mb_type=%x\n", mb_type); + motion_type = 0; /* avoid warning */ + if (mb_type & (MB_FOR|MB_BACK)) { + /* get additionnal motion vector type */ + if (s->picture_structure == PICT_FRAME && s->frame_pred_frame_dct) + motion_type = MT_FRAME; + else + motion_type = get_bits(&s->gb, 2); + } + /* compute dct type */ + if (s->picture_structure == PICT_FRAME && + !s->frame_pred_frame_dct && + (mb_type & (MB_PAT | MB_INTRA))) { + s->interlaced_dct = get_bits(&s->gb, 1); +#ifdef DEBUG + if (s->interlaced_dct) + printf("interlaced_dct\n"); +#endif + } else { + s->interlaced_dct = 0; /* frame based */ + } + + if (mb_type & MB_QUANT) { + if (s->mpeg2) { + if (s->q_scale_type) { + s->qscale = non_linear_qscale[get_bits(&s->gb, 5)]; + } else { + s->qscale = get_bits(&s->gb, 5) << 1; + } + } else { + /* for mpeg1, we use the generic unquant code */ + s->qscale = get_bits(&s->gb, 5); + } + } + if (mb_type & MB_INTRA) { + if (s->concealment_motion_vectors) { + /* just parse them */ + if (s->picture_structure != PICT_FRAME) + get_bits(&s->gb, 1); /* field select */ + mpeg_decode_motion(s, s->mpeg_f_code[0][0], 0); + mpeg_decode_motion(s, s->mpeg_f_code[0][1], 0); + } + s->mb_intra = 1; + cbp = 0x3f; + memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */ + } else { + s->mb_intra = 0; + cbp = 0; + } + /* special case of implicit zero motion vector */ + if (s->pict_type == P_TYPE && !(mb_type & MB_FOR)) { + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->last_mv[0][0][0] = 0; + s->last_mv[0][0][1] = 0; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + } else if (mb_type & (MB_FOR | MB_BACK)) { + /* motion vectors */ + s->mv_dir = 0; + for(i=0;i<2;i++) { + if (mb_type & (MB_FOR >> i)) { + s->mv_dir |= (MV_DIR_FORWARD >> i); + dprintf("mv_type=%d\n", motion_type); + switch(motion_type) { + case MT_FRAME: /* or MT_16X8 */ + if (s->picture_structure == PICT_FRAME) { + /* MT_FRAME */ + s->mv_type = MV_TYPE_16X16; + for(k=0;k<2;k++) { + val = mpeg_decode_motion(s, s->mpeg_f_code[i][k], + s->last_mv[i][0][k]); + s->last_mv[i][0][k] = val; + s->last_mv[i][1][k] = val; + /* full_pel: only for mpeg1 */ + if (s->full_pel[i]) + val = val << 1; + s->mv[i][0][k] = val; + dprintf("mv%d: %d\n", k, val); + } + } else { + /* MT_16X8 */ + s->mv_type = MV_TYPE_16X8; + for(j=0;j<2;j++) { + s->field_select[i][j] = get_bits(&s->gb, 1); + for(k=0;k<2;k++) { + val = mpeg_decode_motion(s, s->mpeg_f_code[i][k], + s->last_mv[i][j][k]); + s->last_mv[i][j][k] = val; + s->mv[i][j][k] = val; + } + } + } + break; + case MT_FIELD: + if (s->picture_structure == PICT_FRAME) { + s->mv_type = MV_TYPE_FIELD; + for(j=0;j<2;j++) { + s->field_select[i][j] = get_bits(&s->gb, 1); + val = mpeg_decode_motion(s, s->mpeg_f_code[i][0], + s->last_mv[i][j][0]); + s->last_mv[i][j][0] = val; + s->mv[i][j][0] = val; + dprintf("fmx=%d\n", val); + val = mpeg_decode_motion(s, s->mpeg_f_code[i][1], + s->last_mv[i][j][1] >> 1); + s->last_mv[i][j][1] = val << 1; + s->mv[i][j][1] = val; + dprintf("fmy=%d\n", val); + } + } else { + s->mv_type = MV_TYPE_16X16; + s->field_select[i][0] = get_bits(&s->gb, 1); + for(k=0;k<2;k++) { + val = mpeg_decode_motion(s, s->mpeg_f_code[i][k], + s->last_mv[i][0][k]); + s->last_mv[i][0][k] = val; + s->last_mv[i][1][k] = val; + s->mv[i][0][k] = val; + } + } + break; + case MT_DMV: + { + int dmx, dmy, mx, my, m; + + mx = mpeg_decode_motion(s, s->mpeg_f_code[i][0], + s->last_mv[i][0][0]); + s->last_mv[i][0][0] = mx; + s->last_mv[i][1][0] = mx; + dmx = get_dmv(s); + my = mpeg_decode_motion(s, s->mpeg_f_code[i][1], + s->last_mv[i][0][1] >> 1); + dmy = get_dmv(s); + s->mv_type = MV_TYPE_DMV; + /* XXX: totally broken */ + if (s->picture_structure == PICT_FRAME) { + s->last_mv[i][0][1] = my << 1; + s->last_mv[i][1][1] = my << 1; + + m = s->top_field_first ? 1 : 3; + /* top -> top pred */ + s->mv[i][0][0] = mx; + s->mv[i][0][1] = my << 1; + s->mv[i][1][0] = ((mx * m + (mx > 0)) >> 1) + dmx; + s->mv[i][1][1] = ((my * m + (my > 0)) >> 1) + dmy - 1; + m = 4 - m; + s->mv[i][2][0] = mx; + s->mv[i][2][1] = my << 1; + s->mv[i][3][0] = ((mx * m + (mx > 0)) >> 1) + dmx; + s->mv[i][3][1] = ((my * m + (my > 0)) >> 1) + dmy + 1; + } else { + s->last_mv[i][0][1] = my; + s->last_mv[i][1][1] = my; + s->mv[i][0][0] = mx; + s->mv[i][0][1] = my; + s->mv[i][1][0] = ((mx + (mx > 0)) >> 1) + dmx; + s->mv[i][1][1] = ((my + (my > 0)) >> 1) + dmy - 1 + /* + 2 * cur_field */; + } + } + break; + } + } + } + } + + if ((mb_type & MB_INTRA) && s->concealment_motion_vectors) { + get_bits(&s->gb, 1); /* marker */ + } + + if (mb_type & MB_PAT) { + cbp = get_vlc(&s->gb, &mb_pat_vlc); + if (cbp < 0) + return -1; + cbp++; + } + dprintf("cbp=%x\n", cbp); + + if (s->mpeg2) { + if (s->mb_intra) { + for(i=0;i<6;i++) { + if (cbp & (1 << (5 - i))) { + if (mpeg2_decode_block_intra(s, block[i], i) < 0) + return -1; + } + } + } else { + for(i=0;i<6;i++) { + if (cbp & (1 << (5 - i))) { + if (mpeg2_decode_block_non_intra(s, block[i], i) < 0) + return -1; + } + } + } + } else { + for(i=0;i<6;i++) { + if (cbp & (1 << (5 - i))) { + if (mpeg1_decode_block(s, block[i], i) < 0) + return -1; + } + } + } + return 0; +} + +/* as h263, but only 17 codes */ +static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred) +{ + int code, sign, val, m, l, shift; + + code = get_vlc(&s->gb, &mv_vlc); + if (code < 0) { + return 0xffff; + } + if (code == 0) { + return pred; + } + sign = get_bits(&s->gb, 1); + shift = fcode - 1; + val = (code - 1) << shift; + if (shift > 0) + val |= get_bits(&s->gb, shift); + val++; + if (sign) + val = -val; + val += pred; + + /* modulo decoding */ + l = (1 << shift) * 16; + m = 2 * l; + if (val < -l) { + val += m; + } else if (val >= l) { + val -= m; + } + return val; +} + +static inline int decode_dc(MpegEncContext *s, int component) +{ + int code, diff; + + if (component == 0) { + code = get_vlc(&s->gb, &dc_lum_vlc); + } else { + code = get_vlc(&s->gb, &dc_chroma_vlc); + } + if (code < 0) + return 0xffff; + if (code == 0) { + diff = 0; + } else { + diff = get_bits(&s->gb, code); + if ((diff & (1 << (code - 1))) == 0) + diff = (-1 << code) | (diff + 1); + } + return diff; +} + +static int mpeg1_decode_block(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int level, dc, diff, i, j, run; + int code, component; + RLTable *rl = &rl_mpeg1; + + if (s->mb_intra) { + /* DC coef */ + component = (n <= 3 ? 0 : n - 4 + 1); + diff = decode_dc(s, component); + if (diff >= 0xffff) + return -1; + dc = s->last_dc[component]; + dc += diff; + s->last_dc[component] = dc; + block[0] = dc; + dprintf("dc=%d diff=%d\n", dc, diff); + i = 1; + } else { + int bit_cnt, v; + UINT32 bit_buf; + UINT8 *buf_ptr; + i = 0; + /* special case for the first coef. no need to add a second vlc table */ + SAVE_BITS(&s->gb); + SHOW_BITS(&s->gb, v, 2); + if (v & 2) { + run = 0; + level = 1 - ((v & 1) << 1); + FLUSH_BITS(2); + RESTORE_BITS(&s->gb); + goto add_coef; + } + RESTORE_BITS(&s->gb); + } + + /* now quantify & encode AC coefs */ + for(;;) { + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0) { + return -1; + } + if (code == 112) { + break; + } else if (code == 111) { + /* escape */ + run = get_bits(&s->gb, 6); + level = get_bits(&s->gb, 8); + level = (level << 24) >> 24; + if (level == -128) { + level = get_bits(&s->gb, 8) - 256; + } else if (level == 0) { + level = get_bits(&s->gb, 8); + } + } else { + run = rl->table_run[code]; + level = rl->table_level[code]; + if (get_bits(&s->gb, 1)) + level = -level; + } + i += run; + if (i >= 64) + return -1; + add_coef: + dprintf("%d: run=%d level=%d\n", n, run, level); + j = zigzag_direct[i]; + block[j] = level; + i++; + } + s->block_last_index[n] = i; + return 0; +} + +/* Also does unquantization here, since I will never support mpeg2 + encoding */ +static int mpeg2_decode_block_non_intra(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int level, i, j, run; + int code; + RLTable *rl = &rl_mpeg1; + const UINT8 *scan_table; + const UINT16 *matrix; + int mismatch; + + if (s->alternate_scan) + scan_table = ff_alternate_vertical_scan; + else + scan_table = zigzag_direct; + mismatch = 1; + + { + int bit_cnt, v; + UINT32 bit_buf; + UINT8 *buf_ptr; + i = 0; + if (n < 4) + matrix = s->non_intra_matrix; + else + matrix = s->chroma_non_intra_matrix; + + /* special case for the first coef. no need to add a second vlc table */ + SAVE_BITS(&s->gb); + SHOW_BITS(&s->gb, v, 2); + if (v & 2) { + run = 0; + level = 1 - ((v & 1) << 1); + FLUSH_BITS(2); + RESTORE_BITS(&s->gb); + goto add_coef; + } + RESTORE_BITS(&s->gb); + } + + /* now quantify & encode AC coefs */ + for(;;) { + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0) + return -1; + if (code == 112) { + break; + } else if (code == 111) { + /* escape */ + run = get_bits(&s->gb, 6); + level = get_bits(&s->gb, 12); + level = (level << 20) >> 20; + } else { + run = rl->table_run[code]; + level = rl->table_level[code]; + if (get_bits(&s->gb, 1)) + level = -level; + } + i += run; + if (i >= 64) + return -1; + add_coef: + j = scan_table[i]; + dprintf("%d: run=%d level=%d\n", n, run, level); + level = ((level * 2 + 1) * s->qscale * matrix[j]) / 32; + /* XXX: is it really necessary to saturate since the encoder + knows whats going on ? */ + mismatch ^= level; + block[j] = level; + i++; + } + block[63] ^= (mismatch & 1); + s->block_last_index[n] = i; + return 0; +} + +static int mpeg2_decode_block_intra(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int level, dc, diff, i, j, run; + int code, component; + RLTable *rl; + const UINT8 *scan_table; + const UINT16 *matrix; + int mismatch; + + if (s->alternate_scan) + scan_table = ff_alternate_vertical_scan; + else + scan_table = zigzag_direct; + mismatch = 1; + + /* DC coef */ + component = (n <= 3 ? 0 : n - 4 + 1); + diff = decode_dc(s, component); + if (diff >= 0xffff) + return -1; + dc = s->last_dc[component]; + dc += diff; + s->last_dc[component] = dc; + block[0] = dc << (3 - s->intra_dc_precision); + dprintf("dc=%d\n", block[0]); + i = 1; + if (s->intra_vlc_format) + rl = &rl_mpeg2; + else + rl = &rl_mpeg1; + if (n < 4) + matrix = s->intra_matrix; + else + matrix = s->chroma_intra_matrix; + + /* now quantify & encode AC coefs */ + for(;;) { + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0) + return -1; + if (code == 112) { + break; + } else if (code == 111) { + /* escape */ + run = get_bits(&s->gb, 6); + level = get_bits(&s->gb, 12); + level = (level << 20) >> 20; + } else { + run = rl->table_run[code]; + level = rl->table_level[code]; + if (get_bits(&s->gb, 1)) + level = -level; + } + i += run; + if (i >= 64) + return -1; + j = scan_table[i]; + dprintf("%d: run=%d level=%d\n", n, run, level); + level = (level * s->qscale * matrix[j]) / 16; + /* XXX: is it really necessary to saturate since the encoder + knows whats going on ? */ + mismatch ^= level; + block[j] = level; + i++; + } + block[63] ^= (mismatch & 1); + s->block_last_index[n] = i; + return 0; +} + +/* compressed picture size */ +#define PICTURE_BUFFER_SIZE 100000 + +typedef struct Mpeg1Context { + MpegEncContext mpeg_enc_ctx; + UINT32 header_state; + int start_code; /* current start code */ + UINT8 buffer[PICTURE_BUFFER_SIZE]; + UINT8 *buf_ptr; + int buffer_size; + int mpeg_enc_ctx_allocated; /* true if decoding context allocated */ +} Mpeg1Context; + +static int mpeg_decode_init(AVCodecContext *avctx) +{ + Mpeg1Context *s = avctx->priv_data; + + s->header_state = 0xff; + s->mpeg_enc_ctx_allocated = 0; + s->buffer_size = PICTURE_BUFFER_SIZE; + s->start_code = -1; + s->buf_ptr = s->buffer; + s->mpeg_enc_ctx.picture_number = 0; + return 0; +} + +/* return the 8 bit start code value and update the search + state. Return -1 if no start code found */ +static int find_start_code(UINT8 **pbuf_ptr, UINT8 *buf_end, + UINT32 *header_state) +{ + UINT8 *buf_ptr; + unsigned int state, v; + int val; + + state = *header_state; + buf_ptr = *pbuf_ptr; + while (buf_ptr < buf_end) { + v = *buf_ptr++; + if (state == 0x000001) { + state = ((state << 8) | v) & 0xffffff; + val = state; + goto found; + } + state = ((state << 8) | v) & 0xffffff; + } + val = -1; + found: + *pbuf_ptr = buf_ptr; + *header_state = state; + return val; +} + +static int mpeg1_decode_picture(AVCodecContext *avctx, + UINT8 *buf, int buf_size) +{ + Mpeg1Context *s1 = avctx->priv_data; + MpegEncContext *s = &s1->mpeg_enc_ctx; + int ref, f_code; + + init_get_bits(&s->gb, buf, buf_size); + + ref = get_bits(&s->gb, 10); /* temporal ref */ + s->pict_type = get_bits(&s->gb, 3); + dprintf("pict_type=%d\n", s->pict_type); + get_bits(&s->gb, 16); + if (s->pict_type == P_TYPE || s->pict_type == B_TYPE) { + s->full_pel[0] = get_bits(&s->gb, 1); + f_code = get_bits(&s->gb, 3); + if (f_code == 0) + return -1; + s->mpeg_f_code[0][0] = f_code; + s->mpeg_f_code[0][1] = f_code; + } + if (s->pict_type == B_TYPE) { + s->full_pel[1] = get_bits(&s->gb, 1); + f_code = get_bits(&s->gb, 3); + if (f_code == 0) + return -1; + s->mpeg_f_code[1][0] = f_code; + s->mpeg_f_code[1][1] = f_code; + } + s->y_dc_scale = 8; + s->c_dc_scale = 8; + s->first_slice = 1; + return 0; +} + +static void mpeg_decode_sequence_extension(MpegEncContext *s) +{ + int horiz_size_ext, vert_size_ext; + int bit_rate_ext, vbv_buf_ext, low_delay; + int frame_rate_ext_n, frame_rate_ext_d; + + get_bits(&s->gb, 8); /* profil and level */ + get_bits(&s->gb, 1); /* progressive_sequence */ + get_bits(&s->gb, 2); /* chroma_format */ + horiz_size_ext = get_bits(&s->gb, 2); + vert_size_ext = get_bits(&s->gb, 2); + s->width |= (horiz_size_ext << 12); + s->height |= (vert_size_ext << 12); + bit_rate_ext = get_bits(&s->gb, 12); /* XXX: handle it */ + s->bit_rate = ((s->bit_rate / 400) | (bit_rate_ext << 12)) * 400; + get_bits(&s->gb, 1); /* marker */ + vbv_buf_ext = get_bits(&s->gb, 8); + low_delay = get_bits(&s->gb, 1); + frame_rate_ext_n = get_bits(&s->gb, 2); + frame_rate_ext_d = get_bits(&s->gb, 5); + if (frame_rate_ext_d >= 1) + s->frame_rate = (s->frame_rate * frame_rate_ext_n) / frame_rate_ext_d; + dprintf("sequence extension\n"); + s->mpeg2 = 1; +} + +static void mpeg_decode_quant_matrix_extension(MpegEncContext *s) +{ + int i, v; + + if (get_bits(&s->gb, 1)) { + for(i=0;i<64;i++) { + v = get_bits(&s->gb, 8); + s->intra_matrix[i] = v; + s->chroma_intra_matrix[i] = v; + } + } + if (get_bits(&s->gb, 1)) { + for(i=0;i<64;i++) { + v = get_bits(&s->gb, 8); + s->non_intra_matrix[i] = v; + s->chroma_non_intra_matrix[i] = v; + } + } + if (get_bits(&s->gb, 1)) { + for(i=0;i<64;i++) { + v = get_bits(&s->gb, 8); + s->chroma_intra_matrix[i] = v; + } + } + if (get_bits(&s->gb, 1)) { + for(i=0;i<64;i++) { + v = get_bits(&s->gb, 8); + s->chroma_non_intra_matrix[i] = v; + } + } +} + +static void mpeg_decode_picture_coding_extension(MpegEncContext *s) +{ + s->full_pel[0] = s->full_pel[1] = 0; + s->mpeg_f_code[0][0] = get_bits(&s->gb, 4); + s->mpeg_f_code[0][1] = get_bits(&s->gb, 4); + s->mpeg_f_code[1][0] = get_bits(&s->gb, 4); + s->mpeg_f_code[1][1] = get_bits(&s->gb, 4); + s->intra_dc_precision = get_bits(&s->gb, 2); + s->picture_structure = get_bits(&s->gb, 2); + s->top_field_first = get_bits(&s->gb, 1); + s->frame_pred_frame_dct = get_bits(&s->gb, 1); + s->concealment_motion_vectors = get_bits(&s->gb, 1); + s->q_scale_type = get_bits(&s->gb, 1); + s->intra_vlc_format = get_bits(&s->gb, 1); + s->alternate_scan = get_bits(&s->gb, 1); + s->repeat_first_field = get_bits(&s->gb, 1); + s->chroma_420_type = get_bits(&s->gb, 1); + s->progressive_frame = get_bits(&s->gb, 1); + /* composite display not parsed */ + dprintf("dc_preci=%d\n", s->intra_dc_precision); + dprintf("pict_structure=%d\n", s->picture_structure); + dprintf("conceal=%d\n", s->concealment_motion_vectors); + dprintf("intrafmt=%d\n", s->intra_vlc_format); + dprintf("frame_pred_frame_dct=%d\n", s->frame_pred_frame_dct); +} + +static void mpeg_decode_extension(AVCodecContext *avctx, + UINT8 *buf, int buf_size) +{ + Mpeg1Context *s1 = avctx->priv_data; + MpegEncContext *s = &s1->mpeg_enc_ctx; + int ext_type; + + init_get_bits(&s->gb, buf, buf_size); + + ext_type = get_bits(&s->gb, 4); + switch(ext_type) { + case 0x1: + /* sequence ext */ + mpeg_decode_sequence_extension(s); + break; + case 0x3: + /* quant matrix extension */ + mpeg_decode_quant_matrix_extension(s); + break; + case 0x8: + /* picture extension */ + mpeg_decode_picture_coding_extension(s); + break; + } +} + +/* return 1 if end of frame */ +static int mpeg_decode_slice(AVCodecContext *avctx, + AVPicture *pict, + int start_code, + UINT8 *buf, int buf_size) +{ + Mpeg1Context *s1 = avctx->priv_data; + MpegEncContext *s = &s1->mpeg_enc_ctx; + int ret; + DCTELEM block[6][64]; + + start_code = (start_code - 1) & 0xff; + if (start_code >= s->mb_height) + return -1; + s->last_dc[0] = 1 << (7 + s->intra_dc_precision); + s->last_dc[1] = s->last_dc[0]; + s->last_dc[2] = s->last_dc[0]; + memset(s->last_mv, 0, sizeof(s->last_mv)); + s->mb_x = -1; + s->mb_y = start_code; + s->mb_incr = 0; + + /* start frame decoding */ + if (s->first_slice) { + s->first_slice = 0; + MPV_frame_start(s); + } + + init_get_bits(&s->gb, buf, buf_size); + + s->qscale = get_bits(&s->gb, 5); + /* extra slice info */ + while (get_bits(&s->gb, 1) != 0) { + get_bits(&s->gb, 8); + } + + for(;;) { + memset(block, 0, sizeof(block)); + ret = mpeg_decode_mb(s, block); + dprintf("ret=%d\n", ret); + if (ret < 0) + return -1; + if (ret == 1) + break; + MPV_decode_mb(s, block); + } + + /* end of slice reached */ + if (s->mb_x == (s->mb_width - 1) && + s->mb_y == (s->mb_height - 1)) { + /* end of image */ + UINT8 **picture; + + MPV_frame_end(s); + + /* XXX: incorrect reported qscale for mpeg2 */ + if (s->pict_type == B_TYPE) { + picture = s->current_picture; + avctx->quality = s->qscale; + } else { + /* latency of 1 frame for I and P frames */ + /* XXX: use another variable than picture_number */ + if (s->picture_number == 0) { + picture = NULL; + } else { + picture = s->last_picture; + avctx->quality = s->last_qscale; + } + s->last_qscale = s->qscale; + s->picture_number++; + } + if (picture) { + pict->data[0] = picture[0]; + pict->data[1] = picture[1]; + pict->data[2] = picture[2]; + pict->linesize[0] = s->linesize; + pict->linesize[1] = s->linesize / 2; + pict->linesize[2] = s->linesize / 2; + return 1; + } else { + return 0; + } + } else { + return 0; + } +} + +static int mpeg1_decode_sequence(AVCodecContext *avctx, + UINT8 *buf, int buf_size) +{ + Mpeg1Context *s1 = avctx->priv_data; + MpegEncContext *s = &s1->mpeg_enc_ctx; + int width, height, i, v; + + init_get_bits(&s->gb, buf, buf_size); + + width = get_bits(&s->gb, 12); + height = get_bits(&s->gb, 12); + get_bits(&s->gb, 4); + s->frame_rate_index = get_bits(&s->gb, 4); + if (s->frame_rate_index == 0) + return -1; + s->bit_rate = get_bits(&s->gb, 18) * 400; + if (get_bits(&s->gb, 1) == 0) /* marker */ + return -1; + if (width <= 0 || height <= 0 || + (width % 2) != 0 || (height % 2) != 0) + return -1; + if (width != s->width || + height != s->height) { + /* start new mpeg1 context decoding */ + s->out_format = FMT_MPEG1; + if (s1->mpeg_enc_ctx_allocated) { + MPV_common_end(s); + } + s->width = width; + s->height = height; + s->has_b_frames = 1; + avctx->width = width; + avctx->height = height; + avctx->frame_rate = frame_rate_tab[s->frame_rate_index]; + avctx->bit_rate = s->bit_rate; + + if (MPV_common_init(s) < 0) + return -1; + mpeg1_init_vlc(s); + s1->mpeg_enc_ctx_allocated = 1; + } + + get_bits(&s->gb, 10); /* vbv_buffer_size */ + get_bits(&s->gb, 1); + + /* get matrix */ + if (get_bits(&s->gb, 1)) { + for(i=0;i<64;i++) { + v = get_bits(&s->gb, 8); + s->intra_matrix[i] = v; + s->chroma_intra_matrix[i] = v; + } + } else { + for(i=0;i<64;i++) { + v = default_intra_matrix[i]; + s->intra_matrix[i] = v; + s->chroma_intra_matrix[i] = v; + } + } + if (get_bits(&s->gb, 1)) { + for(i=0;i<64;i++) { + v = get_bits(&s->gb, 8); + s->non_intra_matrix[i] = v; + s->chroma_non_intra_matrix[i] = v; + } + } else { + for(i=0;i<64;i++) { + v = default_non_intra_matrix[i]; + s->non_intra_matrix[i] = v; + s->chroma_non_intra_matrix[i] = v; + } + } + + /* we set mpeg2 parameters so that it emulates mpeg1 */ + s->progressive_sequence = 1; + s->progressive_frame = 1; + s->picture_structure = PICT_FRAME; + s->frame_pred_frame_dct = 1; + s->mpeg2 = 0; + return 0; +} + +/* handle buffering and image synchronisation */ +static int mpeg_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size) +{ + Mpeg1Context *s = avctx->priv_data; + UINT8 *buf_end, *buf_ptr, *buf_start; + int len, start_code_found, ret, code, start_code, input_size; + AVPicture *picture = data; + + dprintf("fill_buffer\n"); + + *data_size = 0; + /* special case for last picture */ + if (buf_size == 0) { + MpegEncContext *s2 = &s->mpeg_enc_ctx; + if (s2->picture_number > 0) { + picture->data[0] = s2->next_picture[0]; + picture->data[1] = s2->next_picture[1]; + picture->data[2] = s2->next_picture[2]; + picture->linesize[0] = s2->linesize; + picture->linesize[1] = s2->linesize / 2; + picture->linesize[2] = s2->linesize / 2; + *data_size = sizeof(AVPicture); + } + return 0; + } + + buf_ptr = buf; + buf_end = buf + buf_size; + while (buf_ptr < buf_end) { + buf_start = buf_ptr; + /* find start next code */ + code = find_start_code(&buf_ptr, buf_end, &s->header_state); + if (code >= 0) { + start_code_found = 1; + } else { + start_code_found = 0; + } + /* copy to buffer */ + len = buf_ptr - buf_start; + if (len + (s->buf_ptr - s->buffer) > s->buffer_size) { + /* data too big : flush */ + s->buf_ptr = s->buffer; + if (start_code_found) + s->start_code = code; + } else { + memcpy(s->buf_ptr, buf_start, len); + s->buf_ptr += len; + + if (start_code_found) { + /* prepare data for next start code */ + input_size = s->buf_ptr - s->buffer; + start_code = s->start_code; + s->buf_ptr = s->buffer; + s->start_code = code; + switch(start_code) { + case SEQ_START_CODE: + mpeg1_decode_sequence(avctx, s->buffer, + input_size); + break; + + case PICTURE_START_CODE: + /* we have a complete image : we try to decompress it */ + mpeg1_decode_picture(avctx, + s->buffer, input_size); + break; + case EXT_START_CODE: + mpeg_decode_extension(avctx, + s->buffer, input_size); + break; + default: + if (start_code >= SLICE_MIN_START_CODE && + start_code <= SLICE_MAX_START_CODE) { + ret = mpeg_decode_slice(avctx, picture, + start_code, s->buffer, input_size); + if (ret == 1) { + /* got a picture: exit */ + *data_size = sizeof(AVPicture); + goto the_end; + } + } + break; + } + } + } + } + the_end: + return buf_ptr - buf; +} + +static int mpeg_decode_end(AVCodecContext *avctx) +{ + Mpeg1Context *s = avctx->priv_data; + + if (s->mpeg_enc_ctx_allocated) + MPV_common_end(&s->mpeg_enc_ctx); + return 0; +} + +AVCodec mpeg_decoder = { + "mpegvideo", + CODEC_TYPE_VIDEO, + CODEC_ID_MPEG1VIDEO, + sizeof(Mpeg1Context), + mpeg_decode_init, + NULL, + mpeg_decode_end, + mpeg_decode_frame, +}; diff --git a/libavcodec/mpeg12data.h b/libavcodec/mpeg12data.h new file mode 100644 index 0000000000..2b5a9c2e22 --- /dev/null +++ b/libavcodec/mpeg12data.h @@ -0,0 +1,362 @@ +/* + * MPEG1/2 tables + */ + +const UINT8 default_intra_matrix[64] = { + 8, 16, 19, 22, 26, 27, 29, 34, + 16, 16, 22, 24, 27, 29, 34, 37, + 19, 22, 26, 27, 29, 34, 34, 38, + 22, 22, 26, 27, 29, 34, 37, 40, + 22, 26, 27, 29, 32, 35, 40, 48, + 26, 27, 29, 32, 35, 40, 48, 58, + 26, 27, 29, 34, 38, 46, 56, 69, + 27, 29, 35, 38, 46, 56, 69, 83 +}; + +const UINT8 default_non_intra_matrix[64] = { + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, +}; + +const unsigned char vlc_dc_table[256] = { + 0, 1, 2, 2, + 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, +}; + +const UINT16 vlc_dc_lum_code[12] = { + 0x4, 0x0, 0x1, 0x5, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe, 0x1ff, +}; +const unsigned char vlc_dc_lum_bits[12] = { + 3, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 9, +}; + +const UINT16 vlc_dc_chroma_code[12] = { + 0x0, 0x1, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe, 0x3fe, 0x3ff, +}; +const unsigned char vlc_dc_chroma_bits[12] = { + 2, 2, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, +}; + +static const UINT16 mpeg1_vlc[113][2] = { + { 0x3, 2 }, { 0x4, 4 }, { 0x5, 5 }, { 0x6, 7 }, + { 0x26, 8 }, { 0x21, 8 }, { 0xa, 10 }, { 0x1d, 12 }, + { 0x18, 12 }, { 0x13, 12 }, { 0x10, 12 }, { 0x1a, 13 }, + { 0x19, 13 }, { 0x18, 13 }, { 0x17, 13 }, { 0x1f, 14 }, + { 0x1e, 14 }, { 0x1d, 14 }, { 0x1c, 14 }, { 0x1b, 14 }, + { 0x1a, 14 }, { 0x19, 14 }, { 0x18, 14 }, { 0x17, 14 }, + { 0x16, 14 }, { 0x15, 14 }, { 0x14, 14 }, { 0x13, 14 }, + { 0x12, 14 }, { 0x11, 14 }, { 0x10, 14 }, { 0x18, 15 }, + { 0x17, 15 }, { 0x16, 15 }, { 0x15, 15 }, { 0x14, 15 }, + { 0x13, 15 }, { 0x12, 15 }, { 0x11, 15 }, { 0x10, 15 }, + { 0x3, 3 }, { 0x6, 6 }, { 0x25, 8 }, { 0xc, 10 }, + { 0x1b, 12 }, { 0x16, 13 }, { 0x15, 13 }, { 0x1f, 15 }, + { 0x1e, 15 }, { 0x1d, 15 }, { 0x1c, 15 }, { 0x1b, 15 }, + { 0x1a, 15 }, { 0x19, 15 }, { 0x13, 16 }, { 0x12, 16 }, + { 0x11, 16 }, { 0x10, 16 }, { 0x5, 4 }, { 0x4, 7 }, + { 0xb, 10 }, { 0x14, 12 }, { 0x14, 13 }, { 0x7, 5 }, + { 0x24, 8 }, { 0x1c, 12 }, { 0x13, 13 }, { 0x6, 5 }, + { 0xf, 10 }, { 0x12, 12 }, { 0x7, 6 }, { 0x9, 10 }, + { 0x12, 13 }, { 0x5, 6 }, { 0x1e, 12 }, { 0x14, 16 }, + { 0x4, 6 }, { 0x15, 12 }, { 0x7, 7 }, { 0x11, 12 }, + { 0x5, 7 }, { 0x11, 13 }, { 0x27, 8 }, { 0x10, 13 }, + { 0x23, 8 }, { 0x1a, 16 }, { 0x22, 8 }, { 0x19, 16 }, + { 0x20, 8 }, { 0x18, 16 }, { 0xe, 10 }, { 0x17, 16 }, + { 0xd, 10 }, { 0x16, 16 }, { 0x8, 10 }, { 0x15, 16 }, + { 0x1f, 12 }, { 0x1a, 12 }, { 0x19, 12 }, { 0x17, 12 }, + { 0x16, 12 }, { 0x1f, 13 }, { 0x1e, 13 }, { 0x1d, 13 }, + { 0x1c, 13 }, { 0x1b, 13 }, { 0x1f, 16 }, { 0x1e, 16 }, + { 0x1d, 16 }, { 0x1c, 16 }, { 0x1b, 16 }, + { 0x1, 6 }, /* escape */ + { 0x2, 2 }, /* EOB */ +}; + +static const UINT16 mpeg2_vlc[113][2] = { + {0x02, 2}, {0x06, 3}, {0x07, 4}, {0x1c, 5}, + {0x1d, 5}, {0x05, 6}, {0x04, 6}, {0x7b, 7}, + {0x7c, 7}, {0x23, 8}, {0x22, 8}, {0xfa, 8}, + {0xfb, 8}, {0xfe, 8}, {0xff, 8}, {0x1f,14}, + {0x1e,14}, {0x1d,14}, {0x1c,14}, {0x1b,14}, + {0x1a,14}, {0x19,14}, {0x18,14}, {0x17,14}, + {0x16,14}, {0x15,14}, {0x14,14}, {0x13,14}, + {0x12,14}, {0x11,14}, {0x10,14}, {0x18,15}, + {0x17,15}, {0x16,15}, {0x15,15}, {0x14,15}, + {0x13,15}, {0x12,15}, {0x11,15}, {0x10,15}, + {0x02, 3}, {0x06, 5}, {0x79, 7}, {0x27, 8}, + {0x20, 8}, {0x16,13}, {0x15,13}, {0x1f,15}, + {0x1e,15}, {0x1d,15}, {0x1c,15}, {0x1b,15}, + {0x1a,15}, {0x19,15}, {0x13,16}, {0x12,16}, + {0x11,16}, {0x10,16}, {0x05, 5}, {0x07, 7}, + {0xfc, 8}, {0x0c,10}, {0x14,13}, {0x07, 5}, + {0x26, 8}, {0x1c,12}, {0x13,13}, {0x06, 6}, + {0xfd, 8}, {0x12,12}, {0x07, 6}, {0x04, 9}, + {0x12,13}, {0x06, 7}, {0x1e,12}, {0x14,16}, + {0x04, 7}, {0x15,12}, {0x05, 7}, {0x11,12}, + {0x78, 7}, {0x11,13}, {0x7a, 7}, {0x10,13}, + {0x21, 8}, {0x1a,16}, {0x25, 8}, {0x19,16}, + {0x24, 8}, {0x18,16}, {0x05, 9}, {0x17,16}, + {0x07, 9}, {0x16,16}, {0x0d,10}, {0x15,16}, + {0x1f,12}, {0x1a,12}, {0x19,12}, {0x17,12}, + {0x16,12}, {0x1f,13}, {0x1e,13}, {0x1d,13}, + {0x1c,13}, {0x1b,13}, {0x1f,16}, {0x1e,16}, + {0x1d,16}, {0x1c,16}, {0x1b,16}, + {0x01,6}, /* escape */ + {0x06,4}, /* EOB */ +}; + +static const UINT8 mpeg1_level[111] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 1, 2, 3, 4, 5, 1, + 2, 3, 4, 1, 2, 3, 1, 2, + 3, 1, 2, 3, 1, 2, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, +}; + +static const UINT8 mpeg1_run[111] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 3, + 3, 3, 3, 4, 4, 4, 5, 5, + 5, 6, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, + 13, 13, 14, 14, 15, 15, 16, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, +}; + +static RLTable rl_mpeg1 = { + 111, + 111, + mpeg1_vlc, + mpeg1_run, + mpeg1_level, +}; + +static RLTable rl_mpeg2 = { + 111, + 111, + mpeg2_vlc, + mpeg1_run, + mpeg1_level, +}; + +static const UINT8 mbAddrIncrTable[35][2] = { + {0x1, 1}, + {0x3, 3}, + {0x2, 3}, + {0x3, 4}, + {0x2, 4}, + {0x3, 5}, + {0x2, 5}, + {0x7, 7}, + {0x6, 7}, + {0xb, 8}, + {0xa, 8}, + {0x9, 8}, + {0x8, 8}, + {0x7, 8}, + {0x6, 8}, + {0x17, 10}, + {0x16, 10}, + {0x15, 10}, + {0x14, 10}, + {0x13, 10}, + {0x12, 10}, + {0x23, 11}, + {0x22, 11}, + {0x21, 11}, + {0x20, 11}, + {0x1f, 11}, + {0x1e, 11}, + {0x1d, 11}, + {0x1c, 11}, + {0x1b, 11}, + {0x1a, 11}, + {0x19, 11}, + {0x18, 11}, + {0x8, 11}, /* escape */ + {0xf, 11}, /* stuffing */ +}; + +static const UINT8 mbPatTable[63][2] = { + {0xb, 5}, + {0x9, 5}, + {0xd, 6}, + {0xd, 4}, + {0x17, 7}, + {0x13, 7}, + {0x1f, 8}, + {0xc, 4}, + {0x16, 7}, + {0x12, 7}, + {0x1e, 8}, + {0x13, 5}, + {0x1b, 8}, + {0x17, 8}, + {0x13, 8}, + {0xb, 4}, + {0x15, 7}, + {0x11, 7}, + {0x1d, 8}, + {0x11, 5}, + {0x19, 8}, + {0x15, 8}, + {0x11, 8}, + {0xf, 6}, + {0xf, 8}, + {0xd, 8}, + {0x3, 9}, + {0xf, 5}, + {0xb, 8}, + {0x7, 8}, + {0x7, 9}, + {0xa, 4}, + {0x14, 7}, + {0x10, 7}, + {0x1c, 8}, + {0xe, 6}, + {0xe, 8}, + {0xc, 8}, + {0x2, 9}, + {0x10, 5}, + {0x18, 8}, + {0x14, 8}, + {0x10, 8}, + {0xe, 5}, + {0xa, 8}, + {0x6, 8}, + {0x6, 9}, + {0x12, 5}, + {0x1a, 8}, + {0x16, 8}, + {0x12, 8}, + {0xd, 5}, + {0x9, 8}, + {0x5, 8}, + {0x5, 9}, + {0xc, 5}, + {0x8, 8}, + {0x4, 8}, + {0x4, 9}, + {0x7, 3}, + {0xa, 5}, + {0x8, 5}, + {0xc, 6} +}; + +#define MB_INTRA 0x01 +#define MB_PAT 0x02 +#define MB_BACK 0x04 +#define MB_FOR 0x08 +#define MB_QUANT 0x10 + +static const UINT8 table_mb_ptype[32][2] = { + [ MB_FOR|MB_PAT ] { 1, 1 }, + [ MB_PAT ] { 1, 2 }, + [ MB_FOR ] { 1, 3 }, + [ MB_INTRA ] { 3, 5 }, + [ MB_QUANT|MB_FOR|MB_PAT ] { 2, 5 }, + [ MB_QUANT|MB_PAT ] { 1, 5 }, + [ MB_QUANT|MB_INTRA ] { 1, 6 }, +}; + +static const UINT8 table_mb_btype[32][2] = { + [ MB_FOR|MB_BACK ] { 2, 2 }, + [ MB_FOR|MB_BACK|MB_PAT ] { 3, 2 }, + [ MB_BACK ] { 2, 3 }, + [ MB_BACK|MB_PAT ] { 3, 3 }, + [ MB_FOR ] { 2, 4 }, + [ MB_FOR|MB_PAT ] { 3, 4 }, + [ MB_INTRA ] { 3, 5 }, + [ MB_QUANT|MB_FOR|MB_BACK|MB_PAT ] { 2, 5 }, + [ MB_QUANT|MB_FOR|MB_PAT ] { 3, 6 }, + [ MB_QUANT|MB_BACK|MB_PAT ] { 2, 6 }, + [ MB_QUANT|MB_INTRA ] { 1, 6 }, +}; + +static const UINT8 mbMotionVectorTable[17][2] = { +{ 0x1, 1 }, +{ 0x1, 2 }, +{ 0x1, 3 }, +{ 0x1, 4 }, +{ 0x3, 6 }, +{ 0x5, 7 }, +{ 0x4, 7 }, +{ 0x3, 7 }, +{ 0xb, 9 }, +{ 0xa, 9 }, +{ 0x9, 9 }, +{ 0x11, 10 }, +{ 0x10, 10 }, +{ 0xf, 10 }, +{ 0xe, 10 }, +{ 0xd, 10 }, +{ 0xc, 10 }, +}; + +const UINT8 zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +static const int frame_rate_tab[9] = { + 0, + (int)(23.976 * FRAME_RATE_BASE), + (int)(24 * FRAME_RATE_BASE), + (int)(25 * FRAME_RATE_BASE), + (int)(29.97 * FRAME_RATE_BASE), + (int)(30 * FRAME_RATE_BASE), + (int)(50 * FRAME_RATE_BASE), + (int)(59.94 * FRAME_RATE_BASE), + (int)(60 * FRAME_RATE_BASE), +}; + +static const UINT8 non_linear_qscale[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8,10,12,14,16,18,20,22, + 24,28,32,36,40,44,48,52, + 56,64,72,80,88,96,104,112, +}; diff --git a/libavcodec/mpeg4data.h b/libavcodec/mpeg4data.h new file mode 100644 index 0000000000..54b93d97ed --- /dev/null +++ b/libavcodec/mpeg4data.h @@ -0,0 +1,106 @@ + +/* dc encoding for mpeg4 */ +static const UINT8 DCtab_lum[13][2] = +{ + {3,3}, {3,2}, {2,2}, {2,3}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7}, + {1,8}, {1,9}, {1,10}, {1,11}, +}; + +static const UINT8 DCtab_chrom[13][2] = +{ + {3,2}, {2,2}, {1,2}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7}, {1,8}, + {1,9}, {1,10}, {1,11}, {1,12}, +}; + +const UINT16 intra_vlc[103][2] = { +{ 0x2, 2 }, +{ 0x6, 3 },{ 0xf, 4 },{ 0xd, 5 },{ 0xc, 5 }, +{ 0x15, 6 },{ 0x13, 6 },{ 0x12, 6 },{ 0x17, 7 }, +{ 0x1f, 8 },{ 0x1e, 8 },{ 0x1d, 8 },{ 0x25, 9 }, +{ 0x24, 9 },{ 0x23, 9 },{ 0x21, 9 },{ 0x21, 10 }, +{ 0x20, 10 },{ 0xf, 10 },{ 0xe, 10 },{ 0x7, 11 }, +{ 0x6, 11 },{ 0x20, 11 },{ 0x21, 11 },{ 0x50, 12 }, +{ 0x51, 12 },{ 0x52, 12 },{ 0xe, 4 },{ 0x14, 6 }, +{ 0x16, 7 },{ 0x1c, 8 },{ 0x20, 9 },{ 0x1f, 9 }, +{ 0xd, 10 },{ 0x22, 11 },{ 0x53, 12 },{ 0x55, 12 }, +{ 0xb, 5 },{ 0x15, 7 },{ 0x1e, 9 },{ 0xc, 10 }, +{ 0x56, 12 },{ 0x11, 6 },{ 0x1b, 8 },{ 0x1d, 9 }, +{ 0xb, 10 },{ 0x10, 6 },{ 0x22, 9 },{ 0xa, 10 }, +{ 0xd, 6 },{ 0x1c, 9 },{ 0x8, 10 },{ 0x12, 7 }, +{ 0x1b, 9 },{ 0x54, 12 },{ 0x14, 7 },{ 0x1a, 9 }, +{ 0x57, 12 },{ 0x19, 8 },{ 0x9, 10 },{ 0x18, 8 }, +{ 0x23, 11 },{ 0x17, 8 },{ 0x19, 9 },{ 0x18, 9 }, +{ 0x7, 10 },{ 0x58, 12 },{ 0x7, 4 },{ 0xc, 6 }, +{ 0x16, 8 },{ 0x17, 9 },{ 0x6, 10 },{ 0x5, 11 }, +{ 0x4, 11 },{ 0x59, 12 },{ 0xf, 6 },{ 0x16, 9 }, +{ 0x5, 10 },{ 0xe, 6 },{ 0x4, 10 },{ 0x11, 7 }, +{ 0x24, 11 },{ 0x10, 7 },{ 0x25, 11 },{ 0x13, 7 }, +{ 0x5a, 12 },{ 0x15, 8 },{ 0x5b, 12 },{ 0x14, 8 }, +{ 0x13, 8 },{ 0x1a, 8 },{ 0x15, 9 },{ 0x14, 9 }, +{ 0x13, 9 },{ 0x12, 9 },{ 0x11, 9 },{ 0x26, 11 }, +{ 0x27, 11 },{ 0x5c, 12 },{ 0x5d, 12 },{ 0x5e, 12 }, +{ 0x5f, 12 },{ 0x3, 7 }, +}; + +const INT8 intra_level[102] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 1, 2, 3, + 4, 5, 1, 2, 3, 4, 1, 2, + 3, 1, 2, 3, 1, 2, 3, 1, + 2, 3, 1, 2, 1, 2, 1, 1, + 1, 1, 1, 1, 2, 3, 4, 5, + 6, 7, 8, 1, 2, 3, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, +}; + +const INT8 intra_run[102] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 2, 2, 2, + 2, 2, 3, 3, 3, 3, 4, 4, + 4, 5, 5, 5, 6, 6, 6, 7, + 7, 7, 8, 8, 9, 9, 10, 11, + 12, 13, 14, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 2, 2, + 3, 3, 4, 4, 5, 5, 6, 6, + 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, +}; + +static RLTable rl_intra = { + 102, + 67, + intra_vlc, + intra_run, + intra_level, +}; + +/* alternate scan orders used when doing AC prediction */ +UINT8 ff_alternate_horizontal_scan[64] = { + 0, 1, 2, 3, 8, 9, 16, 17, + 10, 11, 4, 5, 6, 7, 15, 14, + 13, 12, 19, 18, 24, 25, 32, 33, + 26, 27, 20, 21, 22, 23, 28, 29, + 30, 31, 34, 35, 40, 41, 48, 49, + 42, 43, 36, 37, 38, 39, 44, 45, + 46, 47, 50, 51, 56, 57, 58, 59, + 52, 53, 54, 55, 60, 61, 62, 63, +}; + +UINT8 ff_alternate_vertical_scan[64] = { + 0, 8, 16, 24, 1, 9, 2, 10, + 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, + 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, + 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, + 38, 46, 54, 62, 39, 47, 55, 63, +}; diff --git a/libavcodec/mpegaudio.c b/libavcodec/mpegaudio.c new file mode 100644 index 0000000000..af05e29279 --- /dev/null +++ b/libavcodec/mpegaudio.c @@ -0,0 +1,774 @@ +/* + * The simplest mpeg audio layer 2 encoder + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include "avcodec.h" +#include "mpegaudio.h" + +#define NDEBUG +#include <assert.h> + +/* define it to use floats in quantization (I don't like floats !) */ +//#define USE_FLOATS + +#define MPA_STEREO 0 +#define MPA_JSTEREO 1 +#define MPA_DUAL 2 +#define MPA_MONO 3 + +#include "mpegaudiotab.h" + +int MPA_encode_init(AVCodecContext *avctx) +{ + MpegAudioContext *s = avctx->priv_data; + int freq = avctx->sample_rate; + int bitrate = avctx->bit_rate; + int channels = avctx->channels; + int i, v, table, ch_bitrate; + float a; + + if (channels > 2) + return -1; + bitrate = bitrate / 1000; + s->nb_channels = channels; + s->freq = freq; + s->bit_rate = bitrate * 1000; + avctx->frame_size = MPA_FRAME_SIZE; + avctx->key_frame = 1; /* always key frame */ + + /* encoding freq */ + s->lsf = 0; + for(i=0;i<3;i++) { + if (freq_tab[i] == freq) + break; + if ((freq_tab[i] / 2) == freq) { + s->lsf = 1; + break; + } + } + if (i == 3) + return -1; + s->freq_index = i; + + /* encoding bitrate & frequency */ + for(i=0;i<15;i++) { + if (bitrate_tab[1-s->lsf][i] == bitrate) + break; + } + if (i == 15) + return -1; + s->bitrate_index = i; + + /* compute total header size & pad bit */ + + a = (float)(bitrate * 1000 * MPA_FRAME_SIZE) / (freq * 8.0); + s->frame_size = ((int)a) * 8; + + /* frame fractional size to compute padding */ + s->frame_frac = 0; + s->frame_frac_incr = (int)((a - floor(a)) * 65536.0); + + /* select the right allocation table */ + ch_bitrate = bitrate / s->nb_channels; + if (!s->lsf) { + if ((freq == 48000 && ch_bitrate >= 56) || + (ch_bitrate >= 56 && ch_bitrate <= 80)) + table = 0; + else if (freq != 48000 && ch_bitrate >= 96) + table = 1; + else if (freq != 32000 && ch_bitrate <= 48) + table = 2; + else + table = 3; + } else { + table = 4; + } + /* number of used subbands */ + s->sblimit = sblimit_table[table]; + s->alloc_table = alloc_tables[table]; + +#ifdef DEBUG + printf("%d kb/s, %d Hz, frame_size=%d bits, table=%d, padincr=%x\n", + bitrate, freq, s->frame_size, table, s->frame_frac_incr); +#endif + + for(i=0;i<s->nb_channels;i++) + s->samples_offset[i] = 0; + + for(i=0;i<512;i++) { + float a = enwindow[i] * 32768.0 * 16.0; + filter_bank[i] = (int)(a); + } + for(i=0;i<64;i++) { + v = (int)(pow(2.0, (3 - i) / 3.0) * (1 << 20)); + if (v <= 0) + v = 1; + scale_factor_table[i] = v; +#ifdef USE_FLOATS + scale_factor_inv_table[i] = pow(2.0, -(3 - i) / 3.0) / (float)(1 << 20); +#else +#define P 15 + scale_factor_shift[i] = 21 - P - (i / 3); + scale_factor_mult[i] = (1 << P) * pow(2.0, (i % 3) / 3.0); +#endif + } + for(i=0;i<128;i++) { + v = i - 64; + if (v <= -3) + v = 0; + else if (v < 0) + v = 1; + else if (v == 0) + v = 2; + else if (v < 3) + v = 3; + else + v = 4; + scale_diff_table[i] = v; + } + + for(i=0;i<17;i++) { + v = quant_bits[i]; + if (v < 0) + v = -v; + else + v = v * 3; + total_quant_bits[i] = 12 * v; + } + + return 0; +} + +/* 32 point floating point IDCT */ +static void idct32(int *out, int *tab, int sblimit, int left_shift) +{ + int i, j; + int *t, *t1, xr; + const int *xp = costab32; + + for(j=31;j>=3;j-=2) tab[j] += tab[j - 2]; + + t = tab + 30; + t1 = tab + 2; + do { + t[0] += t[-4]; + t[1] += t[1 - 4]; + t -= 4; + } while (t != t1); + + t = tab + 28; + t1 = tab + 4; + do { + t[0] += t[-8]; + t[1] += t[1-8]; + t[2] += t[2-8]; + t[3] += t[3-8]; + t -= 8; + } while (t != t1); + + t = tab; + t1 = tab + 32; + do { + t[ 3] = -t[ 3]; + t[ 6] = -t[ 6]; + + t[11] = -t[11]; + t[12] = -t[12]; + t[13] = -t[13]; + t[15] = -t[15]; + t += 16; + } while (t != t1); + + + t = tab; + t1 = tab + 8; + do { + int x1, x2, x3, x4; + + x3 = MUL(t[16], FIX(SQRT2*0.5)); + x4 = t[0] - x3; + x3 = t[0] + x3; + + x2 = MUL(-(t[24] + t[8]), FIX(SQRT2*0.5)); + x1 = MUL((t[8] - x2), xp[0]); + x2 = MUL((t[8] + x2), xp[1]); + + t[ 0] = x3 + x1; + t[ 8] = x4 - x2; + t[16] = x4 + x2; + t[24] = x3 - x1; + t++; + } while (t != t1); + + xp += 2; + t = tab; + t1 = tab + 4; + do { + xr = MUL(t[28],xp[0]); + t[28] = (t[0] - xr); + t[0] = (t[0] + xr); + + xr = MUL(t[4],xp[1]); + t[ 4] = (t[24] - xr); + t[24] = (t[24] + xr); + + xr = MUL(t[20],xp[2]); + t[20] = (t[8] - xr); + t[ 8] = (t[8] + xr); + + xr = MUL(t[12],xp[3]); + t[12] = (t[16] - xr); + t[16] = (t[16] + xr); + t++; + } while (t != t1); + xp += 4; + + for (i = 0; i < 4; i++) { + xr = MUL(tab[30-i*4],xp[0]); + tab[30-i*4] = (tab[i*4] - xr); + tab[ i*4] = (tab[i*4] + xr); + + xr = MUL(tab[ 2+i*4],xp[1]); + tab[ 2+i*4] = (tab[28-i*4] - xr); + tab[28-i*4] = (tab[28-i*4] + xr); + + xr = MUL(tab[31-i*4],xp[0]); + tab[31-i*4] = (tab[1+i*4] - xr); + tab[ 1+i*4] = (tab[1+i*4] + xr); + + xr = MUL(tab[ 3+i*4],xp[1]); + tab[ 3+i*4] = (tab[29-i*4] - xr); + tab[29-i*4] = (tab[29-i*4] + xr); + + xp += 2; + } + + t = tab + 30; + t1 = tab + 1; + do { + xr = MUL(t1[0], *xp); + t1[0] = (t[0] - xr); + t[0] = (t[0] + xr); + t -= 2; + t1 += 2; + xp++; + } while (t >= tab); + + for(i=0;i<32;i++) { + out[i] = tab[bitinv32[i]] << left_shift; + } +} + +static void filter(MpegAudioContext *s, int ch, short *samples, int incr) +{ + short *p, *q; + int sum, offset, i, j, norm, n; + short tmp[64]; + int tmp1[32]; + int *out; + + // print_pow1(samples, 1152); + + offset = s->samples_offset[ch]; + out = &s->sb_samples[ch][0][0][0]; + for(j=0;j<36;j++) { + /* 32 samples at once */ + for(i=0;i<32;i++) { + s->samples_buf[ch][offset + (31 - i)] = samples[0]; + samples += incr; + } + + /* filter */ + p = s->samples_buf[ch] + offset; + q = filter_bank; + /* maxsum = 23169 */ + for(i=0;i<64;i++) { + sum = p[0*64] * q[0*64]; + sum += p[1*64] * q[1*64]; + sum += p[2*64] * q[2*64]; + sum += p[3*64] * q[3*64]; + sum += p[4*64] * q[4*64]; + sum += p[5*64] * q[5*64]; + sum += p[6*64] * q[6*64]; + sum += p[7*64] * q[7*64]; + tmp[i] = sum >> 14; + p++; + q++; + } + tmp1[0] = tmp[16]; + for( i=1; i<=16; i++ ) tmp1[i] = tmp[i+16]+tmp[16-i]; + for( i=17; i<=31; i++ ) tmp1[i] = tmp[i+16]-tmp[80-i]; + + /* integer IDCT 32 with normalization. XXX: There may be some + overflow left */ + norm = 0; + for(i=0;i<32;i++) { + norm |= abs(tmp1[i]); + } + n = log2(norm) - 12; + if (n > 0) { + for(i=0;i<32;i++) + tmp1[i] >>= n; + } else { + n = 0; + } + + idct32(out, tmp1, s->sblimit, n); + + /* advance of 32 samples */ + offset -= 32; + out += 32; + /* handle the wrap around */ + if (offset < 0) { + memmove(s->samples_buf[ch] + SAMPLES_BUF_SIZE - (512 - 32), + s->samples_buf[ch], (512 - 32) * 2); + offset = SAMPLES_BUF_SIZE - 512; + } + } + s->samples_offset[ch] = offset; + + // print_pow(s->sb_samples, 1152); +} + +static void compute_scale_factors(unsigned char scale_code[SBLIMIT], + unsigned char scale_factors[SBLIMIT][3], + int sb_samples[3][12][SBLIMIT], + int sblimit) +{ + int *p, vmax, v, n, i, j, k, code; + int index, d1, d2; + unsigned char *sf = &scale_factors[0][0]; + + for(j=0;j<sblimit;j++) { + for(i=0;i<3;i++) { + /* find the max absolute value */ + p = &sb_samples[i][0][j]; + vmax = abs(*p); + for(k=1;k<12;k++) { + p += SBLIMIT; + v = abs(*p); + if (v > vmax) + vmax = v; + } + /* compute the scale factor index using log 2 computations */ + if (vmax > 0) { + n = log2(vmax); + /* n is the position of the MSB of vmax. now + use at most 2 compares to find the index */ + index = (21 - n) * 3 - 3; + if (index >= 0) { + while (vmax <= scale_factor_table[index+1]) + index++; + } else { + index = 0; /* very unlikely case of overflow */ + } + } else { + index = 63; + } + +#if 0 + printf("%2d:%d in=%x %x %d\n", + j, i, vmax, scale_factor_table[index], index); +#endif + /* store the scale factor */ + assert(index >=0 && index <= 63); + sf[i] = index; + } + + /* compute the transmission factor : look if the scale factors + are close enough to each other */ + d1 = scale_diff_table[sf[0] - sf[1] + 64]; + d2 = scale_diff_table[sf[1] - sf[2] + 64]; + + /* handle the 25 cases */ + switch(d1 * 5 + d2) { + case 0*5+0: + case 0*5+4: + case 3*5+4: + case 4*5+0: + case 4*5+4: + code = 0; + break; + case 0*5+1: + case 0*5+2: + case 4*5+1: + case 4*5+2: + code = 3; + sf[2] = sf[1]; + break; + case 0*5+3: + case 4*5+3: + code = 3; + sf[1] = sf[2]; + break; + case 1*5+0: + case 1*5+4: + case 2*5+4: + code = 1; + sf[1] = sf[0]; + break; + case 1*5+1: + case 1*5+2: + case 2*5+0: + case 2*5+1: + case 2*5+2: + code = 2; + sf[1] = sf[2] = sf[0]; + break; + case 2*5+3: + case 3*5+3: + code = 2; + sf[0] = sf[1] = sf[2]; + break; + case 3*5+0: + case 3*5+1: + case 3*5+2: + code = 2; + sf[0] = sf[2] = sf[1]; + break; + case 1*5+3: + code = 2; + if (sf[0] > sf[2]) + sf[0] = sf[2]; + sf[1] = sf[2] = sf[0]; + break; + default: + abort(); + } + +#if 0 + printf("%d: %2d %2d %2d %d %d -> %d\n", j, + sf[0], sf[1], sf[2], d1, d2, code); +#endif + scale_code[j] = code; + sf += 3; + } +} + +/* The most important function : psycho acoustic module. In this + encoder there is basically none, so this is the worst you can do, + but also this is the simpler. */ +static void psycho_acoustic_model(MpegAudioContext *s, short smr[SBLIMIT]) +{ + int i; + + for(i=0;i<s->sblimit;i++) { + smr[i] = (int)(fixed_smr[i] * 10); + } +} + + +#define SB_NOTALLOCATED 0 +#define SB_ALLOCATED 1 +#define SB_NOMORE 2 + +/* Try to maximize the smr while using a number of bits inferior to + the frame size. I tried to make the code simpler, faster and + smaller than other encoders :-) */ +static void compute_bit_allocation(MpegAudioContext *s, + short smr1[MPA_MAX_CHANNELS][SBLIMIT], + unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT], + int *padding) +{ + int i, ch, b, max_smr, max_ch, max_sb, current_frame_size, max_frame_size; + int incr; + short smr[MPA_MAX_CHANNELS][SBLIMIT]; + unsigned char subband_status[MPA_MAX_CHANNELS][SBLIMIT]; + const unsigned char *alloc; + + memcpy(smr, smr1, s->nb_channels * sizeof(short) * SBLIMIT); + memset(subband_status, SB_NOTALLOCATED, s->nb_channels * SBLIMIT); + memset(bit_alloc, 0, s->nb_channels * SBLIMIT); + + /* compute frame size and padding */ + max_frame_size = s->frame_size; + s->frame_frac += s->frame_frac_incr; + if (s->frame_frac >= 65536) { + s->frame_frac -= 65536; + s->do_padding = 1; + max_frame_size += 8; + } else { + s->do_padding = 0; + } + + /* compute the header + bit alloc size */ + current_frame_size = 32; + alloc = s->alloc_table; + for(i=0;i<s->sblimit;i++) { + incr = alloc[0]; + current_frame_size += incr * s->nb_channels; + alloc += 1 << incr; + } + for(;;) { + /* look for the subband with the largest signal to mask ratio */ + max_sb = -1; + max_ch = -1; + max_smr = 0x80000000; + for(ch=0;ch<s->nb_channels;ch++) { + for(i=0;i<s->sblimit;i++) { + if (smr[ch][i] > max_smr && subband_status[ch][i] != SB_NOMORE) { + max_smr = smr[ch][i]; + max_sb = i; + max_ch = ch; + } + } + } +#if 0 + printf("current=%d max=%d max_sb=%d alloc=%d\n", + current_frame_size, max_frame_size, max_sb, + bit_alloc[max_sb]); +#endif + if (max_sb < 0) + break; + + /* find alloc table entry (XXX: not optimal, should use + pointer table) */ + alloc = s->alloc_table; + for(i=0;i<max_sb;i++) { + alloc += 1 << alloc[0]; + } + + if (subband_status[max_ch][max_sb] == SB_NOTALLOCATED) { + /* nothing was coded for this band: add the necessary bits */ + incr = 2 + nb_scale_factors[s->scale_code[max_ch][max_sb]] * 6; + incr += total_quant_bits[alloc[1]]; + } else { + /* increments bit allocation */ + b = bit_alloc[max_ch][max_sb]; + incr = total_quant_bits[alloc[b + 1]] - + total_quant_bits[alloc[b]]; + } + + if (current_frame_size + incr <= max_frame_size) { + /* can increase size */ + b = ++bit_alloc[max_ch][max_sb]; + current_frame_size += incr; + /* decrease smr by the resolution we added */ + smr[max_ch][max_sb] = smr1[max_ch][max_sb] - quant_snr[alloc[b]]; + /* max allocation size reached ? */ + if (b == ((1 << alloc[0]) - 1)) + subband_status[max_ch][max_sb] = SB_NOMORE; + else + subband_status[max_ch][max_sb] = SB_ALLOCATED; + } else { + /* cannot increase the size of this subband */ + subband_status[max_ch][max_sb] = SB_NOMORE; + } + } + *padding = max_frame_size - current_frame_size; + assert(*padding >= 0); + +#if 0 + for(i=0;i<s->sblimit;i++) { + printf("%d ", bit_alloc[i]); + } + printf("\n"); +#endif +} + +/* + * Output the mpeg audio layer 2 frame. Note how the code is small + * compared to other encoders :-) + */ +static void encode_frame(MpegAudioContext *s, + unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT], + int padding) +{ + int i, j, k, l, bit_alloc_bits, b, ch; + unsigned char *sf; + int q[3]; + PutBitContext *p = &s->pb; + + /* header */ + + put_bits(p, 12, 0xfff); + put_bits(p, 1, 1 - s->lsf); /* 1 = mpeg1 ID, 0 = mpeg2 lsf ID */ + put_bits(p, 2, 4-2); /* layer 2 */ + put_bits(p, 1, 1); /* no error protection */ + put_bits(p, 4, s->bitrate_index); + put_bits(p, 2, s->freq_index); + put_bits(p, 1, s->do_padding); /* use padding */ + put_bits(p, 1, 0); /* private_bit */ + put_bits(p, 2, s->nb_channels == 2 ? MPA_STEREO : MPA_MONO); + put_bits(p, 2, 0); /* mode_ext */ + put_bits(p, 1, 0); /* no copyright */ + put_bits(p, 1, 1); /* original */ + put_bits(p, 2, 0); /* no emphasis */ + + /* bit allocation */ + j = 0; + for(i=0;i<s->sblimit;i++) { + bit_alloc_bits = s->alloc_table[j]; + for(ch=0;ch<s->nb_channels;ch++) { + put_bits(p, bit_alloc_bits, bit_alloc[ch][i]); + } + j += 1 << bit_alloc_bits; + } + + /* scale codes */ + for(i=0;i<s->sblimit;i++) { + for(ch=0;ch<s->nb_channels;ch++) { + if (bit_alloc[ch][i]) + put_bits(p, 2, s->scale_code[ch][i]); + } + } + + /* scale factors */ + for(i=0;i<s->sblimit;i++) { + for(ch=0;ch<s->nb_channels;ch++) { + if (bit_alloc[ch][i]) { + sf = &s->scale_factors[ch][i][0]; + switch(s->scale_code[ch][i]) { + case 0: + put_bits(p, 6, sf[0]); + put_bits(p, 6, sf[1]); + put_bits(p, 6, sf[2]); + break; + case 3: + case 1: + put_bits(p, 6, sf[0]); + put_bits(p, 6, sf[2]); + break; + case 2: + put_bits(p, 6, sf[0]); + break; + } + } + } + } + + /* quantization & write sub band samples */ + + for(k=0;k<3;k++) { + for(l=0;l<12;l+=3) { + j = 0; + for(i=0;i<s->sblimit;i++) { + bit_alloc_bits = s->alloc_table[j]; + for(ch=0;ch<s->nb_channels;ch++) { + b = bit_alloc[ch][i]; + if (b) { + int qindex, steps, m, sample, bits; + /* we encode 3 sub band samples of the same sub band at a time */ + qindex = s->alloc_table[j+b]; + steps = quant_steps[qindex]; + for(m=0;m<3;m++) { + sample = s->sb_samples[ch][k][l + m][i]; + /* divide by scale factor */ +#ifdef USE_FLOATS + { + float a; + a = (float)sample * scale_factor_inv_table[s->scale_factors[ch][i][k]]; + q[m] = (int)((a + 1.0) * steps * 0.5); + } +#else + { + int q1, e, shift, mult; + e = s->scale_factors[ch][i][k]; + shift = scale_factor_shift[e]; + mult = scale_factor_mult[e]; + + /* normalize to P bits */ + if (shift < 0) + q1 = sample << (-shift); + else + q1 = sample >> shift; + q1 = (q1 * mult) >> P; + q[m] = ((q1 + (1 << P)) * steps) >> (P + 1); + } +#endif + if (q[m] >= steps) + q[m] = steps - 1; + assert(q[m] >= 0 && q[m] < steps); + } + bits = quant_bits[qindex]; + if (bits < 0) { + /* group the 3 values to save bits */ + put_bits(p, -bits, + q[0] + steps * (q[1] + steps * q[2])); +#if 0 + printf("%d: gr1 %d\n", + i, q[0] + steps * (q[1] + steps * q[2])); +#endif + } else { +#if 0 + printf("%d: gr3 %d %d %d\n", + i, q[0], q[1], q[2]); +#endif + put_bits(p, bits, q[0]); + put_bits(p, bits, q[1]); + put_bits(p, bits, q[2]); + } + } + } + /* next subband in alloc table */ + j += 1 << bit_alloc_bits; + } + } + } + + /* padding */ + for(i=0;i<padding;i++) + put_bits(p, 1, 0); + + /* flush */ + flush_put_bits(p); +} + +int MPA_encode_frame(AVCodecContext *avctx, + unsigned char *frame, int buf_size, void *data) +{ + MpegAudioContext *s = avctx->priv_data; + short *samples = data; + short smr[MPA_MAX_CHANNELS][SBLIMIT]; + unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT]; + int padding, i; + + for(i=0;i<s->nb_channels;i++) { + filter(s, i, samples + i, s->nb_channels); + } + + for(i=0;i<s->nb_channels;i++) { + compute_scale_factors(s->scale_code[i], s->scale_factors[i], + s->sb_samples[i], s->sblimit); + } + for(i=0;i<s->nb_channels;i++) { + psycho_acoustic_model(s, smr[i]); + } + compute_bit_allocation(s, smr, bit_alloc, &padding); + + init_put_bits(&s->pb, frame, MPA_MAX_CODED_FRAME_SIZE, NULL, NULL); + + encode_frame(s, bit_alloc, padding); + + s->nb_samples += MPA_FRAME_SIZE; + return s->pb.buf_ptr - s->pb.buf; +} + + +AVCodec mp2_encoder = { + "mp2", + CODEC_TYPE_AUDIO, + CODEC_ID_MP2, + sizeof(MpegAudioContext), + MPA_encode_init, + MPA_encode_frame, + NULL, +}; diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h new file mode 100644 index 0000000000..006650a99c --- /dev/null +++ b/libavcodec/mpegaudio.h @@ -0,0 +1,34 @@ + +/* max compressed frame size */ +#define MPA_MAX_CODED_FRAME_SIZE 1200 + +#define MPA_FRAME_SIZE 1152 +#define MPA_MAX_CHANNELS 2 + +#define SAMPLES_BUF_SIZE 4096 +#define SBLIMIT 32 /* number of subbands */ +#define DCT_BITS 14 /* number of bits for the DCT */ +#define MUL(a,b) (((a) * (b)) >> DCT_BITS) +#define FIX(a) ((int)((a) * (1 << DCT_BITS))) + +typedef struct MpegAudioContext { + PutBitContext pb; + int nb_channels; + int freq, bit_rate; + int lsf; /* 1 if mpeg2 low bitrate selected */ + int bitrate_index; /* bit rate */ + int freq_index; + int frame_size; /* frame size, in bits, without padding */ + long long nb_samples; /* total number of samples encoded */ + /* padding computation */ + int frame_frac, frame_frac_incr, do_padding; + short samples_buf[MPA_MAX_CHANNELS][SAMPLES_BUF_SIZE]; /* buffer for filter */ + int samples_offset[MPA_MAX_CHANNELS]; /* offset in samples_buf */ + int sb_samples[MPA_MAX_CHANNELS][3][12][SBLIMIT]; + unsigned char scale_factors[MPA_MAX_CHANNELS][SBLIMIT][3]; /* scale factors */ + /* code to group 3 scale factors */ + unsigned char scale_code[MPA_MAX_CHANNELS][SBLIMIT]; + int sblimit; /* number of used subbands */ + const unsigned char *alloc_table; +} MpegAudioContext; + diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c new file mode 100644 index 0000000000..f3fa90af55 --- /dev/null +++ b/libavcodec/mpegaudiodec.c @@ -0,0 +1,293 @@ +/* + * MPEG Audio decoder + * Copyright (c) 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "avcodec.h" +#include "mpglib/mpg123.h" + +/* + * TODO: + * - add free format + * - do not rely anymore on mpglib (first step: implement dct64 and decoding filter) + */ + +#define HEADER_SIZE 4 +#define BACKSTEP_SIZE 512 + +typedef struct MPADecodeContext { + struct mpstr mpstr; + UINT8 inbuf1[2][MAXFRAMESIZE + BACKSTEP_SIZE]; /* input buffer */ + int inbuf_index; + UINT8 *inbuf_ptr, *inbuf; + int frame_size; + int error_protection; + int layer; + int sample_rate; + int bit_rate; + int old_frame_size; + GetBitContext gb; +} MPADecodeContext; + +/* XXX: suppress that mess */ +struct mpstr *gmp; +GetBitContext *gmp_gb; +static MPADecodeContext *gmp_s; + +/* XXX: merge constants with encoder */ +static const unsigned short mp_bitrate_tab[2][3][15] = { + { {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448 }, + {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384 }, + {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 } }, + { {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256}, + {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160}, + {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160} + } +}; + +static unsigned short mp_freq_tab[3] = { 44100, 48000, 32000 }; + +static int decode_init(AVCodecContext * avctx) +{ + MPADecodeContext *s = avctx->priv_data; + struct mpstr *mp = &s->mpstr; + static int init; + + mp->fr.single = -1; + mp->synth_bo = 1; + + if(!init) { + init = 1; + make_decode_tables(32767); + init_layer2(); + init_layer3(SBLIMIT); + } + + s->inbuf_index = 0; + s->inbuf = &s->inbuf1[s->inbuf_index][BACKSTEP_SIZE]; + s->inbuf_ptr = s->inbuf; + + return 0; +} + +/* fast header check for resync */ +static int check_header(UINT32 header) +{ + /* header */ + if ((header & 0xffe00000) != 0xffe00000) + return -1; + /* layer check */ + if (((header >> 17) & 3) == 0) + return -1; + /* bit rate : currently no free format supported */ + if (((header >> 12) & 0xf) == 0xf || + ((header >> 12) & 0xf) == 0x0) + return -1; + /* frequency */ + if (((header >> 10) & 3) == 3) + return -1; + return 0; +} + +/* header decoding. MUST check the header before because no + consistency check is done there */ +static void decode_header(MPADecodeContext *s, UINT32 header) +{ + struct frame *fr = &s->mpstr.fr; + int sample_rate, frame_size; + + if (header & (1<<20)) { + fr->lsf = (header & (1<<19)) ? 0 : 1; + fr->mpeg25 = 0; + } else { + fr->lsf = 1; + fr->mpeg25 = 1; + } + + s->layer = 4 - ((header >> 17) & 3); + /* extract frequency */ + fr->sampling_frequency = ((header >> 10) & 3); + sample_rate = mp_freq_tab[fr->sampling_frequency] >> (fr->lsf + fr->mpeg25); + fr->sampling_frequency += 3 * (fr->lsf + fr->mpeg25); + + s->error_protection = ((header>>16) & 1) ^ 1; + + fr->bitrate_index = ((header>>12)&0xf); + fr->padding = ((header>>9)&0x1); + fr->extension = ((header>>8)&0x1); + fr->mode = ((header>>6)&0x3); + fr->mode_ext = ((header>>4)&0x3); + fr->copyright = ((header>>3)&0x1); + fr->original = ((header>>2)&0x1); + fr->emphasis = header & 0x3; + + fr->stereo = (fr->mode == MPG_MD_MONO) ? 1 : 2; + + + frame_size = mp_bitrate_tab[fr->lsf][s->layer - 1][fr->bitrate_index]; + s->bit_rate = frame_size * 1000; + switch(s->layer) { + case 1: + frame_size = (frame_size * 12000) / sample_rate; + frame_size = ((frame_size + fr->padding) << 2); + break; + case 2: + frame_size = (frame_size * 144000) / sample_rate; + frame_size += fr->padding; + break; + case 3: + frame_size = (frame_size * 144000) / (sample_rate << fr->lsf); + frame_size += fr->padding; + break; + } + s->frame_size = frame_size; + s->sample_rate = sample_rate; + +#if 0 + printf("layer%d, %d Hz, %d kbits/s, %s\n", + s->layer, s->sample_rate, s->bit_rate, fr->stereo ? "stereo" : "mono"); +#endif +} + +static int mp_decode_frame(MPADecodeContext *s, + short *samples) +{ + int nb_bytes; + + init_get_bits(&s->gb, s->inbuf + HEADER_SIZE, s->inbuf_ptr - s->inbuf - HEADER_SIZE); + + /* skip error protection field */ + if (s->error_protection) + get_bits(&s->gb, 16); + + /* XXX: horrible: global! */ + gmp = &s->mpstr; + gmp_s = s; + gmp_gb = &s->gb; + + nb_bytes = 0; + switch(s->layer) { + case 1: + do_layer1(&s->mpstr.fr,(unsigned char *)samples, &nb_bytes); + break; + case 2: + do_layer2(&s->mpstr.fr,(unsigned char *)samples, &nb_bytes); + break; + case 3: + do_layer3(&s->mpstr.fr,(unsigned char *)samples, &nb_bytes); + s->inbuf_index ^= 1; + s->inbuf = &s->inbuf1[s->inbuf_index][BACKSTEP_SIZE]; + s->old_frame_size = s->frame_size; + break; + default: + break; + } + return nb_bytes; +} + +/* + * seek back in the stream for backstep bytes (at most 511 bytes, and + * at most in last frame). Note that this is slightly incorrect (data + * can span more than one block!) + */ +int set_pointer(long backstep) +{ + UINT8 *ptr; + + /* compute current position in stream */ + ptr = gmp_gb->buf_ptr - (gmp_gb->bit_cnt >> 3); + /* copy old data before current one */ + ptr -= backstep; + memcpy(ptr, gmp_s->inbuf1[gmp_s->inbuf_index ^ 1] + + BACKSTEP_SIZE + gmp_s->old_frame_size - backstep, backstep); + /* init get bits again */ + init_get_bits(gmp_gb, ptr, gmp_s->frame_size + backstep); + + return 0; +} + +static int decode_frame(AVCodecContext * avctx, + void *data, int *data_size, + UINT8 * buf, int buf_size) +{ + MPADecodeContext *s = avctx->priv_data; + UINT32 header; + UINT8 *buf_ptr; + int len, out_size; + short *out_samples = data; + + *data_size = 0; + buf_ptr = buf; + while (buf_size > 0) { + len = s->inbuf_ptr - s->inbuf; + if (s->frame_size == 0) { + /* no header seen : find one. We need at least 7 bytes to parse it */ + len = HEADER_SIZE - len; + if (len > buf_size) + len = buf_size; + memcpy(s->inbuf_ptr, buf_ptr, len); + buf_ptr += len; + s->inbuf_ptr += len; + buf_size -= len; + if ((s->inbuf_ptr - s->inbuf) == HEADER_SIZE) { + header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | + (s->inbuf[2] << 8) | s->inbuf[3]; + if (check_header(header) < 0) { + /* no sync found : move by one byte (inefficient, but simple!) */ + memcpy(s->inbuf, s->inbuf + 1, HEADER_SIZE - 1); + s->inbuf_ptr--; + } else { + decode_header(s, header); + /* update codec info */ + avctx->sample_rate = s->sample_rate; + avctx->channels = s->mpstr.fr.stereo ? 2 : 1; + avctx->bit_rate = s->bit_rate; + } + } + } else if (len < s->frame_size) { + len = s->frame_size - len; + if (len > buf_size) + len = buf_size; + + memcpy(s->inbuf_ptr, buf_ptr, len); + buf_ptr += len; + s->inbuf_ptr += len; + buf_size -= len; + } else { + out_size = mp_decode_frame(s, out_samples); + s->inbuf_ptr = s->inbuf; + s->frame_size = 0; + *data_size = out_size; + break; + } + } + return buf_ptr - buf; +} + +AVCodec mp3_decoder = +{ + "mpegaudio", + CODEC_TYPE_AUDIO, + CODEC_ID_MP2, + sizeof(MPADecodeContext), + decode_init, + NULL, + NULL, + decode_frame, +}; diff --git a/libavcodec/mpegaudiotab.h b/libavcodec/mpegaudiotab.h new file mode 100644 index 0000000000..05bdb9eea1 --- /dev/null +++ b/libavcodec/mpegaudiotab.h @@ -0,0 +1,310 @@ +/* + * mpeg audio layer 2 tables. Most of them come from the mpeg audio + * specification. + * + * Copyright (c) 2000 Gerard Lantau. + * + * The licence of this code is contained in file LICENCE found in the + * same archive + */ + +static const unsigned short bitrate_tab[2][15] = { + {0,8,16,24,32,40,48,56,64,80,96,112,128,144,160}, /* mpeg2 lsf */ + {0,32,48,56,64,80,96,112,128,160,192,224,256,320,384}, /* mpeg1 */ +}; + +static const unsigned short freq_tab[3] = { 44100, 48000, 32000 }; + +#define SQRT2 1.41421356237309514547 + +static const int costab32[30] = { + FIX(0.54119610014619701222), + FIX(1.3065629648763763537), + + FIX(0.50979557910415917998), + FIX(2.5629154477415054814), + FIX(0.89997622313641556513), + FIX(0.60134488693504528634), + + FIX(0.5024192861881556782), + FIX(5.1011486186891552563), + FIX(0.78815462345125020249), + FIX(0.64682178335999007679), + FIX(0.56694403481635768927), + FIX(1.0606776859903470633), + FIX(1.7224470982383341955), + FIX(0.52249861493968885462), + + FIX(10.19000812354803287), + FIX(0.674808341455005678), + FIX(1.1694399334328846596), + FIX(0.53104259108978413284), + FIX(2.0577810099534108446), + FIX(0.58293496820613388554), + FIX(0.83934964541552681272), + FIX(0.50547095989754364798), + FIX(3.4076084184687189804), + FIX(0.62250412303566482475), + FIX(0.97256823786196078263), + FIX(0.51544730992262455249), + FIX(1.4841646163141661852), + FIX(0.5531038960344445421), + FIX(0.74453627100229857749), + FIX(0.5006029982351962726), +}; + +static const int bitinv32[32] = { + 0, 16, 8, 24, 4, 20, 12, 28, + 2, 18, 10, 26, 6, 22, 14, 30, + 1, 17, 9, 25, 5, 21, 13, 29, + 3, 19, 11, 27, 7, 23, 15, 31 +}; + + +static short filter_bank[512]; + +static const double enwindow[512] = {0.000000000, + -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000477, -0.000000954, -0.000000954, + -0.000000954, -0.000000954, -0.000001431, -0.000001431, -0.000001907, -0.000001907, -0.000002384, -0.000002384, + -0.000002861, -0.000003338, -0.000003338, -0.000003815, -0.000004292, -0.000004768, -0.000005245, -0.000006199, + -0.000006676, -0.000007629, -0.000008106, -0.000009060, -0.000010014, -0.000011444, -0.000012398, -0.000013828, + -0.000014782, -0.000016689, -0.000018120, -0.000019550, -0.000021458, -0.000023365, -0.000025272, -0.000027657, + -0.000030041, -0.000032425, -0.000034809, -0.000037670, -0.000040531, -0.000043392, -0.000046253, -0.000049591, + -0.000052929, -0.000055790, -0.000059605, -0.000062943, -0.000066280, -0.000070095, -0.000073433, -0.000076771, + -0.000080585, -0.000083923, -0.000087261, -0.000090599, -0.000093460, -0.000096321, -0.000099182, 0.000101566, + 0.000103951, 0.000105858, 0.000107288, 0.000108242, 0.000108719, 0.000108719, 0.000108242, 0.000106812, + 0.000105381, 0.000102520, 0.000099182, 0.000095367, 0.000090122, 0.000084400, 0.000077724, 0.000069618, + 0.000060558, 0.000050545, 0.000039577, 0.000027180, 0.000013828, -0.000000954, -0.000017166, -0.000034332, + -0.000052929, -0.000072956, -0.000093937, -0.000116348, -0.000140190, -0.000165462, -0.000191212, -0.000218868, + -0.000247478, -0.000277042, -0.000307560, -0.000339031, -0.000371456, -0.000404358, -0.000438213, -0.000472546, + -0.000507355, -0.000542164, -0.000576973, -0.000611782, -0.000646591, -0.000680923, -0.000714302, -0.000747204, + -0.000779152, -0.000809669, -0.000838757, -0.000866413, -0.000891685, -0.000915051, -0.000935555, -0.000954151, + -0.000968933, -0.000980854, -0.000989437, -0.000994205, -0.000995159, -0.000991821, -0.000983715, 0.000971317, + 0.000953674, 0.000930786, 0.000902653, 0.000868797, 0.000829220, 0.000783920, 0.000731945, 0.000674248, + 0.000610352, 0.000539303, 0.000462532, 0.000378609, 0.000288486, 0.000191689, 0.000088215, -0.000021458, + -0.000137329, -0.000259876, -0.000388145, -0.000522137, -0.000661850, -0.000806808, -0.000956535, -0.001111031, + -0.001269817, -0.001432419, -0.001597881, -0.001766682, -0.001937389, -0.002110004, -0.002283096, -0.002457142, + -0.002630711, -0.002803326, -0.002974033, -0.003141880, -0.003306866, -0.003467083, -0.003622532, -0.003771782, + -0.003914356, -0.004048824, -0.004174709, -0.004290581, -0.004395962, -0.004489899, -0.004570484, -0.004638195, + -0.004691124, -0.004728317, -0.004748821, -0.004752159, -0.004737377, -0.004703045, -0.004649162, -0.004573822, + -0.004477024, -0.004357815, -0.004215240, -0.004049301, -0.003858566, -0.003643036, -0.003401756, 0.003134727, + 0.002841473, 0.002521515, 0.002174854, 0.001800537, 0.001399517, 0.000971317, 0.000515938, 0.000033379, + -0.000475883, -0.001011848, -0.001573563, -0.002161503, -0.002774239, -0.003411293, -0.004072189, -0.004756451, + -0.005462170, -0.006189346, -0.006937027, -0.007703304, -0.008487225, -0.009287834, -0.010103703, -0.010933399, + -0.011775017, -0.012627602, -0.013489246, -0.014358521, -0.015233517, -0.016112804, -0.016994476, -0.017876148, + -0.018756866, -0.019634247, -0.020506859, -0.021372318, -0.022228718, -0.023074150, -0.023907185, -0.024725437, + -0.025527000, -0.026310921, -0.027073860, -0.027815342, -0.028532982, -0.029224873, -0.029890060, -0.030526638, + -0.031132698, -0.031706810, -0.032248020, -0.032754898, -0.033225536, -0.033659935, -0.034055710, -0.034412861, + -0.034730434, -0.035007000, -0.035242081, -0.035435200, -0.035586357, -0.035694122, -0.035758972, 0.035780907, + 0.035758972, 0.035694122, 0.035586357, 0.035435200, 0.035242081, 0.035007000, 0.034730434, 0.034412861, + 0.034055710, 0.033659935, 0.033225536, 0.032754898, 0.032248020, 0.031706810, 0.031132698, 0.030526638, + 0.029890060, 0.029224873, 0.028532982, 0.027815342, 0.027073860, 0.026310921, 0.025527000, 0.024725437, + 0.023907185, 0.023074150, 0.022228718, 0.021372318, 0.020506859, 0.019634247, 0.018756866, 0.017876148, + 0.016994476, 0.016112804, 0.015233517, 0.014358521, 0.013489246, 0.012627602, 0.011775017, 0.010933399, + 0.010103703, 0.009287834, 0.008487225, 0.007703304, 0.006937027, 0.006189346, 0.005462170, 0.004756451, + 0.004072189, 0.003411293, 0.002774239, 0.002161503, 0.001573563, 0.001011848, 0.000475883, -0.000033379, + -0.000515938, -0.000971317, -0.001399517, -0.001800537, -0.002174854, -0.002521515, -0.002841473, 0.003134727, + 0.003401756, 0.003643036, 0.003858566, 0.004049301, 0.004215240, 0.004357815, 0.004477024, 0.004573822, + 0.004649162, 0.004703045, 0.004737377, 0.004752159, 0.004748821, 0.004728317, 0.004691124, 0.004638195, + 0.004570484, 0.004489899, 0.004395962, 0.004290581, 0.004174709, 0.004048824, 0.003914356, 0.003771782, + 0.003622532, 0.003467083, 0.003306866, 0.003141880, 0.002974033, 0.002803326, 0.002630711, 0.002457142, + 0.002283096, 0.002110004, 0.001937389, 0.001766682, 0.001597881, 0.001432419, 0.001269817, 0.001111031, + 0.000956535, 0.000806808, 0.000661850, 0.000522137, 0.000388145, 0.000259876, 0.000137329, 0.000021458, + -0.000088215, -0.000191689, -0.000288486, -0.000378609, -0.000462532, -0.000539303, -0.000610352, -0.000674248, + -0.000731945, -0.000783920, -0.000829220, -0.000868797, -0.000902653, -0.000930786, -0.000953674, 0.000971317, + 0.000983715, 0.000991821, 0.000995159, 0.000994205, 0.000989437, 0.000980854, 0.000968933, 0.000954151, + 0.000935555, 0.000915051, 0.000891685, 0.000866413, 0.000838757, 0.000809669, 0.000779152, 0.000747204, + 0.000714302, 0.000680923, 0.000646591, 0.000611782, 0.000576973, 0.000542164, 0.000507355, 0.000472546, + 0.000438213, 0.000404358, 0.000371456, 0.000339031, 0.000307560, 0.000277042, 0.000247478, 0.000218868, + 0.000191212, 0.000165462, 0.000140190, 0.000116348, 0.000093937, 0.000072956, 0.000052929, 0.000034332, + 0.000017166, 0.000000954, -0.000013828, -0.000027180, -0.000039577, -0.000050545, -0.000060558, -0.000069618, + -0.000077724, -0.000084400, -0.000090122, -0.000095367, -0.000099182, -0.000102520, -0.000105381, -0.000106812, + -0.000108242, -0.000108719, -0.000108719, -0.000108242, -0.000107288, -0.000105858, -0.000103951, 0.000101566, + 0.000099182, 0.000096321, 0.000093460, 0.000090599, 0.000087261, 0.000083923, 0.000080585, 0.000076771, + 0.000073433, 0.000070095, 0.000066280, 0.000062943, 0.000059605, 0.000055790, 0.000052929, 0.000049591, + 0.000046253, 0.000043392, 0.000040531, 0.000037670, 0.000034809, 0.000032425, 0.000030041, 0.000027657, + 0.000025272, 0.000023365, 0.000021458, 0.000019550, 0.000018120, 0.000016689, 0.000014782, 0.000013828, + 0.000012398, 0.000011444, 0.000010014, 0.000009060, 0.000008106, 0.000007629, 0.000006676, 0.000006199, + 0.000005245, 0.000004768, 0.000004292, 0.000003815, 0.000003338, 0.000003338, 0.000002861, 0.000002384, + 0.000002384, 0.000001907, 0.000001907, 0.000001431, 0.000001431, 0.000000954, 0.000000954, 0.000000954, + 0.000000954, 0.000000477, 0.000000477, 0.000000477, 0.000000477, 0.000000477, 0.000000477 + }; + +static int scale_factor_table[64]; +#ifdef USE_FLOATS +static float scale_factor_inv_table[64]; +#else +static INT8 scale_factor_shift[64]; +static unsigned short scale_factor_mult[64]; +#endif +static unsigned char scale_diff_table[128]; + +static const int sblimit_table[5] = { 27 , 30 , 8, 12 , 30 }; + +static const int quant_steps[17] = { + 3, 5, 7, 9, 15, + 31, 63, 127, 255, 511, + 1023, 2047, 4095, 8191, 16383, + 32767, 65535 +}; + +/* we use a negative value if grouped */ +static const int quant_bits[17] = { + -5, -7, 3, -10, 4, + 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, + 15, 16 +}; + +/* signal to noise ratio of each quantification step (could be + computed from quant_steps[]). The values are dB multiplied by 10 +*/ +static unsigned short quant_snr[17] = { + 70, 110, 160, 208, + 253, 316, 378, 439, + 499, 559, 620, 680, + 740, 800, 861, 920, + 980 +}; + + +/* total number of bits per allocation group */ +static unsigned short total_quant_bits[17]; + +/* encoding tables which give the quantization index. Note how it is + possible to store them efficiently ! */ +static const unsigned char alloc_table_0[] = { + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, +}; + +static const unsigned char alloc_table_1[] = { + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 3, 0, 1, 2, 3, 4, 5, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, + 2, 0, 1, 16, +}; + +static const unsigned char alloc_table_2[] = { + 4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, +}; + +static const unsigned char alloc_table_3[] = { + 4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 4, 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, +}; + +static const unsigned char alloc_table_4[] = { + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 4, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 3, 0, 1, 3, 4, 5, 6, 7, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, + 2, 0, 1, 3, +}; + +const unsigned char *alloc_tables[5] = +{ alloc_table_0, alloc_table_1, alloc_table_2, alloc_table_3, alloc_table_4, }; + +/* fixed psycho acoustic model. Values of SNR taken from the 'toolame' + project */ +const float fixed_smr[SBLIMIT] = { + 30, 17, 16, 10, 3, 12, 8, 2.5, + 5, 5, 6, 6, 5, 6, 10, 6, + -4, -10, -21, -30, -42, -55, -68, -75, + -75, -75, -75, -75, -91, -107, -110, -108 +}; + +const unsigned char nb_scale_factors[4] = { 3, 2, 1, 2 }; diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c new file mode 100644 index 0000000000..467062ddf6 --- /dev/null +++ b/libavcodec/mpegvideo.c @@ -0,0 +1,1281 @@ +/* + * The simplest mpeg encoder (well, it was the simplest!) + * Copyright (c) 2000,2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> +#include "avcodec.h" +#include "dsputil.h" +#include "mpegvideo.h" + +#define EDGE_WIDTH 16 + +/* enable all paranoid tests for rounding, overflows, etc... */ +//#define PARANOID + +//#define DEBUG + +/* for jpeg fast DCT */ +#define CONST_BITS 14 + +static const unsigned short aanscales[64] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 +}; + +static UINT8 h263_chroma_roundtab[16] = { + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, +}; + +static void encode_picture(MpegEncContext *s, int picture_number); +static void rate_control_init(MpegEncContext *s); +static int rate_estimate_qscale(MpegEncContext *s); + +/* default motion estimation */ +int motion_estimation_method = ME_LOG; + +/* XXX: should use variable shift ? */ +#define QMAT_SHIFT_MMX 19 +#define QMAT_SHIFT 25 + +static void convert_matrix(int *qmat, const UINT16 *quant_matrix, int qscale) +{ + int i; + + if (av_fdct == jpeg_fdct_ifast) { + for(i=0;i<64;i++) { + /* 16 <= qscale * quant_matrix[i] <= 7905 */ + /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ + + qmat[i] = (int)((1ULL << (QMAT_SHIFT + 11)) / (aanscales[i] * qscale * quant_matrix[i])); + } + } else { + for(i=0;i<64;i++) { + /* We can safely suppose that 16 <= quant_matrix[i] <= 255 + So 16 <= qscale * quant_matrix[i] <= 7905 + so (1 << QMAT_SHIFT) / 16 >= qmat[i] >= (1 << QMAT_SHIFT) / 7905 + */ + qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); + } + } +} + +/* init common structure for both encoder and decoder */ +int MPV_common_init(MpegEncContext *s) +{ + int c_size, i; + UINT8 *pict; + + s->mb_width = (s->width + 15) / 16; + s->mb_height = (s->height + 15) / 16; + s->linesize = s->mb_width * 16 + 2 * EDGE_WIDTH; + + for(i=0;i<3;i++) { + int w, h, shift, pict_start; + + w = s->linesize; + h = s->mb_height * 16 + 2 * EDGE_WIDTH; + shift = (i == 0) ? 0 : 1; + c_size = (w >> shift) * (h >> shift); + pict_start = (w >> shift) * (EDGE_WIDTH >> shift) + (EDGE_WIDTH >> shift); + + pict = av_mallocz(c_size); + if (pict == NULL) + goto fail; + s->last_picture_base[i] = pict; + s->last_picture[i] = pict + pict_start; + + pict = av_mallocz(c_size); + if (pict == NULL) + goto fail; + s->next_picture_base[i] = pict; + s->next_picture[i] = pict + pict_start; + + if (s->has_b_frames) { + pict = av_mallocz(c_size); + if (pict == NULL) + goto fail; + s->aux_picture_base[i] = pict; + s->aux_picture[i] = pict + pict_start; + } + } + + if (s->out_format == FMT_H263) { + int size; + /* MV prediction */ + size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); + s->motion_val = malloc(size * 2 * sizeof(INT16)); + if (s->motion_val == NULL) + goto fail; + memset(s->motion_val, 0, size * 2 * sizeof(INT16)); + } + + if (s->h263_pred) { + int y_size, c_size, i, size; + + /* dc values */ + + y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2); + c_size = (s->mb_width + 2) * (s->mb_height + 2); + size = y_size + 2 * c_size; + s->dc_val[0] = malloc(size * sizeof(INT16)); + if (s->dc_val[0] == NULL) + goto fail; + s->dc_val[1] = s->dc_val[0] + y_size; + s->dc_val[2] = s->dc_val[1] + c_size; + for(i=0;i<size;i++) + s->dc_val[0][i] = 1024; + + /* ac values */ + s->ac_val[0] = av_mallocz(size * sizeof(INT16) * 16); + if (s->ac_val[0] == NULL) + goto fail; + s->ac_val[1] = s->ac_val[0] + y_size; + s->ac_val[2] = s->ac_val[1] + c_size; + + /* cbp values */ + s->coded_block = av_mallocz(y_size); + if (!s->coded_block) + goto fail; + } + /* default structure is frame */ + s->picture_structure = PICT_FRAME; + + /* init default q matrix (only for mpeg and mjpeg) */ + for(i=0;i<64;i++) { + s->intra_matrix[i] = default_intra_matrix[i]; + s->chroma_intra_matrix[i] = default_intra_matrix[i]; + s->non_intra_matrix[i] = default_non_intra_matrix[i]; + s->chroma_non_intra_matrix[i] = default_non_intra_matrix[i]; + } + s->context_initialized = 1; + return 0; + fail: + if (s->motion_val) + free(s->motion_val); + if (s->dc_val[0]) + free(s->dc_val[0]); + if (s->ac_val[0]) + free(s->ac_val[0]); + if (s->coded_block) + free(s->coded_block); + for(i=0;i<3;i++) { + if (s->last_picture_base[i]) + free(s->last_picture_base[i]); + if (s->next_picture_base[i]) + free(s->next_picture_base[i]); + if (s->aux_picture_base[i]) + free(s->aux_picture_base[i]); + } + return -1; +} + +/* init common structure for both encoder and decoder */ +void MPV_common_end(MpegEncContext *s) +{ + int i; + + if (s->motion_val) + free(s->motion_val); + if (s->h263_pred) { + free(s->dc_val[0]); + free(s->ac_val[0]); + free(s->coded_block); + } + for(i=0;i<3;i++) { + free(s->last_picture_base[i]); + free(s->next_picture_base[i]); + if (s->has_b_frames) + free(s->aux_picture_base[i]); + } + s->context_initialized = 0; +} + +/* init video encoder */ +int MPV_encode_init(AVCodecContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + + s->bit_rate = avctx->bit_rate; + s->frame_rate = avctx->frame_rate; + s->width = avctx->width; + s->height = avctx->height; + s->gop_size = avctx->gop_size; + if (s->gop_size <= 1) { + s->intra_only = 1; + s->gop_size = 12; + } else { + s->intra_only = 0; + } + s->full_search = motion_estimation_method; + + s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE); + + switch(avctx->codec->id) { + case CODEC_ID_MPEG1VIDEO: + s->out_format = FMT_MPEG1; + break; + case CODEC_ID_MJPEG: + s->out_format = FMT_MJPEG; + s->intra_only = 1; /* force intra only for jpeg */ + if (mjpeg_init(s) < 0) + return -1; + break; + case CODEC_ID_H263: + if (h263_get_picture_format(s->width, s->height) == 7) + return -1; + s->out_format = FMT_H263; + break; + case CODEC_ID_H263P: + s->out_format = FMT_H263; + s->h263_plus = 1; + /* XXX: not unrectricted mv yet */ + break; + case CODEC_ID_RV10: + s->out_format = FMT_H263; + s->h263_rv10 = 1; + break; + case CODEC_ID_OPENDIVX: + s->out_format = FMT_H263; + s->h263_pred = 1; + s->unrestricted_mv = 1; + break; + case CODEC_ID_MSMPEG4: + s->out_format = FMT_H263; + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->unrestricted_mv = 1; + break; + default: + return -1; + } + + if (s->out_format == FMT_H263) + h263_encode_init_vlc(s); + + /* init */ + if (MPV_common_init(s) < 0) + return -1; + + /* rate control init */ + rate_control_init(s); + + s->picture_number = 0; + s->fake_picture_number = 0; + /* motion detector init */ + s->f_code = 1; + + return 0; +} + +int MPV_encode_end(AVCodecContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + +#ifdef STATS + print_stats(); +#endif + MPV_common_end(s); + if (s->out_format == FMT_MJPEG) + mjpeg_close(s); + return 0; +} + +/* draw the edges of width 'w' of an image of size width, height */ +static void draw_edges(UINT8 *buf, int wrap, int width, int height, int w) +{ + UINT8 *ptr, *last_line; + int i; + + last_line = buf + (height - 1) * wrap; + for(i=0;i<w;i++) { + /* top and bottom */ + memcpy(buf - (i + 1) * wrap, buf, width); + memcpy(last_line + (i + 1) * wrap, last_line, width); + } + /* left and right */ + ptr = buf; + for(i=0;i<height;i++) { + memset(ptr - w, ptr[0], w); + memset(ptr + width, ptr[width-1], w); + ptr += wrap; + } + /* corners */ + for(i=0;i<w;i++) { + memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */ + memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */ + memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */ + memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */ + } +} + + +/* generic function for encode/decode called before a frame is coded/decoded */ +void MPV_frame_start(MpegEncContext *s) +{ + int i; + UINT8 *tmp; + + if (s->pict_type == B_TYPE) { + for(i=0;i<3;i++) { + s->current_picture[i] = s->aux_picture[i]; + } + } else { + for(i=0;i<3;i++) { + /* swap next and last */ + tmp = s->last_picture[i]; + s->last_picture[i] = s->next_picture[i]; + s->next_picture[i] = tmp; + s->current_picture[i] = tmp; + } + } +} + +/* generic function for encode/decode called after a frame has been coded/decoded */ +void MPV_frame_end(MpegEncContext *s) +{ + /* draw edge for correct motion prediction if outside */ + if (s->pict_type != B_TYPE) { + draw_edges(s->current_picture[0], s->linesize, s->width, s->height, EDGE_WIDTH); + draw_edges(s->current_picture[1], s->linesize/2, s->width/2, s->height/2, EDGE_WIDTH/2); + draw_edges(s->current_picture[2], s->linesize/2, s->width/2, s->height/2, EDGE_WIDTH/2); + } +} + +int MPV_encode_picture(AVCodecContext *avctx, + unsigned char *buf, int buf_size, void *data) +{ + MpegEncContext *s = avctx->priv_data; + AVPicture *pict = data; + int i, j; + + if (s->fixed_qscale) + s->qscale = avctx->quality; + + init_put_bits(&s->pb, buf, buf_size, NULL, NULL); + + if (!s->intra_only) { + /* first picture of GOP is intra */ + if ((s->picture_number % s->gop_size) == 0) + s->pict_type = I_TYPE; + else + s->pict_type = P_TYPE; + } else { + s->pict_type = I_TYPE; + } + avctx->key_frame = (s->pict_type == I_TYPE); + + MPV_frame_start(s); + + for(i=0;i<3;i++) { + UINT8 *src = pict->data[i]; + UINT8 *dest = s->current_picture[i]; + int src_wrap = pict->linesize[i]; + int dest_wrap = s->linesize; + int w = s->width; + int h = s->height; + + if (i >= 1) { + dest_wrap >>= 1; + w >>= 1; + h >>= 1; + } + + for(j=0;j<h;j++) { + memcpy(dest, src, w); + dest += dest_wrap; + src += src_wrap; + } + s->new_picture[i] = s->current_picture[i]; + } + + encode_picture(s, s->picture_number); + + MPV_frame_end(s); + s->picture_number++; + + if (s->out_format == FMT_MJPEG) + mjpeg_picture_trailer(s); + + flush_put_bits(&s->pb); + s->total_bits += (s->pb.buf_ptr - s->pb.buf) * 8; + avctx->quality = s->qscale; + return s->pb.buf_ptr - s->pb.buf; +} + +static inline int clip(int a, int amin, int amax) +{ + if (a < amin) + return amin; + else if (a > amax) + return amax; + else + return a; +} + +static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale); +static int dct_quantize_mmx(MpegEncContext *s, + DCTELEM *block, int n, + int qscale); +static void dct_unquantize(MpegEncContext *s, DCTELEM *block, int n, int qscale); + +/* apply one mpeg motion vector to the three components */ +static inline void mpeg_motion(MpegEncContext *s, + UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, + int dest_offset, + UINT8 **ref_picture, int src_offset, + int field_based, op_pixels_func *pix_op, + int motion_x, int motion_y, int h) +{ + UINT8 *ptr; + int dxy, offset, mx, my, src_x, src_y, height, linesize; + + dxy = ((motion_y & 1) << 1) | (motion_x & 1); + src_x = s->mb_x * 16 + (motion_x >> 1); + src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1); + + /* WARNING: do no forget half pels */ + height = s->height >> field_based; + src_x = clip(src_x, -16, s->width); + if (src_x == s->width) + dxy &= ~1; + src_y = clip(src_y, -16, height); + if (src_y == height) + dxy &= ~2; + linesize = s->linesize << field_based; + ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset; + dest_y += dest_offset; + pix_op[dxy](dest_y, ptr, linesize, h); + pix_op[dxy](dest_y + 8, ptr + 8, linesize, h); + + if (s->out_format == FMT_H263) { + dxy = 0; + if ((motion_x & 3) != 0) + dxy |= 1; + if ((motion_y & 3) != 0) + dxy |= 2; + mx = motion_x >> 2; + my = motion_y >> 2; + } else { + mx = motion_x / 2; + my = motion_y / 2; + dxy = ((my & 1) << 1) | (mx & 1); + mx >>= 1; + my >>= 1; + } + + src_x = s->mb_x * 8 + mx; + src_y = s->mb_y * (8 >> field_based) + my; + src_x = clip(src_x, -8, s->width >> 1); + if (src_x == (s->width >> 1)) + dxy &= ~1; + src_y = clip(src_y, -8, height >> 1); + if (src_y == (height >> 1)) + dxy &= ~2; + + offset = (src_y * (linesize >> 1)) + src_x + (src_offset >> 1); + ptr = ref_picture[1] + offset; + pix_op[dxy](dest_cb + (dest_offset >> 1), ptr, linesize >> 1, h >> 1); + ptr = ref_picture[2] + offset; + pix_op[dxy](dest_cr + (dest_offset >> 1), ptr, linesize >> 1, h >> 1); +} + +static inline void MPV_motion(MpegEncContext *s, + UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, + int dir, UINT8 **ref_picture, + op_pixels_func *pix_op) +{ + int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y; + int mb_x, mb_y, i; + UINT8 *ptr, *dest; + + mb_x = s->mb_x; + mb_y = s->mb_y; + + switch(s->mv_type) { + case MV_TYPE_16X16: + mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, + ref_picture, 0, + 0, pix_op, + s->mv[dir][0][0], s->mv[dir][0][1], 16); + break; + case MV_TYPE_8X8: + for(i=0;i<4;i++) { + motion_x = s->mv[dir][i][0]; + motion_y = s->mv[dir][i][1]; + + dxy = ((motion_y & 1) << 1) | (motion_x & 1); + src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8; + src_y = mb_y * 16 + (motion_y >> 1) + ((i >> 1) & 1) * 8; + + /* WARNING: do no forget half pels */ + src_x = clip(src_x, -16, s->width); + if (src_x == s->width) + dxy &= ~1; + src_y = clip(src_y, -16, s->height); + if (src_y == s->height) + dxy &= ~2; + + ptr = ref_picture[0] + (src_y * s->linesize) + (src_x); + dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize; + pix_op[dxy](dest, ptr, s->linesize, 8); + } + /* In case of 8X8, we construct a single chroma motion vector + with a special rounding */ + mx = 0; + my = 0; + for(i=0;i<4;i++) { + mx += s->mv[dir][i][0]; + my += s->mv[dir][i][1]; + } + if (mx >= 0) + mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1)); + else { + mx = -mx; + mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1)); + } + if (my >= 0) + my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1)); + else { + my = -my; + my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1)); + } + dxy = ((my & 1) << 1) | (mx & 1); + mx >>= 1; + my >>= 1; + + src_x = mb_x * 8 + mx; + src_y = mb_y * 8 + my; + src_x = clip(src_x, -8, s->width/2); + if (src_x == s->width/2) + dxy &= ~1; + src_y = clip(src_y, -8, s->height/2); + if (src_y == s->height/2) + dxy &= ~2; + + offset = (src_y * (s->linesize >> 1)) + src_x; + ptr = ref_picture[1] + offset; + pix_op[dxy](dest_cb, ptr, s->linesize >> 1, 8); + ptr = ref_picture[2] + offset; + pix_op[dxy](dest_cr, ptr, s->linesize >> 1, 8); + break; + case MV_TYPE_FIELD: + if (s->picture_structure == PICT_FRAME) { + /* top field */ + mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, + ref_picture, s->field_select[dir][0] ? s->linesize : 0, + 1, pix_op, + s->mv[dir][0][0], s->mv[dir][0][1], 8); + /* bottom field */ + mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize, + ref_picture, s->field_select[dir][1] ? s->linesize : 0, + 1, pix_op, + s->mv[dir][1][0], s->mv[dir][1][1], 8); + } else { + + + } + break; + } +} + + +/* put block[] to dest[] */ +static inline void put_dct(MpegEncContext *s, + DCTELEM *block, int i, UINT8 *dest, int line_size) +{ + if (!s->mpeg2) + dct_unquantize(s, block, i, s->qscale); + j_rev_dct (block); + put_pixels_clamped(block, dest, line_size); +} + +/* add block[] to dest[] */ +static inline void add_dct(MpegEncContext *s, + DCTELEM *block, int i, UINT8 *dest, int line_size) +{ + if (s->block_last_index[i] >= 0) { + if (!s->mpeg2) + dct_unquantize(s, block, i, s->qscale); + j_rev_dct (block); + add_pixels_clamped(block, dest, line_size); + } +} + +/* generic function called after a macroblock has been parsed by the + decoder or after it has been encoded by the encoder. + + Important variables used: + s->mb_intra : true if intra macroblock + s->mv_dir : motion vector direction + s->mv_type : motion vector type + s->mv : motion vector + s->interlaced_dct : true if interlaced dct used (mpeg2) + */ +void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) +{ + int mb_x, mb_y, motion_x, motion_y; + int dct_linesize, dct_offset; + op_pixels_func *op_pix; + + mb_x = s->mb_x; + mb_y = s->mb_y; + + /* update DC predictors for P macroblocks */ + if (!s->mb_intra) { + if (s->h263_pred) { + int wrap, x, y, v; + wrap = 2 * s->mb_width + 2; + v = 1024; + x = 2 * mb_x + 1; + y = 2 * mb_y + 1; + s->dc_val[0][(x) + (y) * wrap] = v; + s->dc_val[0][(x + 1) + (y) * wrap] = v; + s->dc_val[0][(x) + (y + 1) * wrap] = v; + s->dc_val[0][(x + 1) + (y + 1) * wrap] = v; + /* ac pred */ + memset(s->ac_val[0][(x) + (y) * wrap], 0, 16 * sizeof(INT16)); + memset(s->ac_val[0][(x + 1) + (y) * wrap], 0, 16 * sizeof(INT16)); + memset(s->ac_val[0][(x) + (y + 1) * wrap], 0, 16 * sizeof(INT16)); + memset(s->ac_val[0][(x + 1) + (y + 1) * wrap], 0, 16 * sizeof(INT16)); + if (s->h263_msmpeg4) { + s->coded_block[(x) + (y) * wrap] = 0; + s->coded_block[(x + 1) + (y) * wrap] = 0; + s->coded_block[(x) + (y + 1) * wrap] = 0; + s->coded_block[(x + 1) + (y + 1) * wrap] = 0; + } + /* chroma */ + wrap = s->mb_width + 2; + x = mb_x + 1; + y = mb_y + 1; + s->dc_val[1][(x) + (y) * wrap] = v; + s->dc_val[2][(x) + (y) * wrap] = v; + /* ac pred */ + memset(s->ac_val[1][(x) + (y) * wrap], 0, 16 * sizeof(INT16)); + memset(s->ac_val[2][(x) + (y) * wrap], 0, 16 * sizeof(INT16)); + } else { + s->last_dc[0] = 128 << s->intra_dc_precision; + s->last_dc[1] = 128 << s->intra_dc_precision; + s->last_dc[2] = 128 << s->intra_dc_precision; + } + } + + /* update motion predictor */ + if (s->out_format == FMT_H263) { + int x, y, wrap; + + x = 2 * mb_x + 1; + y = 2 * mb_y + 1; + wrap = 2 * s->mb_width + 2; + if (s->mb_intra) { + motion_x = 0; + motion_y = 0; + goto motion_init; + } else if (s->mv_type == MV_TYPE_16X16) { + motion_x = s->mv[0][0][0]; + motion_y = s->mv[0][0][1]; + motion_init: + /* no update if 8X8 because it has been done during parsing */ + s->motion_val[(x) + (y) * wrap][0] = motion_x; + s->motion_val[(x) + (y) * wrap][1] = motion_y; + s->motion_val[(x + 1) + (y) * wrap][0] = motion_x; + s->motion_val[(x + 1) + (y) * wrap][1] = motion_y; + s->motion_val[(x) + (y + 1) * wrap][0] = motion_x; + s->motion_val[(x) + (y + 1) * wrap][1] = motion_y; + s->motion_val[(x + 1) + (y + 1) * wrap][0] = motion_x; + s->motion_val[(x + 1) + (y + 1) * wrap][1] = motion_y; + } + } + + if (!s->intra_only) { + UINT8 *dest_y, *dest_cb, *dest_cr; + + dest_y = s->current_picture[0] + (mb_y * 16 * s->linesize) + mb_x * 16; + dest_cb = s->current_picture[1] + (mb_y * 8 * (s->linesize >> 1)) + mb_x * 8; + dest_cr = s->current_picture[2] + (mb_y * 8 * (s->linesize >> 1)) + mb_x * 8; + + if (s->interlaced_dct) { + dct_linesize = s->linesize * 2; + dct_offset = s->linesize; + } else { + dct_linesize = s->linesize; + dct_offset = s->linesize * 8; + } + + if (!s->mb_intra) { + /* motion handling */ + if (!s->no_rounding) + op_pix = put_pixels_tab; + else + op_pix = put_no_rnd_pixels_tab; + + if (s->mv_dir & MV_DIR_FORWARD) { + MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix); + if (!s->no_rounding) + op_pix = avg_pixels_tab; + else + op_pix = avg_no_rnd_pixels_tab; + } + if (s->mv_dir & MV_DIR_BACKWARD) { + MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix); + } + + /* add dct residue */ + add_dct(s, block[0], 0, dest_y, dct_linesize); + add_dct(s, block[1], 1, dest_y + 8, dct_linesize); + add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize); + add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize); + + add_dct(s, block[4], 4, dest_cb, dct_linesize >> 1); + add_dct(s, block[5], 5, dest_cr, dct_linesize >> 1); + } else { + /* dct only in intra block */ + put_dct(s, block[0], 0, dest_y, dct_linesize); + put_dct(s, block[1], 1, dest_y + 8, dct_linesize); + put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize); + put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize); + + put_dct(s, block[4], 4, dest_cb, dct_linesize >> 1); + put_dct(s, block[5], 5, dest_cr, dct_linesize >> 1); + } + } +} + +static void encode_picture(MpegEncContext *s, int picture_number) +{ + int mb_x, mb_y, wrap; + UINT8 *ptr; + DCTELEM block[6][64]; + int i, motion_x, motion_y; + + s->picture_number = picture_number; + if (!s->fixed_qscale) + s->qscale = rate_estimate_qscale(s); + + /* precompute matrix */ + if (s->out_format == FMT_MJPEG) { + /* for mjpeg, we do include qscale in the matrix */ + s->intra_matrix[0] = default_intra_matrix[0]; + for(i=1;i<64;i++) + s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3; + convert_matrix(s->q_intra_matrix, s->intra_matrix, 8); + } else { + convert_matrix(s->q_intra_matrix, s->intra_matrix, s->qscale); + convert_matrix(s->q_non_intra_matrix, s->non_intra_matrix, s->qscale); + } + + switch(s->out_format) { + case FMT_MJPEG: + mjpeg_picture_header(s); + break; + case FMT_H263: + if (s->h263_msmpeg4) + msmpeg4_encode_picture_header(s, picture_number); + else if (s->h263_pred) + mpeg4_encode_picture_header(s, picture_number); + else if (s->h263_rv10) + rv10_encode_picture_header(s, picture_number); + else + h263_encode_picture_header(s, picture_number); + break; + case FMT_MPEG1: + mpeg1_encode_picture_header(s, picture_number); + break; + } + + /* init last dc values */ + /* note: quant matrix value (8) is implied here */ + s->last_dc[0] = 128; + s->last_dc[1] = 128; + s->last_dc[2] = 128; + s->mb_incr = 1; + s->last_mv[0][0][0] = 0; + s->last_mv[0][0][1] = 0; + s->mv_type = MV_TYPE_16X16; + s->mv_dir = MV_DIR_FORWARD; + + for(mb_y=0; mb_y < s->mb_height; mb_y++) { + for(mb_x=0; mb_x < s->mb_width; mb_x++) { + + s->mb_x = mb_x; + s->mb_y = mb_y; + + /* compute motion vector and macro block type (intra or non intra) */ + motion_x = 0; + motion_y = 0; + if (s->pict_type == P_TYPE) { + s->mb_intra = estimate_motion(s, mb_x, mb_y, + &motion_x, + &motion_y); + } else { + s->mb_intra = 1; + } + + /* get the pixels */ + wrap = s->linesize; + ptr = s->new_picture[0] + (mb_y * 16 * wrap) + mb_x * 16; + get_pixels(block[0], ptr, wrap); + get_pixels(block[1], ptr + 8, wrap); + get_pixels(block[2], ptr + 8 * wrap, wrap); + get_pixels(block[3], ptr + 8 * wrap + 8, wrap); + wrap = s->linesize >> 1; + ptr = s->new_picture[1] + (mb_y * 8 * wrap) + mb_x * 8; + get_pixels(block[4], ptr, wrap); + + wrap = s->linesize >> 1; + ptr = s->new_picture[2] + (mb_y * 8 * wrap) + mb_x * 8; + get_pixels(block[5], ptr, wrap); + + /* subtract previous frame if non intra */ + if (!s->mb_intra) { + int dxy, offset, mx, my; + + dxy = ((motion_y & 1) << 1) | (motion_x & 1); + ptr = s->last_picture[0] + + ((mb_y * 16 + (motion_y >> 1)) * s->linesize) + + (mb_x * 16 + (motion_x >> 1)); + + sub_pixels_2(block[0], ptr, s->linesize, dxy); + sub_pixels_2(block[1], ptr + 8, s->linesize, dxy); + sub_pixels_2(block[2], ptr + s->linesize * 8, s->linesize, dxy); + sub_pixels_2(block[3], ptr + 8 + s->linesize * 8, s->linesize ,dxy); + + if (s->out_format == FMT_H263) { + /* special rounding for h263 */ + dxy = 0; + if ((motion_x & 3) != 0) + dxy |= 1; + if ((motion_y & 3) != 0) + dxy |= 2; + mx = motion_x >> 2; + my = motion_y >> 2; + } else { + mx = motion_x / 2; + my = motion_y / 2; + dxy = ((my & 1) << 1) | (mx & 1); + mx >>= 1; + my >>= 1; + } + offset = ((mb_y * 8 + my) * (s->linesize >> 1)) + (mb_x * 8 + mx); + ptr = s->last_picture[1] + offset; + sub_pixels_2(block[4], ptr, s->linesize >> 1, dxy); + ptr = s->last_picture[2] + offset; + sub_pixels_2(block[5], ptr, s->linesize >> 1, dxy); + } + + /* DCT & quantize */ + if (s->h263_msmpeg4) { + msmpeg4_dc_scale(s); + } else if (s->h263_pred) { + h263_dc_scale(s); + } else { + /* default quantization values */ + s->y_dc_scale = 8; + s->c_dc_scale = 8; + } + + for(i=0;i<6;i++) { + int last_index; + if (av_fdct == jpeg_fdct_ifast) + last_index = dct_quantize(s, block[i], i, s->qscale); + else + last_index = dct_quantize_mmx(s, block[i], i, s->qscale); + s->block_last_index[i] = last_index; + } + + /* huffman encode */ + switch(s->out_format) { + case FMT_MPEG1: + mpeg1_encode_mb(s, block, motion_x, motion_y); + break; + case FMT_H263: + if (s->h263_msmpeg4) + msmpeg4_encode_mb(s, block, motion_x, motion_y); + else + h263_encode_mb(s, block, motion_x, motion_y); + break; + case FMT_MJPEG: + mjpeg_encode_mb(s, block); + break; + } + + /* decompress blocks so that we keep the state of the decoder */ + s->mv[0][0][0] = motion_x; + s->mv[0][0][1] = motion_y; + + MPV_decode_mb(s, block); + } + } +} + +static int dct_quantize(MpegEncContext *s, + DCTELEM *block, int n, + int qscale) +{ + int i, j, level, last_non_zero, q; + const int *qmat; + + av_fdct (block); + + if (s->mb_intra) { + if (n < 4) + q = s->y_dc_scale; + else + q = s->c_dc_scale; + q = q << 3; + + /* note: block[0] is assumed to be positive */ + block[0] = (block[0] + (q >> 1)) / q; + i = 1; + last_non_zero = 0; + if (s->out_format == FMT_H263) { + qmat = s->q_non_intra_matrix; + } else { + qmat = s->q_intra_matrix; + } + } else { + i = 0; + last_non_zero = -1; + qmat = s->q_non_intra_matrix; + } + + for(;i<64;i++) { + j = zigzag_direct[i]; + level = block[j]; + level = level * qmat[j]; +#ifdef PARANOID + { + static int count = 0; + int level1, level2, qmat1; + double val; + if (qmat == s->q_non_intra_matrix) { + qmat1 = default_non_intra_matrix[j] * s->qscale; + } else { + qmat1 = default_intra_matrix[j] * s->qscale; + } + if (av_fdct != jpeg_fdct_ifast) + val = ((double)block[j] * 8.0) / (double)qmat1; + else + val = ((double)block[j] * 8.0 * 2048.0) / + ((double)qmat1 * aanscales[j]); + level1 = (int)val; + level2 = level / (1 << (QMAT_SHIFT - 3)); + if (level1 != level2) { + fprintf(stderr, "%d: quant error qlevel=%d wanted=%d level=%d qmat1=%d qmat=%d wantedf=%0.6f\n", + count, level2, level1, block[j], qmat1, qmat[j], + val); + count++; + } + + } +#endif + /* XXX: slight error for the low range. Test should be equivalent to + (level <= -(1 << (QMAT_SHIFT - 3)) || level >= (1 << + (QMAT_SHIFT - 3))) + */ + if (((level << (31 - (QMAT_SHIFT - 3))) >> (31 - (QMAT_SHIFT - 3))) != + level) { + level = level / (1 << (QMAT_SHIFT - 3)); + /* XXX: currently, this code is not optimal. the range should be: + mpeg1: -255..255 + mpeg2: -2048..2047 + h263: -128..127 + mpeg4: -2048..2047 + */ + if (level > 127) + level = 127; + else if (level < -128) + level = -128; + block[j] = level; + last_non_zero = i; + } else { + block[j] = 0; + } + } + return last_non_zero; +} + +static int dct_quantize_mmx(MpegEncContext *s, + DCTELEM *block, int n, + int qscale) +{ + int i, j, level, last_non_zero, q; + const int *qmat; + + av_fdct (block); + + if (s->mb_intra) { + if (n < 4) + q = s->y_dc_scale; + else + q = s->c_dc_scale; + + /* note: block[0] is assumed to be positive */ + block[0] = (block[0] + (q >> 1)) / q; + i = 1; + last_non_zero = 0; + if (s->out_format == FMT_H263) { + qmat = s->q_non_intra_matrix; + } else { + qmat = s->q_intra_matrix; + } + } else { + i = 0; + last_non_zero = -1; + qmat = s->q_non_intra_matrix; + } + + for(;i<64;i++) { + j = zigzag_direct[i]; + level = block[j]; + level = level * qmat[j]; + /* XXX: slight error for the low range. Test should be equivalent to + (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 << + (QMAT_SHIFT_MMX - 3))) + */ + if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) != + level) { + level = level / (1 << (QMAT_SHIFT_MMX - 3)); + /* XXX: currently, this code is not optimal. the range should be: + mpeg1: -255..255 + mpeg2: -2048..2047 + h263: -128..127 + mpeg4: -2048..2047 + */ + if (level > 127) + level = 127; + else if (level < -128) + level = -128; + block[j] = level; + last_non_zero = i; + } else { + block[j] = 0; + } + } + return last_non_zero; +} + +static void dct_unquantize(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int i, level; + const UINT16 *quant_matrix; + + if (s->mb_intra) { + if (n < 4) + block[0] = block[0] * s->y_dc_scale; + else + block[0] = block[0] * s->c_dc_scale; + if (s->out_format == FMT_H263) { + i = 1; + goto unquant_even; + } + /* XXX: only mpeg1 */ + quant_matrix = s->intra_matrix; + for(i=1;i<64;i++) { + level = block[i]; + if (level) { + if (level < 0) { + level = -level; + level = (int)(level * qscale * quant_matrix[i]) >> 3; + level = (level - 1) | 1; + level = -level; + } else { + level = (int)(level * qscale * quant_matrix[i]) >> 3; + level = (level - 1) | 1; + } +#ifdef PARANOID + if (level < -2048 || level > 2047) + fprintf(stderr, "unquant error %d %d\n", i, level); +#endif + block[i] = level; + } + } + } else { + i = 0; + unquant_even: + quant_matrix = s->non_intra_matrix; + for(;i<64;i++) { + level = block[i]; + if (level) { + if (level < 0) { + level = -level; + level = (((level << 1) + 1) * qscale * + ((int) (quant_matrix[i]))) >> 4; + level = (level - 1) | 1; + level = -level; + } else { + level = (((level << 1) + 1) * qscale * + ((int) (quant_matrix[i]))) >> 4; + level = (level - 1) | 1; + } +#ifdef PARANOID + if (level < -2048 || level > 2047) + fprintf(stderr, "unquant error %d %d\n", i, level); +#endif + block[i] = level; + } + } + } +} + + +/* rate control */ + +/* an I frame is I_FRAME_SIZE_RATIO bigger than a P frame */ +#define I_FRAME_SIZE_RATIO 3.0 +#define QSCALE_K 20 + +static void rate_control_init(MpegEncContext *s) +{ + s->wanted_bits = 0; + + if (s->intra_only) { + s->I_frame_bits = ((INT64)s->bit_rate * FRAME_RATE_BASE) / s->frame_rate; + s->P_frame_bits = s->I_frame_bits; + } else { + s->P_frame_bits = (int) ((float)(s->gop_size * s->bit_rate) / + (float)((float)s->frame_rate / FRAME_RATE_BASE * (I_FRAME_SIZE_RATIO + s->gop_size - 1))); + s->I_frame_bits = (int)(s->P_frame_bits * I_FRAME_SIZE_RATIO); + } + +#if defined(DEBUG) + printf("I_frame_size=%d P_frame_size=%d\n", + s->I_frame_bits, s->P_frame_bits); +#endif +} + + +/* + * This heuristic is rather poor, but at least we do not have to + * change the qscale at every macroblock. + */ +static int rate_estimate_qscale(MpegEncContext *s) +{ + long long total_bits = s->total_bits; + float q; + int qscale, diff, qmin; + + if (s->pict_type == I_TYPE) { + s->wanted_bits += s->I_frame_bits; + } else { + s->wanted_bits += s->P_frame_bits; + } + diff = s->wanted_bits - total_bits; + q = 31.0 - (float)diff / (QSCALE_K * s->mb_height * s->mb_width); + /* adjust for I frame */ + if (s->pict_type == I_TYPE && !s->intra_only) { + q /= I_FRAME_SIZE_RATIO; + } + + /* using a too small Q scale leeds to problems in mpeg1 and h263 + because AC coefficients are clamped to 255 or 127 */ + qmin = 3; + if (q < qmin) + q = qmin; + else if (q > 31) + q = 31; + qscale = (int)(q + 0.5); +#if defined(DEBUG) + printf("%d: total=%Ld br=%0.1f diff=%d qest=%0.1f\n", + s->picture_number, + total_bits, + (float)s->frame_rate / FRAME_RATE_BASE * + total_bits / s->picture_number, + diff, q); +#endif + return qscale; +} + +AVCodec mpeg1video_encoder = { + "mpeg1video", + CODEC_TYPE_VIDEO, + CODEC_ID_MPEG1VIDEO, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVCodec h263_encoder = { + "h263", + CODEC_TYPE_VIDEO, + CODEC_ID_H263, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVCodec h263p_encoder = { + "h263p", + CODEC_TYPE_VIDEO, + CODEC_ID_H263P, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVCodec rv10_encoder = { + "rv10", + CODEC_TYPE_VIDEO, + CODEC_ID_RV10, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVCodec mjpeg_encoder = { + "mjpeg", + CODEC_TYPE_VIDEO, + CODEC_ID_MJPEG, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVCodec opendivx_encoder = { + "opendivx", + CODEC_TYPE_VIDEO, + CODEC_ID_OPENDIVX, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + +AVCodec msmpeg4_encoder = { + "msmpeg4", + CODEC_TYPE_VIDEO, + CODEC_ID_MSMPEG4, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h new file mode 100644 index 0000000000..b04d7b7693 --- /dev/null +++ b/libavcodec/mpegvideo.h @@ -0,0 +1,273 @@ +/* + * Generic DCT based hybrid video encoder + * Copyright (c) 2000,2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Macros for picture code type. */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 + +enum OutputFormat { + FMT_MPEG1, + FMT_H263, + FMT_MJPEG, +}; + +#define MPEG_BUF_SIZE (16 * 1024) + +typedef struct MpegEncContext { + /* the following parameters must be initialized before encoding */ + int width, height; /* picture size. must be a multiple of 16 */ + int gop_size; + int frame_rate; /* number of frames per second */ + int intra_only; /* if true, only intra pictures are generated */ + int bit_rate; /* wanted bit rate */ + enum OutputFormat out_format; /* output format */ + int h263_plus; /* h263 plus headers */ + int h263_rv10; /* use RV10 variation for H263 */ + int h263_pred; /* use OpenDIVX (aka mpeg4) ac/dc predictions */ + int h263_msmpeg4; /* generate MSMPEG4 compatible stream */ + int h263_intel; /* use I263 intel h263 header */ + int fixed_qscale; /* fixed qscale if non zero */ + /* the following fields are managed internally by the encoder */ + + /* bit output */ + PutBitContext pb; + + /* sequence parameters */ + int context_initialized; + int picture_number; + int fake_picture_number; /* picture number at the bitstream frame rate */ + int gop_picture_number; /* index of the first picture of a GOP */ + int mb_width, mb_height; + int linesize; /* line size, in bytes, may be different from width */ + UINT8 *new_picture[3]; /* picture to be compressed */ + UINT8 *last_picture[3]; /* previous picture */ + UINT8 *last_picture_base[3]; /* real start of the picture */ + UINT8 *next_picture[3]; /* previous picture (for bidir pred) */ + UINT8 *next_picture_base[3]; /* real start of the picture */ + UINT8 *aux_picture[3]; /* aux picture (for B frames only) */ + UINT8 *aux_picture_base[3]; /* real start of the picture */ + UINT8 *current_picture[3]; /* buffer to store the decompressed current picture */ + int last_dc[3]; /* last DC values for MPEG1 */ + INT16 *dc_val[3]; /* used for mpeg4 DC prediction */ + int y_dc_scale, c_dc_scale; + UINT8 *coded_block; /* used for coded block pattern prediction */ + INT16 (*ac_val[3])[16]; /* used for for mpeg4 AC prediction */ + int ac_pred; + + int qscale; + int pict_type; + int frame_rate_index; + /* motion compensation */ + int unrestricted_mv; + int h263_long_vectors; /* use horrible h263v1 long vector mode */ + + int f_code; /* resolution */ + INT16 (*motion_val)[2]; /* used for MV prediction */ + int full_search; + int mv_dir; +#define MV_DIR_BACKWARD 1 +#define MV_DIR_FORWARD 2 + int mv_type; +#define MV_TYPE_16X16 0 /* 1 vector for the whole mb */ +#define MV_TYPE_8X8 1 /* 4 vectors (h263) */ +#define MV_TYPE_16X8 2 /* 2 vectors, one per 16x8 block */ +#define MV_TYPE_FIELD 3 /* 2 vectors, one per field */ +#define MV_TYPE_DMV 4 /* 2 vectors, special mpeg2 Dual Prime Vectors */ + /* motion vectors for a macroblock + first coordinate : 0 = forward 1 = backward + second " : depend on type + third " : 0 = x, 1 = y + */ + int mv[2][4][2]; + int field_select[2][2]; + int last_mv[2][2][2]; + + int has_b_frames; + int no_rounding; /* apply no rounding to motion estimation (MPEG4) */ + + /* macroblock layer */ + int mb_x, mb_y; + int mb_incr; + int mb_intra; + /* matrix transmitted in the bitstream */ + UINT16 intra_matrix[64]; + UINT16 chroma_intra_matrix[64]; + UINT16 non_intra_matrix[64]; + UINT16 chroma_non_intra_matrix[64]; + /* precomputed matrix (combine qscale and DCT renorm) */ + int q_intra_matrix[64]; + int q_non_intra_matrix[64]; + int block_last_index[6]; /* last non zero coefficient in block */ + + void *opaque; /* private data for the user */ + + /* bit rate control */ + int I_frame_bits; /* wanted number of bits per I frame */ + int P_frame_bits; /* same for P frame */ + long long wanted_bits; + long long total_bits; + + /* mpeg4 specific */ + int time_increment_bits; + + /* RV10 specific */ + int rv10_version; /* RV10 version: 0 or 3 */ + int rv10_first_dc_coded[3]; + + /* MJPEG specific */ + struct MJpegContext *mjpeg_ctx; + + /* MSMPEG4 specific */ + int mv_table_index; + int rl_table_index; + int rl_chroma_table_index; + int dc_table_index; + int use_skip_mb_code; + int slice_height; /* in macroblocks */ + int first_slice_line; + /* decompression specific */ + GetBitContext gb; + + /* MPEG2 specific - I wish I had not to support this mess. */ + int progressive_sequence; + int mpeg_f_code[2][2]; + int picture_structure; +/* picture type */ +#define PICT_TOP_FIELD 1 +#define PICT_BOTTOM_FIELD 2 +#define PICT_FRAME 3 + + int intra_dc_precision; + int frame_pred_frame_dct; + int top_field_first; + int concealment_motion_vectors; + int q_scale_type; + int intra_vlc_format; + int alternate_scan; + int repeat_first_field; + int chroma_420_type; + int progressive_frame; + int mpeg2; + int full_pel[2]; + int interlaced_dct; + int last_qscale; + int first_slice; +} MpegEncContext; + +extern const UINT8 zigzag_direct[64]; + +int MPV_common_init(MpegEncContext *s); +void MPV_common_end(MpegEncContext *s); +void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); +void MPV_frame_start(MpegEncContext *s); +void MPV_frame_end(MpegEncContext *s); + +/* motion_est.c */ + +int estimate_motion(MpegEncContext *s, + int mb_x, int mb_y, + int *mx_ptr, int *my_ptr); + +/* mpeg12.c */ +extern const UINT8 default_intra_matrix[64]; +extern const UINT8 default_non_intra_matrix[64]; + +void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number); +void mpeg1_encode_mb(MpegEncContext *s, + DCTELEM block[6][64], + int motion_x, int motion_y); + +/* h263enc.c */ + +/* run length table */ +#define MAX_RUN 64 +#define MAX_LEVEL 64 + +typedef struct RLTable { + int n; /* number of entries of table_vlc minus 1 */ + int last; /* number of values for last = 0 */ + const UINT16 (*table_vlc)[2]; + const INT8 *table_run; + const INT8 *table_level; + UINT8 *index_run[2]; /* encoding only */ + INT8 *max_level[2]; /* encoding & decoding */ + INT8 *max_run[2]; /* encoding & decoding */ + VLC vlc; /* decoding only */ +} RLTable; + +void init_rl(RLTable *rl); +void init_vlc_rl(RLTable *rl); + +extern inline int get_rl_index(const RLTable *rl, int last, int run, int level) +{ + int index; + index = rl->index_run[last][run]; + if (index >= rl->n) + return rl->n; + if (level > rl->max_level[last][run]) + return rl->n; + return index + level - 1; +} + +void h263_encode_mb(MpegEncContext *s, + DCTELEM block[6][64], + int motion_x, int motion_y); +void h263_encode_picture_header(MpegEncContext *s, int picture_number); +void h263_dc_scale(MpegEncContext *s); +INT16 *h263_pred_motion(MpegEncContext * s, int block, + int *px, int *py); +void mpeg4_pred_ac(MpegEncContext * s, INT16 *block, int n, + int dir); +void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number); +void h263_encode_init_vlc(MpegEncContext *s); + +void h263_decode_init_vlc(MpegEncContext *s); +int h263_decode_picture_header(MpegEncContext *s); +int mpeg4_decode_picture_header(MpegEncContext * s); +int intel_h263_decode_picture_header(MpegEncContext *s); +int h263_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]); +int h263_get_picture_format(int width, int height); +extern UINT8 ff_alternate_horizontal_scan[64]; +extern UINT8 ff_alternate_vertical_scan[64]; + +/* rv10.c */ +void rv10_encode_picture_header(MpegEncContext *s, int picture_number); +int rv_decode_dc(MpegEncContext *s, int n); + +/* msmpeg4.c */ +void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number); +void msmpeg4_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y); +void msmpeg4_dc_scale(MpegEncContext * s); +int msmpeg4_decode_picture_header(MpegEncContext * s); +int msmpeg4_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]); +int msmpeg4_decode_init_vlc(MpegEncContext *s); + +/* mjpegenc.c */ + +int mjpeg_init(MpegEncContext *s); +void mjpeg_close(MpegEncContext *s); +void mjpeg_encode_mb(MpegEncContext *s, + DCTELEM block[6][64]); +void mjpeg_picture_header(MpegEncContext *s); +void mjpeg_picture_trailer(MpegEncContext *s); diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c new file mode 100644 index 0000000000..044d964ee5 --- /dev/null +++ b/libavcodec/msmpeg4.c @@ -0,0 +1,931 @@ +/* + * MSMPEG4 backend for ffmpeg encoder and decoder + * Copyright (c) 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include "common.h" +#include "dsputil.h" +#include "mpegvideo.h" + +/* + * You can also call this codec : MPEG4 with a twist ! + * + * TODO: + * - (encoding) select best mv table (two choices) + * - (encoding) select best vlc/dc table + * - (decoding) handle slice indication + */ +//#define DEBUG + +/* motion vector table */ +typedef struct MVTable { + int n; + const UINT16 *table_mv_code; + const UINT8 *table_mv_bits; + const UINT8 *table_mvx; + const UINT8 *table_mvy; + UINT16 *table_mv_index; /* encoding: convert mv to index in table_mv */ + VLC vlc; /* decoding: vlc */ +} MVTable; + +static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n); +static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded); +static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); +static int msmpeg4_decode_motion(MpegEncContext * s, + int *mx_ptr, int *my_ptr); + +#ifdef DEBUG +int intra_count = 0; +int frame_count = 0; +#endif +/* XXX: move it to mpegvideo.h */ + +static int init_done = 0; + +#include "msmpeg4data.h" + +#ifdef STATS + +const char *st_names[ST_NB] = { + "unknown", + "dc", + "intra_ac", + "inter_ac", + "intra_mb", + "inter_mb", + "mv", +}; + +int st_current_index = 0; +unsigned int st_bit_counts[ST_NB]; +unsigned int st_out_bit_counts[ST_NB]; + +#define set_stat(var) st_current_index = var; + +void print_stats(void) +{ + unsigned int total; + int i; + + printf("Input:\n"); + total = 0; + for(i=0;i<ST_NB;i++) + total += st_bit_counts[i]; + if (total == 0) + total = 1; + for(i=0;i<ST_NB;i++) { + printf("%-10s : %10.1f %5.1f%%\n", + st_names[i], + (double)st_bit_counts[i] / 8.0, + (double)st_bit_counts[i] * 100.0 / total); + } + printf("%-10s : %10.1f %5.1f%%\n", + "total", + (double)total / 8.0, + 100.0); + + printf("Output:\n"); + total = 0; + for(i=0;i<ST_NB;i++) + total += st_out_bit_counts[i]; + if (total == 0) + total = 1; + for(i=0;i<ST_NB;i++) { + printf("%-10s : %10.1f %5.1f%%\n", + st_names[i], + (double)st_out_bit_counts[i] / 8.0, + (double)st_out_bit_counts[i] * 100.0 / total); + } + printf("%-10s : %10.1f %5.1f%%\n", + "total", + (double)total / 8.0, + 100.0); +} + +#else + +#define set_stat(var) + +#endif + +/* build the table which associate a (x,y) motion vector to a vlc */ +static void init_mv_table(MVTable *tab) +{ + int i, x, y; + + tab->table_mv_index = malloc(sizeof(UINT16) * 4096); + /* mark all entries as not used */ + for(i=0;i<4096;i++) + tab->table_mv_index[i] = tab->n; + + for(i=0;i<tab->n;i++) { + x = tab->table_mvx[i]; + y = tab->table_mvy[i]; + tab->table_mv_index[(x << 6) | y] = i; + } +} + +static void code012(PutBitContext *pb, int n) +{ + if (n == 0) { + put_bits(pb, 1, 0); + } else { + put_bits(pb, 1, 1); + put_bits(pb, 1, (n >= 2)); + } +} + +/* write MSMPEG4 V3 compatible frame header */ +void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) +{ + int i; + + align_put_bits(&s->pb); + + put_bits(&s->pb, 2, s->pict_type - 1); + + put_bits(&s->pb, 5, s->qscale); + + s->rl_table_index = 2; + s->rl_chroma_table_index = 1; /* only for I frame */ + s->dc_table_index = 1; + s->mv_table_index = 1; /* only if P frame */ + s->use_skip_mb_code = 1; /* only if P frame */ + + if (s->pict_type == I_TYPE) { + put_bits(&s->pb, 5, 0x17); /* indicate only one "slice" */ + + code012(&s->pb, s->rl_chroma_table_index); + code012(&s->pb, s->rl_table_index); + + put_bits(&s->pb, 1, s->dc_table_index); + s->no_rounding = 1; + } else { + put_bits(&s->pb, 1, s->use_skip_mb_code); + + s->rl_chroma_table_index = s->rl_table_index; + code012(&s->pb, s->rl_table_index); + + put_bits(&s->pb, 1, s->dc_table_index); + + put_bits(&s->pb, 1, s->mv_table_index); + s->no_rounding ^= 1; + } + + if (!init_done) { + /* init various encoding tables */ + init_done = 1; + init_mv_table(&mv_tables[0]); + init_mv_table(&mv_tables[1]); + for(i=0;i<NB_RL_TABLES;i++) + init_rl(&rl_table[i]); + } + +#ifdef DEBUG + intra_count = 0; + printf("*****frame %d:\n", frame_count++); +#endif +} + +/* predict coded block */ +static inline int coded_block_pred(MpegEncContext * s, int n, UINT8 **coded_block_ptr) +{ + int x, y, wrap, pred, a, b, c; + + x = 2 * s->mb_x + 1 + (n & 1); + y = 2 * s->mb_y + 1 + ((n & 2) >> 1); + wrap = s->mb_width * 2 + 2; + + /* B C + * A X + */ + a = s->coded_block[(x - 1) + (y) * wrap]; + b = s->coded_block[(x - 1) + (y - 1) * wrap]; + c = s->coded_block[(x) + (y - 1) * wrap]; + + if (b == c) { + pred = a; + } else { + pred = c; + } + + /* store value */ + *coded_block_ptr = &s->coded_block[(x) + (y) * wrap]; + + return pred; +} + +static void msmpeg4_encode_motion(MpegEncContext * s, + int mx, int my) +{ + int code; + MVTable *mv; + + /* modulo encoding */ + /* WARNING : you cannot reach all the MVs even with the modulo + encoding. This is a somewhat strange compromise they took !!! */ + if (mx <= -64) + mx += 64; + else if (mx >= 64) + mx -= 64; + if (my <= -64) + my += 64; + else if (my >= 64) + my -= 64; + + mx += 32; + my += 32; +#if 0 + if ((unsigned)mx >= 64 || + (unsigned)my >= 64) + fprintf(stderr, "error mx=%d my=%d\n", mx, my); +#endif + mv = &mv_tables[s->mv_table_index]; + + code = mv->table_mv_index[(mx << 6) | my]; + set_stat(ST_MV); + put_bits(&s->pb, + mv->table_mv_bits[code], + mv->table_mv_code[code]); + if (code == mv->n) { + /* escape : code litterally */ + put_bits(&s->pb, 6, mx); + put_bits(&s->pb, 6, my); + } +} + +void msmpeg4_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + int cbp, coded_cbp, i; + int pred_x, pred_y; + UINT8 *coded_block; + + if (!s->mb_intra) { + /* compute cbp */ + set_stat(ST_INTER_MB); + cbp = 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) { + /* skip macroblock */ + put_bits(&s->pb, 1, 1); + return; + } + if (s->use_skip_mb_code) + put_bits(&s->pb, 1, 0); /* mb coded */ + + put_bits(&s->pb, + table_mb_non_intra[cbp + 64][1], + table_mb_non_intra[cbp + 64][0]); + + /* motion vector */ + h263_pred_motion(s, 0, &pred_x, &pred_y); + msmpeg4_encode_motion(s, motion_x - pred_x, + motion_y - pred_y); + } else { + /* compute cbp */ + cbp = 0; + coded_cbp = 0; + for (i = 0; i < 6; i++) { + int val, pred; + val = (s->block_last_index[i] >= 1); + cbp |= val << (5 - i); + if (i < 4) { + /* predict value for close blocks only for luma */ + pred = coded_block_pred(s, i, &coded_block); + *coded_block = val; + val = val ^ pred; + } + coded_cbp |= val << (5 - i); + } +#if 0 + if (coded_cbp) + printf("cbp=%x %x\n", cbp, coded_cbp); +#endif + + if (s->pict_type == I_TYPE) { + set_stat(ST_INTRA_MB); + put_bits(&s->pb, + table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]); + } else { + if (s->use_skip_mb_code) + put_bits(&s->pb, 1, 0); /* mb coded */ + put_bits(&s->pb, + table_mb_non_intra[cbp][1], + table_mb_non_intra[cbp][0]); + } + set_stat(ST_INTRA_MB); + put_bits(&s->pb, 1, 0); /* no AC prediction yet */ + } + + for (i = 0; i < 6; i++) { + msmpeg4_encode_block(s, block[i], i); + } +} + + +/* strongly inspirated from MPEG4, but not exactly the same ! */ +void msmpeg4_dc_scale(MpegEncContext * s) +{ + int scale; + + if (s->qscale < 5) + scale = 8; + else if (s->qscale < 9) + scale = 2 * s->qscale; + else + scale = s->qscale + 8; + s->y_dc_scale = scale; + s->c_dc_scale = (s->qscale + 13) / 2; +} + +/* dir = 0: left, dir = 1: top prediction */ +static int msmpeg4_pred_dc(MpegEncContext * s, int n, + UINT16 **dc_val_ptr, int *dir_ptr) +{ + int a, b, c, x, y, wrap, pred, scale; + UINT16 *dc_val; + + /* find prediction */ + if (n < 4) { + x = 2 * s->mb_x + 1 + (n & 1); + y = 2 * s->mb_y + 1 + ((n & 2) >> 1); + wrap = s->mb_width * 2 + 2; + dc_val = s->dc_val[0]; + scale = s->y_dc_scale; + } else { + x = s->mb_x + 1; + y = s->mb_y + 1; + wrap = s->mb_width + 2; + dc_val = s->dc_val[n - 4 + 1]; + scale = s->c_dc_scale; + } + + /* B C + * A X + */ + a = dc_val[(x - 1) + (y) * wrap]; + b = dc_val[(x - 1) + (y - 1) * wrap]; + c = dc_val[(x) + (y - 1) * wrap]; + + /* XXX: the following solution consumes divisions, but it does not + necessitate to modify mpegvideo.c. The problem comes from the + fact they decided to store the quantized DC (which would lead + to problems if Q could vary !) */ + a = (a + (scale >> 1)) / scale; + b = (b + (scale >> 1)) / scale; + c = (c + (scale >> 1)) / scale; + + /* XXX: WARNING: they did not choose the same test as MPEG4. This + is very important ! */ + if (abs(a - b) <= abs(b - c)) { + pred = c; + *dir_ptr = 1; + } else { + pred = a; + *dir_ptr = 0; + } + + /* update predictor */ + *dc_val_ptr = &dc_val[(x) + (y) * wrap]; + return pred; +} + +#define DC_MAX 119 + +static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr) +{ + int sign, code; + int pred; + UINT16 *dc_val; + + pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); + + /* update predictor */ + if (n < 4) { + *dc_val = level * s->y_dc_scale; + } else { + *dc_val = level * s->c_dc_scale; + } + + /* do the prediction */ + level -= pred; + + sign = 0; + if (level < 0) { + level = -level; + sign = 1; + } + + code = level; + if (code > DC_MAX) + code = DC_MAX; + + if (s->dc_table_index == 0) { + if (n < 4) { + put_bits(&s->pb, table0_dc_lum[code][1], table0_dc_lum[code][0]); + } else { + put_bits(&s->pb, table0_dc_chroma[code][1], table0_dc_chroma[code][0]); + } + } else { + if (n < 4) { + put_bits(&s->pb, table1_dc_lum[code][1], table1_dc_lum[code][0]); + } else { + put_bits(&s->pb, table1_dc_chroma[code][1], table1_dc_chroma[code][0]); + } + } + + if (code == DC_MAX) + put_bits(&s->pb, 8, level); + + if (level != 0) { + put_bits(&s->pb, 1, sign); + } +} + +/* Encoding of a block. Very similar to MPEG4 except for a different + escape coding (same as H263) and more vlc tables. + */ +static void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n) +{ + int level, run, last, i, j, last_index; + int last_non_zero, sign, slevel; + int code, run_diff, dc_pred_dir; + const RLTable *rl; + + if (s->mb_intra) { + set_stat(ST_DC); + msmpeg4_encode_dc(s, block[0], n, &dc_pred_dir); + i = 1; + if (n < 4) { + rl = &rl_table[s->rl_table_index]; + } else { + rl = &rl_table[3 + s->rl_chroma_table_index]; + } + run_diff = 0; + set_stat(ST_INTRA_AC); + } else { + i = 0; + rl = &rl_table[3 + s->rl_table_index]; + run_diff = 1; + set_stat(ST_INTER_AC); + } + + /* AC coefs */ + last_index = s->block_last_index[n]; + last_non_zero = i - 1; + for (; i <= last_index; i++) { + j = zigzag_direct[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + slevel = level; + if (level < 0) { + sign = 1; + level = -level; + } + code = get_rl_index(rl, last, run, level); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + int level1, run1; + + level1 = level - rl->max_level[last][run]; + if (level1 < 1) + goto esc2; + code = get_rl_index(rl, last, run, level1); + if (code == rl->n) { + esc2: + put_bits(&s->pb, 1, 0); + if (level > MAX_LEVEL) + goto esc3; + run1 = run - rl->max_run[last][level] - run_diff; + if (run1 < 0) + goto esc3; + code = get_rl_index(rl, last, run1, level); + if (code == rl->n) { + esc3: + /* third escape */ + put_bits(&s->pb, 1, 0); + put_bits(&s->pb, 1, last); + put_bits(&s->pb, 6, run); + put_bits(&s->pb, 8, slevel & 0xff); + } else { + /* second escape */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(&s->pb, 1, sign); + } + } else { + /* first escape */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(&s->pb, 1, sign); + } + } else { + put_bits(&s->pb, 1, sign); + } + last_non_zero = i; + } + } +} + +/****************************************/ +/* decoding stuff */ + +static VLC mb_non_intra_vlc; +static VLC mb_intra_vlc; +static VLC dc_lum_vlc[2]; +static VLC dc_chroma_vlc[2]; + +/* init all vlc decoding tables */ +int msmpeg4_decode_init_vlc(MpegEncContext *s) +{ + int i; + MVTable *mv; + + for(i=0;i<NB_RL_TABLES;i++) { + init_rl(&rl_table[i]); + init_vlc_rl(&rl_table[i]); + } + for(i=0;i<2;i++) { + mv = &mv_tables[i]; + init_vlc(&mv->vlc, 9, mv->n + 1, + mv->table_mv_bits, 1, 1, + mv->table_mv_code, 2, 2); + } + + init_vlc(&dc_lum_vlc[0], 9, 120, + &table0_dc_lum[0][1], 8, 4, + &table0_dc_lum[0][0], 8, 4); + init_vlc(&dc_chroma_vlc[0], 9, 120, + &table0_dc_chroma[0][1], 8, 4, + &table0_dc_chroma[0][0], 8, 4); + init_vlc(&dc_lum_vlc[1], 9, 120, + &table1_dc_lum[0][1], 8, 4, + &table1_dc_lum[0][0], 8, 4); + init_vlc(&dc_chroma_vlc[1], 9, 120, + &table1_dc_chroma[0][1], 8, 4, + &table1_dc_chroma[0][0], 8, 4); + + init_vlc(&mb_non_intra_vlc, 9, 128, + &table_mb_non_intra[0][1], 8, 4, + &table_mb_non_intra[0][0], 8, 4); + init_vlc(&mb_intra_vlc, 9, 128, + &table_mb_intra[0][1], 4, 2, + &table_mb_intra[0][0], 4, 2); + return 0; +} + +static int decode012(GetBitContext *gb) +{ + int n; + n = get_bits(gb, 1); + if (n == 0) + return 0; + else + return get_bits(gb, 1) + 1; +} + +int msmpeg4_decode_picture_header(MpegEncContext * s) +{ + int code; + + s->pict_type = get_bits(&s->gb, 2) + 1; + if (s->pict_type != I_TYPE && + s->pict_type != P_TYPE) + return -1; + + s->qscale = get_bits(&s->gb, 5); + + if (s->pict_type == I_TYPE) { + code = get_bits(&s->gb, 5); + /* 0x17: one slice, 0x18: three slices */ + /* XXX: implement it */ + if (code < 0x17) + return -1; + s->slice_height = s->mb_height / (code - 0x16); + s->rl_chroma_table_index = decode012(&s->gb); + s->rl_table_index = decode012(&s->gb); + + s->dc_table_index = get_bits(&s->gb, 1); + s->no_rounding = 1; + } else { + s->use_skip_mb_code = get_bits(&s->gb, 1); + + s->rl_table_index = decode012(&s->gb); + s->rl_chroma_table_index = s->rl_table_index; + + s->dc_table_index = get_bits(&s->gb, 1); + + s->mv_table_index = get_bits(&s->gb, 1); + s->no_rounding ^= 1; + } +#ifdef DEBUG + printf("*****frame %d:\n", frame_count++); +#endif + return 0; +} + +void memsetw(short *tab, int val, int n) +{ + int i; + for(i=0;i<n;i++) + tab[i] = val; +} + +int msmpeg4_decode_mb(MpegEncContext *s, + DCTELEM block[6][64]) +{ + int cbp, code, i; + int pred, val; + UINT8 *coded_val; + + /* special slice handling */ + if (s->mb_x == 0) { + if ((s->mb_y % s->slice_height) == 0) { + int wrap; + /* reset DC pred (set previous line to 1024) */ + wrap = 2 * s->mb_width + 2; + memsetw(&s->dc_val[0][(1) + (2 * s->mb_y) * wrap], + 1024, 2 * s->mb_width); + wrap = s->mb_width + 2; + memsetw(&s->dc_val[1][(1) + (s->mb_y) * wrap], + 1024, s->mb_width); + memsetw(&s->dc_val[2][(1) + (s->mb_y) * wrap], + 1024, s->mb_width); + + s->first_slice_line = 1; + } else { + s->first_slice_line = 0; + } + } + + if (s->pict_type == P_TYPE) { + set_stat(ST_INTER_MB); + if (s->use_skip_mb_code) { + if (get_bits(&s->gb, 1)) { + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + return 0; + } + } + + code = get_vlc(&s->gb, &mb_non_intra_vlc); + if (code < 0) + return -1; + if (code & 0x40) + s->mb_intra = 0; + else + s->mb_intra = 1; + + cbp = code & 0x3f; + } else { + set_stat(ST_INTRA_MB); + s->mb_intra = 1; + code = get_vlc(&s->gb, &mb_intra_vlc); + if (code < 0) + return -1; + /* predict coded block pattern */ + cbp = 0; + for(i=0;i<6;i++) { + val = ((code >> (5 - i)) & 1); + if (i < 4) { + pred = coded_block_pred(s, i, &coded_val); + val = val ^ pred; + *coded_val = val; + } + cbp |= val << (5 - i); + } + } + + if (!s->mb_intra) { + int mx, my; + set_stat(ST_MV); + h263_pred_motion(s, 0, &mx, &my); + if (msmpeg4_decode_motion(s, &mx, &my) < 0) + return -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + } else { + set_stat(ST_INTRA_MB); + s->ac_pred = get_bits(&s->gb, 1); + } + + for (i = 0; i < 6; i++) { + if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) + return -1; + } + return 0; +} + +static int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded) +{ + int code, level, i, j, last, run, run_diff; + int dc_pred_dir; + RLTable *rl; + const UINT8 *scan_table; + + if (s->mb_intra) { + /* DC coef */ + set_stat(ST_DC); + level = msmpeg4_decode_dc(s, n, &dc_pred_dir); + if (level < 0) + return -1; + block[0] = level; + if (n < 4) { + rl = &rl_table[s->rl_table_index]; + } else { + rl = &rl_table[3 + s->rl_chroma_table_index]; + } + run_diff = 0; + i = 1; + if (!coded) { + goto not_coded; + } + if (s->ac_pred) { + if (dc_pred_dir == 0) + scan_table = ff_alternate_vertical_scan; /* left */ + else + scan_table = ff_alternate_horizontal_scan; /* top */ + } else { + scan_table = zigzag_direct; + } + set_stat(ST_INTRA_AC); + } else { + i = 0; + rl = &rl_table[3 + s->rl_table_index]; + run_diff = 1; + if (!coded) { + s->block_last_index[n] = i - 1; + return 0; + } + scan_table = zigzag_direct; + set_stat(ST_INTER_AC); + } + + for(;;) { + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0) + return -1; + if (code == rl->n) { + /* escape */ + if (get_bits(&s->gb, 1) == 0) { + if (get_bits(&s->gb, 1) == 0) { + /* third escape */ + last = get_bits(&s->gb, 1); + run = get_bits(&s->gb, 6); + level = get_bits(&s->gb, 8); + level = (level << 24) >> 24; /* sign extend */ + } else { + /* second escape */ + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0 || code >= rl->n) + return -1; + run = rl->table_run[code]; + level = rl->table_level[code]; + last = code >= rl->last; + run += rl->max_run[last][level] + run_diff; + if (get_bits(&s->gb, 1)) + level = -level; + } + } else { + /* first escape */ + code = get_vlc(&s->gb, &rl->vlc); + if (code < 0 || code >= rl->n) + return -1; + run = rl->table_run[code]; + level = rl->table_level[code]; + last = code >= rl->last; + level += rl->max_level[last][run]; + if (get_bits(&s->gb, 1)) + level = -level; + } + } else { + run = rl->table_run[code]; + level = rl->table_level[code]; + last = code >= rl->last; + if (get_bits(&s->gb, 1)) + level = -level; + } + i += run; + if (i >= 64) + return -1; + j = scan_table[i]; + block[j] = level; + i++; + if (last) + break; + } + not_coded: + if (s->mb_intra) { + mpeg4_pred_ac(s, block, n, dc_pred_dir); + if (s->ac_pred) { + i = 64; /* XXX: not optimal */ + } + } + s->block_last_index[n] = i - 1; + + return 0; +} + +static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) +{ + int level, pred; + UINT16 *dc_val; + + if (n < 4) { + level = get_vlc(&s->gb, &dc_lum_vlc[s->dc_table_index]); + } else { + level = get_vlc(&s->gb, &dc_chroma_vlc[s->dc_table_index]); + } + if (level < 0) + return -1; + + if (level == DC_MAX) { + level = get_bits(&s->gb, 8); + if (get_bits(&s->gb, 1)) + level = -level; + } else if (level != 0) { + if (get_bits(&s->gb, 1)) + level = -level; + } + + pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr); + level += pred; + + /* update predictor */ + if (n < 4) { + *dc_val = level * s->y_dc_scale; + } else { + *dc_val = level * s->c_dc_scale; + } + + return level; +} + +static int msmpeg4_decode_motion(MpegEncContext * s, + int *mx_ptr, int *my_ptr) +{ + MVTable *mv; + int code, mx, my; + + mv = &mv_tables[s->mv_table_index]; + + code = get_vlc(&s->gb, &mv->vlc); + if (code < 0) + return -1; + if (code == mv->n) { + mx = get_bits(&s->gb, 6); + my = get_bits(&s->gb, 6); + } else { + mx = mv->table_mvx[code]; + my = mv->table_mvy[code]; + } + + mx += *mx_ptr - 32; + my += *my_ptr - 32; + /* WARNING : they do not do exactly modulo encoding */ + if (mx <= -64) + mx += 64; + else if (mx >= 64) + mx -= 64; + + if (my <= -64) + my += 64; + else if (my >= 64) + my -= 64; + *mx_ptr = mx; + *my_ptr = my; + return 0; +} diff --git a/libavcodec/msmpeg4data.h b/libavcodec/msmpeg4data.h new file mode 100644 index 0000000000..03a2612114 --- /dev/null +++ b/libavcodec/msmpeg4data.h @@ -0,0 +1,1767 @@ +/* + * MSMPEG4 data tables. + */ + +/* intra picture macro block coded block pattern */ +const UINT16 table_mb_intra[64][2] = { +{ 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 }, +{ 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 }, +{ 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 }, +{ 0x2, 6 },{ 0xec, 9 },{ 0x77, 8 },{ 0x0, 8 }, +{ 0x3, 5 },{ 0xb7, 9 },{ 0x2c, 7 },{ 0x13, 7 }, +{ 0x1, 6 },{ 0x168, 10 },{ 0x46, 8 },{ 0x3f, 8 }, +{ 0x1e, 6 },{ 0x712, 13 },{ 0xb5, 9 },{ 0x42, 8 }, +{ 0x22, 7 },{ 0x1c5, 11 },{ 0x11e, 10 },{ 0x87, 9 }, +{ 0x6, 4 },{ 0x3, 9 },{ 0x1e, 7 },{ 0x1c, 6 }, +{ 0x12, 7 },{ 0x388, 12 },{ 0x44, 9 },{ 0x70, 9 }, +{ 0x1f, 6 },{ 0x23e, 11 },{ 0x39, 8 },{ 0x8e, 9 }, +{ 0x1, 7 },{ 0x1c6, 11 },{ 0xb6, 9 },{ 0x45, 9 }, +{ 0x14, 6 },{ 0x23f, 11 },{ 0x7d, 9 },{ 0x18, 9 }, +{ 0x7, 7 },{ 0x1c7, 11 },{ 0x86, 9 },{ 0x19, 9 }, +{ 0x15, 6 },{ 0x1db, 10 },{ 0x2, 9 },{ 0x46, 9 }, +{ 0xd, 8 },{ 0x713, 13 },{ 0x1da, 10 },{ 0x169, 10 }, +}; + +/* non intra picture macro block coded block pattern + mb type */ +const UINT32 table_mb_non_intra[128][2] = { +{ 0x40, 7 },{ 0x13c9, 13 },{ 0x9fd, 12 },{ 0x1fc, 15 }, +{ 0x9fc, 12 },{ 0xa83, 18 },{ 0x12d34, 17 },{ 0x83bc, 16 }, +{ 0x83a, 12 },{ 0x7f8, 17 },{ 0x3fd, 16 },{ 0x3ff, 16 }, +{ 0x79, 13 },{ 0xa82, 18 },{ 0x969d, 16 },{ 0x2a4, 16 }, +{ 0x978, 12 },{ 0x543, 17 },{ 0x41df, 15 },{ 0x7f9, 17 }, +{ 0x12f3, 13 },{ 0x25a6b, 18 },{ 0x25ef9, 18 },{ 0x3fa, 16 }, +{ 0x20ee, 14 },{ 0x969ab, 20 },{ 0x969c, 16 },{ 0x25ef8, 18 }, +{ 0x12d2, 13 },{ 0xa85, 18 },{ 0x969e, 16 },{ 0x4bc8, 15 }, +{ 0x3d, 12 },{ 0x12f7f, 17 },{ 0x2a2, 16 },{ 0x969f, 16 }, +{ 0x25ee, 14 },{ 0x12d355, 21 },{ 0x12f7d, 17 },{ 0x12f7e, 17 }, +{ 0x9e5, 12 },{ 0xa81, 18 },{ 0x4b4d4, 19 },{ 0x83bd, 16 }, +{ 0x78, 13 },{ 0x969b, 16 },{ 0x3fe, 16 },{ 0x2a5, 16 }, +{ 0x7e, 13 },{ 0xa80, 18 },{ 0x2a3, 16 },{ 0x3fb, 16 }, +{ 0x1076, 13 },{ 0xa84, 18 },{ 0x153, 15 },{ 0x4bc9, 15 }, +{ 0x55, 13 },{ 0x12d354, 21 },{ 0x4bde, 15 },{ 0x25e5, 14 }, +{ 0x25b, 10 },{ 0x4b4c, 15 },{ 0x96b, 12 },{ 0x96a, 12 }, +{ 0x1, 2 },{ 0x0, 7 },{ 0x26, 6 },{ 0x12b, 9 }, +{ 0x7, 3 },{ 0x20f, 10 },{ 0x4, 9 },{ 0x28, 12 }, +{ 0x6, 3 },{ 0x20a, 10 },{ 0x128, 9 },{ 0x2b, 12 }, +{ 0x11, 5 },{ 0x1b, 11 },{ 0x13a, 9 },{ 0x4ff, 11 }, +{ 0x3, 4 },{ 0x277, 10 },{ 0x106, 9 },{ 0x839, 12 }, +{ 0xb, 4 },{ 0x27b, 10 },{ 0x12c, 9 },{ 0x4bf, 11 }, +{ 0x9, 6 },{ 0x35, 12 },{ 0x27e, 10 },{ 0x13c8, 13 }, +{ 0x1, 6 },{ 0x4aa, 11 },{ 0x208, 10 },{ 0x29, 12 }, +{ 0x1, 4 },{ 0x254, 10 },{ 0x12e, 9 },{ 0x838, 12 }, +{ 0x24, 6 },{ 0x4f3, 11 },{ 0x276, 10 },{ 0x12f6, 13 }, +{ 0x1, 5 },{ 0x27a, 10 },{ 0x13e, 9 },{ 0x3e, 12 }, +{ 0x8, 6 },{ 0x413, 11 },{ 0xc, 10 },{ 0x4be, 11 }, +{ 0x14, 5 },{ 0x412, 11 },{ 0x253, 10 },{ 0x97a, 12 }, +{ 0x21, 6 },{ 0x4ab, 11 },{ 0x20b, 10 },{ 0x34, 12 }, +{ 0x15, 5 },{ 0x278, 10 },{ 0x252, 10 },{ 0x968, 12 }, +{ 0x5, 5 },{ 0xb, 10 },{ 0x9c, 8 },{ 0xe, 10 }, +}; + +/* dc table 0 */ + +static const UINT32 table0_dc_lum[120][2] = { +{ 0x1, 1 },{ 0x1, 2 },{ 0x1, 4 },{ 0x1, 5 }, +{ 0x5, 5 },{ 0x7, 5 },{ 0x8, 6 },{ 0xc, 6 }, +{ 0x0, 7 },{ 0x2, 7 },{ 0x12, 7 },{ 0x1a, 7 }, +{ 0x3, 8 },{ 0x7, 8 },{ 0x27, 8 },{ 0x37, 8 }, +{ 0x5, 9 },{ 0x4c, 9 },{ 0x6c, 9 },{ 0x6d, 9 }, +{ 0x8, 10 },{ 0x19, 10 },{ 0x9b, 10 },{ 0x1b, 10 }, +{ 0x9a, 10 },{ 0x13, 11 },{ 0x34, 11 },{ 0x35, 11 }, +{ 0x61, 12 },{ 0x48, 13 },{ 0xc4, 13 },{ 0x4a, 13 }, +{ 0xc6, 13 },{ 0xc7, 13 },{ 0x92, 14 },{ 0x18b, 14 }, +{ 0x93, 14 },{ 0x183, 14 },{ 0x182, 14 },{ 0x96, 14 }, +{ 0x97, 14 },{ 0x180, 14 },{ 0x314, 15 },{ 0x315, 15 }, +{ 0x605, 16 },{ 0x604, 16 },{ 0x606, 16 },{ 0xc0e, 17 }, +{ 0x303cd, 23 },{ 0x303c9, 23 },{ 0x303c8, 23 },{ 0x303ca, 23 }, +{ 0x303cb, 23 },{ 0x303cc, 23 },{ 0x303ce, 23 },{ 0x303cf, 23 }, +{ 0x303d0, 23 },{ 0x303d1, 23 },{ 0x303d2, 23 },{ 0x303d3, 23 }, +{ 0x303d4, 23 },{ 0x303d5, 23 },{ 0x303d6, 23 },{ 0x303d7, 23 }, +{ 0x303d8, 23 },{ 0x303d9, 23 },{ 0x303da, 23 },{ 0x303db, 23 }, +{ 0x303dc, 23 },{ 0x303dd, 23 },{ 0x303de, 23 },{ 0x303df, 23 }, +{ 0x303e0, 23 },{ 0x303e1, 23 },{ 0x303e2, 23 },{ 0x303e3, 23 }, +{ 0x303e4, 23 },{ 0x303e5, 23 },{ 0x303e6, 23 },{ 0x303e7, 23 }, +{ 0x303e8, 23 },{ 0x303e9, 23 },{ 0x303ea, 23 },{ 0x303eb, 23 }, +{ 0x303ec, 23 },{ 0x303ed, 23 },{ 0x303ee, 23 },{ 0x303ef, 23 }, +{ 0x303f0, 23 },{ 0x303f1, 23 },{ 0x303f2, 23 },{ 0x303f3, 23 }, +{ 0x303f4, 23 },{ 0x303f5, 23 },{ 0x303f6, 23 },{ 0x303f7, 23 }, +{ 0x303f8, 23 },{ 0x303f9, 23 },{ 0x303fa, 23 },{ 0x303fb, 23 }, +{ 0x303fc, 23 },{ 0x303fd, 23 },{ 0x303fe, 23 },{ 0x303ff, 23 }, +{ 0x60780, 24 },{ 0x60781, 24 },{ 0x60782, 24 },{ 0x60783, 24 }, +{ 0x60784, 24 },{ 0x60785, 24 },{ 0x60786, 24 },{ 0x60787, 24 }, +{ 0x60788, 24 },{ 0x60789, 24 },{ 0x6078a, 24 },{ 0x6078b, 24 }, +{ 0x6078c, 24 },{ 0x6078d, 24 },{ 0x6078e, 24 },{ 0x6078f, 24 }, +}; + +static const UINT32 table0_dc_chroma[120][2] = { +{ 0x0, 2 },{ 0x1, 2 },{ 0x5, 3 },{ 0x9, 4 }, +{ 0xd, 4 },{ 0x11, 5 },{ 0x1d, 5 },{ 0x1f, 5 }, +{ 0x21, 6 },{ 0x31, 6 },{ 0x38, 6 },{ 0x33, 6 }, +{ 0x39, 6 },{ 0x3d, 6 },{ 0x61, 7 },{ 0x79, 7 }, +{ 0x80, 8 },{ 0xc8, 8 },{ 0xca, 8 },{ 0xf0, 8 }, +{ 0x81, 8 },{ 0xc0, 8 },{ 0xc9, 8 },{ 0x107, 9 }, +{ 0x106, 9 },{ 0x196, 9 },{ 0x183, 9 },{ 0x1e3, 9 }, +{ 0x1e2, 9 },{ 0x20a, 10 },{ 0x20b, 10 },{ 0x609, 11 }, +{ 0x412, 11 },{ 0x413, 11 },{ 0x60b, 11 },{ 0x411, 11 }, +{ 0x60a, 11 },{ 0x65f, 11 },{ 0x410, 11 },{ 0x65d, 11 }, +{ 0x65e, 11 },{ 0xcb8, 12 },{ 0xc10, 12 },{ 0xcb9, 12 }, +{ 0x1823, 13 },{ 0x3045, 14 },{ 0x6089, 15 },{ 0xc110, 16 }, +{ 0x304448, 22 },{ 0x304449, 22 },{ 0x30444a, 22 },{ 0x30444b, 22 }, +{ 0x30444c, 22 },{ 0x30444d, 22 },{ 0x30444e, 22 },{ 0x30444f, 22 }, +{ 0x304450, 22 },{ 0x304451, 22 },{ 0x304452, 22 },{ 0x304453, 22 }, +{ 0x304454, 22 },{ 0x304455, 22 },{ 0x304456, 22 },{ 0x304457, 22 }, +{ 0x304458, 22 },{ 0x304459, 22 },{ 0x30445a, 22 },{ 0x30445b, 22 }, +{ 0x30445c, 22 },{ 0x30445d, 22 },{ 0x30445e, 22 },{ 0x30445f, 22 }, +{ 0x304460, 22 },{ 0x304461, 22 },{ 0x304462, 22 },{ 0x304463, 22 }, +{ 0x304464, 22 },{ 0x304465, 22 },{ 0x304466, 22 },{ 0x304467, 22 }, +{ 0x304468, 22 },{ 0x304469, 22 },{ 0x30446a, 22 },{ 0x30446b, 22 }, +{ 0x30446c, 22 },{ 0x30446d, 22 },{ 0x30446e, 22 },{ 0x30446f, 22 }, +{ 0x304470, 22 },{ 0x304471, 22 },{ 0x304472, 22 },{ 0x304473, 22 }, +{ 0x304474, 22 },{ 0x304475, 22 },{ 0x304476, 22 },{ 0x304477, 22 }, +{ 0x304478, 22 },{ 0x304479, 22 },{ 0x30447a, 22 },{ 0x30447b, 22 }, +{ 0x30447c, 22 },{ 0x30447d, 22 },{ 0x30447e, 22 },{ 0x30447f, 22 }, +{ 0x608880, 23 },{ 0x608881, 23 },{ 0x608882, 23 },{ 0x608883, 23 }, +{ 0x608884, 23 },{ 0x608885, 23 },{ 0x608886, 23 },{ 0x608887, 23 }, +{ 0x608888, 23 },{ 0x608889, 23 },{ 0x60888a, 23 },{ 0x60888b, 23 }, +{ 0x60888c, 23 },{ 0x60888d, 23 },{ 0x60888e, 23 },{ 0x60888f, 23 }, +}; + +/* dc table 1 */ + +const UINT32 table1_dc_lum[120][2] = { +{ 0x2, 2 },{ 0x3, 2 },{ 0x3, 3 },{ 0x2, 4 }, +{ 0x5, 4 },{ 0x1, 5 },{ 0x3, 5 },{ 0x8, 5 }, +{ 0x0, 6 },{ 0x5, 6 },{ 0xd, 6 },{ 0xf, 6 }, +{ 0x13, 6 },{ 0x8, 7 },{ 0x18, 7 },{ 0x1c, 7 }, +{ 0x24, 7 },{ 0x4, 8 },{ 0x6, 8 },{ 0x12, 8 }, +{ 0x32, 8 },{ 0x3b, 8 },{ 0x4a, 8 },{ 0x4b, 8 }, +{ 0xb, 9 },{ 0x26, 9 },{ 0x27, 9 },{ 0x66, 9 }, +{ 0x74, 9 },{ 0x75, 9 },{ 0x14, 10 },{ 0x1c, 10 }, +{ 0x1f, 10 },{ 0x1d, 10 },{ 0x2b, 11 },{ 0x3d, 11 }, +{ 0x19d, 11 },{ 0x19f, 11 },{ 0x54, 12 },{ 0x339, 12 }, +{ 0x338, 12 },{ 0x33d, 12 },{ 0xab, 13 },{ 0xf1, 13 }, +{ 0x678, 13 },{ 0xf2, 13 },{ 0x1e0, 14 },{ 0x1e1, 14 }, +{ 0x154, 14 },{ 0xcf2, 14 },{ 0x3cc, 15 },{ 0x2ab, 15 }, +{ 0x19e7, 15 },{ 0x3ce, 15 },{ 0x19e6, 15 },{ 0x554, 16 }, +{ 0x79f, 16 },{ 0x555, 16 },{ 0xf3d, 17 },{ 0xf37, 17 }, +{ 0xf3c, 17 },{ 0xf35, 17 },{ 0x1e6d, 18 },{ 0x1e68, 18 }, +{ 0x3cd8, 19 },{ 0x3cd3, 19 },{ 0x3cd9, 19 },{ 0x79a4, 20 }, +{ 0xf34ba, 25 },{ 0xf34b4, 25 },{ 0xf34b5, 25 },{ 0xf34b6, 25 }, +{ 0xf34b7, 25 },{ 0xf34b8, 25 },{ 0xf34b9, 25 },{ 0xf34bb, 25 }, +{ 0xf34bc, 25 },{ 0xf34bd, 25 },{ 0xf34be, 25 },{ 0xf34bf, 25 }, +{ 0x1e6940, 26 },{ 0x1e6941, 26 },{ 0x1e6942, 26 },{ 0x1e6943, 26 }, +{ 0x1e6944, 26 },{ 0x1e6945, 26 },{ 0x1e6946, 26 },{ 0x1e6947, 26 }, +{ 0x1e6948, 26 },{ 0x1e6949, 26 },{ 0x1e694a, 26 },{ 0x1e694b, 26 }, +{ 0x1e694c, 26 },{ 0x1e694d, 26 },{ 0x1e694e, 26 },{ 0x1e694f, 26 }, +{ 0x1e6950, 26 },{ 0x1e6951, 26 },{ 0x1e6952, 26 },{ 0x1e6953, 26 }, +{ 0x1e6954, 26 },{ 0x1e6955, 26 },{ 0x1e6956, 26 },{ 0x1e6957, 26 }, +{ 0x1e6958, 26 },{ 0x1e6959, 26 },{ 0x1e695a, 26 },{ 0x1e695b, 26 }, +{ 0x1e695c, 26 },{ 0x1e695d, 26 },{ 0x1e695e, 26 },{ 0x1e695f, 26 }, +{ 0x1e6960, 26 },{ 0x1e6961, 26 },{ 0x1e6962, 26 },{ 0x1e6963, 26 }, +{ 0x1e6964, 26 },{ 0x1e6965, 26 },{ 0x1e6966, 26 },{ 0x1e6967, 26 }, +}; + +const UINT32 table1_dc_chroma[120][2] = { +{ 0x0, 2 },{ 0x1, 2 },{ 0x4, 3 },{ 0x7, 3 }, +{ 0xb, 4 },{ 0xd, 4 },{ 0x15, 5 },{ 0x28, 6 }, +{ 0x30, 6 },{ 0x32, 6 },{ 0x52, 7 },{ 0x62, 7 }, +{ 0x66, 7 },{ 0xa6, 8 },{ 0xc6, 8 },{ 0xcf, 8 }, +{ 0x14f, 9 },{ 0x18e, 9 },{ 0x19c, 9 },{ 0x29d, 10 }, +{ 0x33a, 10 },{ 0x538, 11 },{ 0x63c, 11 },{ 0x63e, 11 }, +{ 0x63f, 11 },{ 0x676, 11 },{ 0xa73, 12 },{ 0xc7a, 12 }, +{ 0xcef, 12 },{ 0x14e5, 13 },{ 0x19dd, 13 },{ 0x29c8, 14 }, +{ 0x29c9, 14 },{ 0x63dd, 15 },{ 0x33b8, 14 },{ 0x33b9, 14 }, +{ 0xc7b6, 16 },{ 0x63d8, 15 },{ 0x63df, 15 },{ 0xc7b3, 16 }, +{ 0xc7b4, 16 },{ 0xc7b5, 16 },{ 0x63de, 15 },{ 0xc7b7, 16 }, +{ 0xc7b8, 16 },{ 0xc7b9, 16 },{ 0x18f65, 17 },{ 0x31ec8, 18 }, +{ 0xc7b248, 24 },{ 0xc7b249, 24 },{ 0xc7b24a, 24 },{ 0xc7b24b, 24 }, +{ 0xc7b24c, 24 },{ 0xc7b24d, 24 },{ 0xc7b24e, 24 },{ 0xc7b24f, 24 }, +{ 0xc7b250, 24 },{ 0xc7b251, 24 },{ 0xc7b252, 24 },{ 0xc7b253, 24 }, +{ 0xc7b254, 24 },{ 0xc7b255, 24 },{ 0xc7b256, 24 },{ 0xc7b257, 24 }, +{ 0xc7b258, 24 },{ 0xc7b259, 24 },{ 0xc7b25a, 24 },{ 0xc7b25b, 24 }, +{ 0xc7b25c, 24 },{ 0xc7b25d, 24 },{ 0xc7b25e, 24 },{ 0xc7b25f, 24 }, +{ 0xc7b260, 24 },{ 0xc7b261, 24 },{ 0xc7b262, 24 },{ 0xc7b263, 24 }, +{ 0xc7b264, 24 },{ 0xc7b265, 24 },{ 0xc7b266, 24 },{ 0xc7b267, 24 }, +{ 0xc7b268, 24 },{ 0xc7b269, 24 },{ 0xc7b26a, 24 },{ 0xc7b26b, 24 }, +{ 0xc7b26c, 24 },{ 0xc7b26d, 24 },{ 0xc7b26e, 24 },{ 0xc7b26f, 24 }, +{ 0xc7b270, 24 },{ 0xc7b271, 24 },{ 0xc7b272, 24 },{ 0xc7b273, 24 }, +{ 0xc7b274, 24 },{ 0xc7b275, 24 },{ 0xc7b276, 24 },{ 0xc7b277, 24 }, +{ 0xc7b278, 24 },{ 0xc7b279, 24 },{ 0xc7b27a, 24 },{ 0xc7b27b, 24 }, +{ 0xc7b27c, 24 },{ 0xc7b27d, 24 },{ 0xc7b27e, 24 },{ 0xc7b27f, 24 }, +{ 0x18f6480, 25 },{ 0x18f6481, 25 },{ 0x18f6482, 25 },{ 0x18f6483, 25 }, +{ 0x18f6484, 25 },{ 0x18f6485, 25 },{ 0x18f6486, 25 },{ 0x18f6487, 25 }, +{ 0x18f6488, 25 },{ 0x18f6489, 25 },{ 0x18f648a, 25 },{ 0x18f648b, 25 }, +{ 0x18f648c, 25 },{ 0x18f648d, 25 },{ 0x18f648e, 25 },{ 0x18f648f, 25 }, +}; + +/* vlc table 0, for intra luma */ + +static const UINT16 table0_vlc[133][2] = { +{ 0x1, 2 },{ 0x6, 3 },{ 0xf, 4 },{ 0x16, 5 }, +{ 0x20, 6 },{ 0x18, 7 },{ 0x8, 8 },{ 0x9a, 8 }, +{ 0x56, 9 },{ 0x13e, 9 },{ 0xf0, 10 },{ 0x3a5, 10 }, +{ 0x77, 11 },{ 0x1ef, 11 },{ 0x9a, 12 },{ 0x5d, 13 }, +{ 0x1, 4 },{ 0x11, 5 },{ 0x2, 7 },{ 0xb, 8 }, +{ 0x12, 9 },{ 0x1d6, 9 },{ 0x27e, 10 },{ 0x191, 11 }, +{ 0xea, 12 },{ 0x3dc, 12 },{ 0x13b, 13 },{ 0x4, 5 }, +{ 0x14, 7 },{ 0x9e, 8 },{ 0x9, 10 },{ 0x1ac, 11 }, +{ 0x1e2, 11 },{ 0x3ca, 12 },{ 0x5f, 13 },{ 0x17, 5 }, +{ 0x4e, 7 },{ 0x5e, 9 },{ 0xf3, 10 },{ 0x1ad, 11 }, +{ 0xec, 12 },{ 0x5f0, 13 },{ 0xe, 6 },{ 0xe1, 8 }, +{ 0x3a4, 10 },{ 0x9c, 12 },{ 0x13d, 13 },{ 0x3b, 6 }, +{ 0x1c, 9 },{ 0x14, 11 },{ 0x9be, 12 },{ 0x6, 7 }, +{ 0x7a, 9 },{ 0x190, 11 },{ 0x137, 13 },{ 0x1b, 7 }, +{ 0x8, 10 },{ 0x75c, 11 },{ 0x71, 7 },{ 0xd7, 10 }, +{ 0x9bf, 12 },{ 0x7, 8 },{ 0xaf, 10 },{ 0x4cc, 11 }, +{ 0x34, 8 },{ 0x265, 10 },{ 0x9f, 12 },{ 0xe0, 8 }, +{ 0x16, 11 },{ 0x327, 12 },{ 0x15, 9 },{ 0x17d, 11 }, +{ 0xebb, 12 },{ 0x14, 9 },{ 0xf6, 10 },{ 0x1e4, 11 }, +{ 0xcb, 10 },{ 0x99d, 12 },{ 0xca, 10 },{ 0x2fc, 12 }, +{ 0x17f, 11 },{ 0x4cd, 11 },{ 0x2fd, 12 },{ 0x4fe, 11 }, +{ 0x13a, 13 },{ 0xa, 4 },{ 0x42, 7 },{ 0x1d3, 9 }, +{ 0x4dd, 11 },{ 0x12, 5 },{ 0xe8, 8 },{ 0x4c, 11 }, +{ 0x136, 13 },{ 0x39, 6 },{ 0x264, 10 },{ 0xeba, 12 }, +{ 0x0, 7 },{ 0xae, 10 },{ 0x99c, 12 },{ 0x1f, 7 }, +{ 0x4de, 11 },{ 0x43, 7 },{ 0x4dc, 11 },{ 0x3, 8 }, +{ 0x3cb, 12 },{ 0x6, 8 },{ 0x99e, 12 },{ 0x2a, 8 }, +{ 0x5f1, 13 },{ 0xf, 8 },{ 0x9fe, 12 },{ 0x33, 8 }, +{ 0x9ff, 12 },{ 0x98, 8 },{ 0x99f, 12 },{ 0xea, 8 }, +{ 0x13c, 13 },{ 0x2e, 8 },{ 0x192, 11 },{ 0x136, 9 }, +{ 0x6a, 9 },{ 0x15, 11 },{ 0x3af, 10 },{ 0x1e3, 11 }, +{ 0x74, 11 },{ 0xeb, 12 },{ 0x2f9, 12 },{ 0x5c, 13 }, +{ 0xed, 12 },{ 0x3dd, 12 },{ 0x326, 12 },{ 0x5e, 13 }, +{ 0x16, 7 }, +}; + +const INT8 table0_level[132] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 1, 2, 3, 4, 5, + 6, 7, 8, 1, 2, 3, 4, 5, + 6, 7, 1, 2, 3, 4, 5, 1, + 2, 3, 4, 1, 2, 3, 4, 1, + 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, + 3, 1, 2, 3, 1, 2, 1, 2, + 1, 1, 1, 1, 1, 1, 2, 3, + 4, 1, 2, 3, 4, 1, 2, 3, + 1, 2, 3, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +const INT8 table0_run[132] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 2, + 2, 2, 2, 3, 3, 3, 3, 3, + 3, 3, 4, 4, 4, 4, 4, 5, + 5, 5, 5, 6, 6, 6, 6, 7, + 7, 7, 8, 8, 8, 9, 9, 9, + 10, 10, 10, 11, 11, 11, 12, 12, + 12, 13, 13, 13, 14, 14, 15, 15, + 16, 17, 18, 19, 20, 0, 0, 0, + 0, 1, 1, 1, 1, 2, 2, 2, + 3, 3, 3, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, 10, + 10, 11, 11, 12, 12, 13, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, +}; + +/* vlc table 1, for intra chroma and P macro blocks */ + +const UINT16 table1_vlc[149][2] = { +{ 0x4, 3 },{ 0x14, 5 },{ 0x17, 7 },{ 0x7f, 8 }, +{ 0x154, 9 },{ 0x1f2, 10 },{ 0xbf, 11 },{ 0x65, 12 }, +{ 0xaaa, 12 },{ 0x630, 13 },{ 0x1597, 13 },{ 0x3b7, 14 }, +{ 0x2b22, 14 },{ 0xbe6, 15 },{ 0xb, 4 },{ 0x37, 7 }, +{ 0x62, 9 },{ 0x7, 11 },{ 0x166, 12 },{ 0xce, 13 }, +{ 0x1590, 13 },{ 0x5f6, 14 },{ 0xbe7, 15 },{ 0x7, 5 }, +{ 0x6d, 8 },{ 0x3, 11 },{ 0x31f, 12 },{ 0x5f2, 14 }, +{ 0x2, 6 },{ 0x61, 9 },{ 0x55, 12 },{ 0x1df, 14 }, +{ 0x1a, 6 },{ 0x1e, 10 },{ 0xac9, 12 },{ 0x2b23, 14 }, +{ 0x1e, 6 },{ 0x1f, 10 },{ 0xac3, 12 },{ 0x2b2b, 14 }, +{ 0x6, 7 },{ 0x4, 11 },{ 0x2f8, 13 },{ 0x19, 7 }, +{ 0x6, 11 },{ 0x63d, 13 },{ 0x57, 7 },{ 0x182, 11 }, +{ 0x2aa2, 14 },{ 0x4, 8 },{ 0x180, 11 },{ 0x59c, 14 }, +{ 0x7d, 8 },{ 0x164, 12 },{ 0x76d, 15 },{ 0x2, 9 }, +{ 0x18d, 11 },{ 0x1581, 13 },{ 0xad, 8 },{ 0x60, 12 }, +{ 0xc67, 14 },{ 0x1c, 9 },{ 0xee, 13 },{ 0x3, 9 }, +{ 0x2cf, 13 },{ 0xd9, 9 },{ 0x1580, 13 },{ 0x2, 11 }, +{ 0x183, 11 },{ 0x57, 12 },{ 0x61, 12 },{ 0x31, 11 }, +{ 0x66, 12 },{ 0x631, 13 },{ 0x632, 13 },{ 0xac, 13 }, +{ 0x31d, 12 },{ 0x76, 12 },{ 0x3a, 11 },{ 0x165, 12 }, +{ 0xc66, 14 },{ 0x3, 2 },{ 0x54, 7 },{ 0x2ab, 10 }, +{ 0x16, 13 },{ 0x5f7, 14 },{ 0x5, 4 },{ 0xf8, 9 }, +{ 0xaa9, 12 },{ 0x5f, 15 },{ 0x4, 4 },{ 0x1c, 10 }, +{ 0x1550, 13 },{ 0x4, 5 },{ 0x77, 11 },{ 0x76c, 15 }, +{ 0xe, 5 },{ 0xa, 12 },{ 0xc, 5 },{ 0x562, 11 }, +{ 0x4, 6 },{ 0x31c, 12 },{ 0x6, 6 },{ 0xc8, 13 }, +{ 0xd, 6 },{ 0x1da, 13 },{ 0x7, 6 },{ 0xc9, 13 }, +{ 0x1, 7 },{ 0x2e, 14 },{ 0x14, 7 },{ 0x1596, 13 }, +{ 0xa, 7 },{ 0xac2, 12 },{ 0x16, 7 },{ 0x15b, 14 }, +{ 0x15, 7 },{ 0x15a, 14 },{ 0xf, 8 },{ 0x5e, 15 }, +{ 0x7e, 8 },{ 0xab, 8 },{ 0x2d, 9 },{ 0xd8, 9 }, +{ 0xb, 9 },{ 0x14, 10 },{ 0x2b3, 10 },{ 0x1f3, 10 }, +{ 0x3a, 10 },{ 0x0, 10 },{ 0x58, 10 },{ 0x2e, 9 }, +{ 0x5e, 10 },{ 0x563, 11 },{ 0xec, 12 },{ 0x54, 12 }, +{ 0xac1, 12 },{ 0x1556, 13 },{ 0x2fa, 13 },{ 0x181, 11 }, +{ 0x1557, 13 },{ 0x59d, 14 },{ 0x2aa3, 14 },{ 0x2b2a, 14 }, +{ 0x1de, 14 },{ 0x63c, 13 },{ 0xcf, 13 },{ 0x1594, 13 }, +{ 0xd, 9 }, +}; + +const INT8 table1_level[148] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 1, + 2, 3, 4, 5, 1, 2, 3, 4, + 1, 2, 3, 4, 1, 2, 3, 4, + 1, 2, 3, 1, 2, 3, 1, 2, + 3, 1, 2, 3, 1, 2, 3, 1, + 2, 3, 1, 2, 3, 1, 2, 1, + 2, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 3, 4, 5, 1, 2, + 3, 4, 1, 2, 3, 1, 2, 3, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, +}; + +const INT8 table1_run[148] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 2, + 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 7, 7, 7, 8, 8, + 8, 9, 9, 9, 10, 10, 10, 11, + 11, 11, 12, 12, 12, 13, 13, 14, + 14, 15, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, + 29, 0, 0, 0, 0, 0, 1, 1, + 1, 1, 2, 2, 2, 3, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, + 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13, 14, 14, 15, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, +}; + +/* third vlc table */ + +const UINT16 table2_vlc[186][2] = { +{ 0x1, 2 },{ 0x5, 3 },{ 0xd, 4 },{ 0x12, 5 }, +{ 0xe, 6 },{ 0x15, 7 },{ 0x13, 8 },{ 0x3f, 8 }, +{ 0x4b, 9 },{ 0x11f, 9 },{ 0xb8, 10 },{ 0x3e3, 10 }, +{ 0x172, 11 },{ 0x24d, 12 },{ 0x3da, 12 },{ 0x2dd, 13 }, +{ 0x1f55, 13 },{ 0x5b9, 14 },{ 0x3eae, 14 },{ 0x0, 4 }, +{ 0x10, 5 },{ 0x8, 7 },{ 0x20, 8 },{ 0x29, 9 }, +{ 0x1f4, 9 },{ 0x233, 10 },{ 0x1e0, 11 },{ 0x12a, 12 }, +{ 0x3dd, 12 },{ 0x50a, 13 },{ 0x1f29, 13 },{ 0xa42, 14 }, +{ 0x1272, 15 },{ 0x1737, 15 },{ 0x3, 5 },{ 0x11, 7 }, +{ 0xc4, 8 },{ 0x4b, 10 },{ 0xb4, 11 },{ 0x7d4, 11 }, +{ 0x345, 12 },{ 0x2d7, 13 },{ 0x7bf, 13 },{ 0x938, 14 }, +{ 0xbbb, 14 },{ 0x95e, 15 },{ 0x13, 5 },{ 0x78, 7 }, +{ 0x69, 9 },{ 0x232, 10 },{ 0x461, 11 },{ 0x3ec, 12 }, +{ 0x520, 13 },{ 0x1f2a, 13 },{ 0x3e50, 14 },{ 0x3e51, 14 }, +{ 0x1486, 15 },{ 0xc, 6 },{ 0x24, 9 },{ 0x94, 11 }, +{ 0x8c0, 12 },{ 0xf09, 14 },{ 0x1ef0, 15 },{ 0x3d, 6 }, +{ 0x53, 9 },{ 0x1a0, 11 },{ 0x2d6, 13 },{ 0xf08, 14 }, +{ 0x13, 7 },{ 0x7c, 9 },{ 0x7c1, 11 },{ 0x4ac, 14 }, +{ 0x1b, 7 },{ 0xa0, 10 },{ 0x344, 12 },{ 0xf79, 14 }, +{ 0x79, 7 },{ 0x3e1, 10 },{ 0x2d4, 13 },{ 0x2306, 14 }, +{ 0x21, 8 },{ 0x23c, 10 },{ 0xfae, 12 },{ 0x23de, 14 }, +{ 0x35, 8 },{ 0x175, 11 },{ 0x7b3, 13 },{ 0xc5, 8 }, +{ 0x174, 11 },{ 0x785, 13 },{ 0x48, 9 },{ 0x1a3, 11 }, +{ 0x49e, 13 },{ 0x2c, 9 },{ 0xfa, 10 },{ 0x7d6, 11 }, +{ 0x92, 10 },{ 0x5cc, 13 },{ 0x1ef1, 15 },{ 0xa3, 10 }, +{ 0x3ed, 12 },{ 0x93e, 14 },{ 0x1e2, 11 },{ 0x1273, 15 }, +{ 0x7c4, 11 },{ 0x1487, 15 },{ 0x291, 12 },{ 0x293, 12 }, +{ 0xf8a, 12 },{ 0x509, 13 },{ 0x508, 13 },{ 0x78d, 13 }, +{ 0x7be, 13 },{ 0x78c, 13 },{ 0x4ae, 14 },{ 0xbba, 14 }, +{ 0x2307, 14 },{ 0xb9a, 14 },{ 0x1736, 15 },{ 0xe, 4 }, +{ 0x45, 7 },{ 0x1f3, 9 },{ 0x47a, 11 },{ 0x5dc, 13 }, +{ 0x23df, 14 },{ 0x19, 5 },{ 0x28, 9 },{ 0x176, 11 }, +{ 0x49d, 13 },{ 0x23dd, 14 },{ 0x30, 6 },{ 0xa2, 10 }, +{ 0x2ef, 12 },{ 0x5b8, 14 },{ 0x3f, 6 },{ 0xa5, 10 }, +{ 0x3db, 12 },{ 0x93f, 14 },{ 0x44, 7 },{ 0x7cb, 11 }, +{ 0x95f, 15 },{ 0x63, 7 },{ 0x3c3, 12 },{ 0x15, 8 }, +{ 0x8f6, 12 },{ 0x17, 8 },{ 0x498, 13 },{ 0x2c, 8 }, +{ 0x7b2, 13 },{ 0x2f, 8 },{ 0x1f54, 13 },{ 0x8d, 8 }, +{ 0x7bd, 13 },{ 0x8e, 8 },{ 0x1182, 13 },{ 0xfb, 8 }, +{ 0x50b, 13 },{ 0x2d, 8 },{ 0x7c0, 11 },{ 0x79, 9 }, +{ 0x1f5f, 13 },{ 0x7a, 9 },{ 0x1f56, 13 },{ 0x231, 10 }, +{ 0x3e4, 10 },{ 0x1a1, 11 },{ 0x143, 11 },{ 0x1f7, 11 }, +{ 0x16f, 12 },{ 0x292, 12 },{ 0x2e7, 12 },{ 0x16c, 12 }, +{ 0x16d, 12 },{ 0x3dc, 12 },{ 0xf8b, 12 },{ 0x499, 13 }, +{ 0x3d8, 12 },{ 0x78e, 13 },{ 0x2d5, 13 },{ 0x1f5e, 13 }, +{ 0x1f2b, 13 },{ 0x78f, 13 },{ 0x4ad, 14 },{ 0x3eaf, 14 }, +{ 0x23dc, 14 },{ 0x4a, 9 }, +}; + +const INT8 table2_level[185] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 1, 2, 3, 4, 5, 6, 1, + 2, 3, 4, 5, 1, 2, 3, 4, + 1, 2, 3, 4, 1, 2, 3, 4, + 1, 2, 3, 4, 1, 2, 3, 1, + 2, 3, 1, 2, 3, 1, 2, 3, + 1, 2, 3, 1, 2, 3, 1, 2, + 1, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 2, 3, 4, 5, 6, 1, 2, 3, + 4, 5, 1, 2, 3, 4, 1, 2, + 3, 4, 1, 2, 3, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, +}; + +const INT8 table2_run[185] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 4, 4, 4, 4, 4, 4, 5, + 5, 5, 5, 5, 6, 6, 6, 6, + 7, 7, 7, 7, 8, 8, 8, 8, + 9, 9, 9, 9, 10, 10, 10, 11, + 11, 11, 12, 12, 12, 13, 13, 13, + 14, 14, 14, 15, 15, 15, 16, 16, + 17, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 0, + 0, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 3, 3, + 3, 3, 4, 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, 9, 9, 10, + 10, 11, 11, 12, 12, 13, 13, 14, + 14, 15, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, + 37, +}; + +/* second non intra vlc table */ +const UINT16 table4_vlc[169][2] = { +{ 0x0, 3 },{ 0x3, 4 },{ 0xb, 5 },{ 0x14, 6 }, +{ 0x3f, 6 },{ 0x5d, 7 },{ 0xa2, 8 },{ 0xac, 9 }, +{ 0x16e, 9 },{ 0x20a, 10 },{ 0x2e2, 10 },{ 0x432, 11 }, +{ 0x5c9, 11 },{ 0x827, 12 },{ 0xb54, 12 },{ 0x4e6, 13 }, +{ 0x105f, 13 },{ 0x172a, 13 },{ 0x20b2, 14 },{ 0x2d4e, 14 }, +{ 0x39f0, 14 },{ 0x4175, 15 },{ 0x5a9e, 15 },{ 0x4, 4 }, +{ 0x1e, 5 },{ 0x42, 7 },{ 0xb6, 8 },{ 0x173, 9 }, +{ 0x395, 10 },{ 0x72e, 11 },{ 0xb94, 12 },{ 0x16a4, 13 }, +{ 0x20b3, 14 },{ 0x2e45, 14 },{ 0x5, 5 },{ 0x40, 7 }, +{ 0x49, 9 },{ 0x28f, 10 },{ 0x5cb, 11 },{ 0x48a, 13 }, +{ 0x9dd, 14 },{ 0x73e2, 15 },{ 0x18, 5 },{ 0x25, 8 }, +{ 0x8a, 10 },{ 0x51b, 11 },{ 0xe5f, 12 },{ 0x9c9, 14 }, +{ 0x139c, 15 },{ 0x29, 6 },{ 0x4f, 9 },{ 0x412, 11 }, +{ 0x48d, 13 },{ 0x2e41, 14 },{ 0x38, 6 },{ 0x10e, 9 }, +{ 0x5a8, 11 },{ 0x105c, 13 },{ 0x39f2, 14 },{ 0x58, 7 }, +{ 0x21f, 10 },{ 0xe7e, 12 },{ 0x39ff, 14 },{ 0x23, 8 }, +{ 0x2e3, 10 },{ 0x4e5, 13 },{ 0x2e40, 14 },{ 0xa1, 8 }, +{ 0x5be, 11 },{ 0x9c8, 14 },{ 0x83, 8 },{ 0x13a, 11 }, +{ 0x1721, 13 },{ 0x44, 9 },{ 0x276, 12 },{ 0x39f6, 14 }, +{ 0x8b, 10 },{ 0x4ef, 13 },{ 0x5a9b, 15 },{ 0x208, 10 }, +{ 0x1cfe, 13 },{ 0x399, 10 },{ 0x1cb4, 13 },{ 0x39e, 10 }, +{ 0x39f3, 14 },{ 0x5ab, 11 },{ 0x73e3, 15 },{ 0x737, 11 }, +{ 0x5a9f, 15 },{ 0x82d, 12 },{ 0xe69, 12 },{ 0xe68, 12 }, +{ 0x433, 11 },{ 0xb7b, 12 },{ 0x2df8, 14 },{ 0x2e56, 14 }, +{ 0x2e57, 14 },{ 0x39f7, 14 },{ 0x51a5, 15 },{ 0x3, 3 }, +{ 0x2a, 6 },{ 0xe4, 8 },{ 0x28e, 10 },{ 0x735, 11 }, +{ 0x1058, 13 },{ 0x1cfa, 13 },{ 0x2df9, 14 },{ 0x4174, 15 }, +{ 0x9, 4 },{ 0x54, 8 },{ 0x398, 10 },{ 0x48b, 13 }, +{ 0x139d, 15 },{ 0xd, 4 },{ 0xad, 9 },{ 0x826, 12 }, +{ 0x2d4c, 14 },{ 0x11, 5 },{ 0x16b, 9 },{ 0xb7f, 12 }, +{ 0x51a4, 15 },{ 0x19, 5 },{ 0x21b, 10 },{ 0x16fd, 13 }, +{ 0x1d, 5 },{ 0x394, 10 },{ 0x28d3, 14 },{ 0x2b, 6 }, +{ 0x5bc, 11 },{ 0x5a9a, 15 },{ 0x2f, 6 },{ 0x247, 12 }, +{ 0x10, 7 },{ 0xa35, 12 },{ 0x3e, 6 },{ 0xb7a, 12 }, +{ 0x59, 7 },{ 0x105e, 13 },{ 0x26, 8 },{ 0x9cf, 14 }, +{ 0x55, 8 },{ 0x1cb5, 13 },{ 0x57, 8 },{ 0xe5b, 12 }, +{ 0xa0, 8 },{ 0x1468, 13 },{ 0x170, 9 },{ 0x90, 10 }, +{ 0x1ce, 9 },{ 0x21a, 10 },{ 0x218, 10 },{ 0x168, 9 }, +{ 0x21e, 10 },{ 0x244, 12 },{ 0x736, 11 },{ 0x138, 11 }, +{ 0x519, 11 },{ 0xe5e, 12 },{ 0x72c, 11 },{ 0xb55, 12 }, +{ 0x9dc, 14 },{ 0x20bb, 14 },{ 0x48c, 13 },{ 0x1723, 13 }, +{ 0x2e44, 14 },{ 0x16a5, 13 },{ 0x518, 11 },{ 0x39fe, 14 }, +{ 0x169, 9 }, +}; + +const INT8 table4_level[168] = { + 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 1, + 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 1, 2, 3, 4, 5, 6, + 7, 8, 1, 2, 3, 4, 5, 6, + 7, 1, 2, 3, 4, 5, 1, 2, + 3, 4, 5, 1, 2, 3, 4, 1, + 2, 3, 4, 1, 2, 3, 1, 2, + 3, 1, 2, 3, 1, 2, 3, 1, + 2, 1, 2, 1, 2, 1, 2, 1, + 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 1, 2, 3, 4, + 5, 1, 2, 3, 4, 1, 2, 3, + 4, 1, 2, 3, 1, 2, 3, 1, + 2, 3, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, +}; + +const INT8 table4_run[168] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 2, 2, 2, + 2, 2, 3, 3, 3, 3, 3, 3, + 3, 4, 4, 4, 4, 4, 5, 5, + 5, 5, 5, 6, 6, 6, 6, 7, + 7, 7, 7, 8, 8, 8, 9, 9, + 9, 10, 10, 10, 11, 11, 11, 12, + 12, 13, 13, 14, 14, 15, 15, 16, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 3, 3, 3, + 3, 4, 4, 4, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, 13, + 14, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, +}; + +extern const UINT16 inter_vlc[103][2]; +extern const INT8 inter_level[102]; +extern const INT8 inter_run[102]; + +extern const UINT16 intra_vlc[103][2]; +extern const INT8 intra_level[102]; +extern const INT8 intra_run[102]; + +#define NB_RL_TABLES 6 + +static RLTable rl_table[NB_RL_TABLES] = { + /* intra luminance tables */ + { + 132, + 85, + table0_vlc, + table0_run, + table0_level, + }, + { + 185, + 119, + table2_vlc, + table2_run, + table2_level, + }, + { + 102, + 67, + intra_vlc, + intra_run, + intra_level, + }, + /* intra chrominance / non intra tables */ + { + 148, + 81, + table1_vlc, + table1_run, + table1_level, + }, + { + 168, + 99, + table4_vlc, + table4_run, + table4_level, + }, + { + 102, + 58, + inter_vlc, + inter_run, + inter_level, + }, +}; + +/* motion vector table 0 */ + +const UINT16 table0_mv_code[1100] = { + 0x0001, 0x0003, 0x0005, 0x0007, 0x0003, 0x0008, 0x000c, 0x0001, + 0x0002, 0x001b, 0x0006, 0x000b, 0x0015, 0x0002, 0x000e, 0x000f, + 0x0014, 0x0020, 0x0022, 0x0025, 0x0027, 0x0029, 0x002d, 0x004b, + 0x004d, 0x0003, 0x0022, 0x0023, 0x0025, 0x0027, 0x0042, 0x0048, + 0x0049, 0x0050, 0x005c, 0x0091, 0x009f, 0x000e, 0x0043, 0x004c, + 0x0054, 0x0056, 0x008c, 0x0098, 0x009a, 0x009b, 0x00b1, 0x00b2, + 0x0120, 0x0121, 0x0126, 0x0133, 0x0139, 0x01a1, 0x01a4, 0x01a5, + 0x01a6, 0x01a7, 0x01ae, 0x01af, 0x000b, 0x0019, 0x0085, 0x0090, + 0x009b, 0x00aa, 0x00af, 0x010c, 0x010e, 0x011c, 0x011e, 0x0133, + 0x0144, 0x0160, 0x0174, 0x0175, 0x0177, 0x0178, 0x0249, 0x024b, + 0x0252, 0x0261, 0x0265, 0x0270, 0x0352, 0x0353, 0x0355, 0x0359, + 0x0010, 0x0011, 0x0013, 0x0034, 0x0035, 0x0036, 0x0037, 0x003d, + 0x003e, 0x0109, 0x0126, 0x0156, 0x021a, 0x021e, 0x023a, 0x023e, + 0x028e, 0x028f, 0x02cf, 0x0491, 0x0494, 0x049f, 0x04a0, 0x04a3, + 0x04a6, 0x04a7, 0x04ad, 0x04ae, 0x04c0, 0x04c4, 0x04c6, 0x04c8, + 0x04c9, 0x04f5, 0x04f6, 0x04f7, 0x0680, 0x0682, 0x0683, 0x0688, + 0x0689, 0x068d, 0x068e, 0x068f, 0x06a2, 0x06a3, 0x06a9, 0x06b0, + 0x06b1, 0x06b4, 0x06b5, 0x0024, 0x0060, 0x0063, 0x0078, 0x0079, + 0x0211, 0x0244, 0x0245, 0x0247, 0x0248, 0x0249, 0x024a, 0x024b, + 0x026b, 0x02af, 0x02b8, 0x02bb, 0x0436, 0x0476, 0x0477, 0x047e, + 0x04c8, 0x04c9, 0x04ca, 0x0514, 0x0586, 0x0587, 0x0598, 0x059d, + 0x05d9, 0x05da, 0x0920, 0x0921, 0x093b, 0x093c, 0x093d, 0x0942, + 0x0943, 0x0944, 0x0945, 0x0959, 0x095e, 0x095f, 0x0982, 0x0983, + 0x098e, 0x098f, 0x09c4, 0x09e7, 0x09e8, 0x09e9, 0x0d02, 0x0d17, + 0x0d18, 0x0d19, 0x0d41, 0x0d42, 0x0d43, 0x0d50, 0x0d5f, 0x0d6d, + 0x0d6e, 0x0d6f, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x041e, 0x041f, 0x0420, 0x0421, + 0x048c, 0x048d, 0x04d3, 0x04d4, 0x04d5, 0x055c, 0x055d, 0x0572, + 0x0573, 0x0574, 0x0575, 0x08de, 0x08df, 0x08fe, 0x08ff, 0x0996, + 0x0a36, 0x0a37, 0x0b08, 0x0b09, 0x0b0a, 0x0b0b, 0x0b32, 0x0b33, + 0x0b34, 0x0b35, 0x0b36, 0x0b37, 0x0b38, 0x0b39, 0x0bb0, 0x0bf7, + 0x0bf8, 0x0bf9, 0x0bfa, 0x0bfb, 0x0bfc, 0x0bfd, 0x0bfe, 0x0bff, + 0x1254, 0x1255, 0x1256, 0x1257, 0x1270, 0x1271, 0x1272, 0x1273, + 0x1274, 0x1275, 0x12ab, 0x12ac, 0x12ad, 0x12ae, 0x12af, 0x12b0, + 0x12b1, 0x1315, 0x1316, 0x1317, 0x13bf, 0x13c0, 0x13c1, 0x13c2, + 0x13c3, 0x13c4, 0x13c5, 0x13c6, 0x13c7, 0x13c8, 0x13c9, 0x13ca, + 0x13cb, 0x13cc, 0x13cd, 0x1a06, 0x1a07, 0x1a28, 0x1a29, 0x1a2a, + 0x1a2b, 0x1a2c, 0x1a2d, 0x1a80, 0x1abb, 0x1abc, 0x1abd, 0x1ad8, + 0x1ad9, 0x0094, 0x0095, 0x0096, 0x0097, 0x00a0, 0x00a1, 0x00a2, + 0x00a3, 0x0831, 0x0832, 0x0833, 0x0834, 0x0835, 0x0836, 0x0837, + 0x0838, 0x0839, 0x083a, 0x083b, 0x0939, 0x093a, 0x093b, 0x093c, + 0x093d, 0x093e, 0x093f, 0x09a0, 0x09a1, 0x09a2, 0x09a3, 0x09a4, + 0x09a5, 0x11ac, 0x11ad, 0x11ae, 0x11af, 0x11b0, 0x11b1, 0x11b2, + 0x11b3, 0x11b4, 0x11b5, 0x11b6, 0x11b7, 0x11b8, 0x11b9, 0x11ba, + 0x11bb, 0x132f, 0x1454, 0x1455, 0x1456, 0x1457, 0x1458, 0x1459, + 0x145a, 0x145b, 0x145c, 0x145d, 0x145e, 0x145f, 0x1460, 0x1461, + 0x1462, 0x1463, 0x1464, 0x1465, 0x1466, 0x1467, 0x1468, 0x1469, + 0x146a, 0x146b, 0x17de, 0x17df, 0x17e0, 0x17e1, 0x17e2, 0x17e3, + 0x17e4, 0x17e5, 0x17e6, 0x17e7, 0x17e8, 0x17e9, 0x17ea, 0x17eb, + 0x17ec, 0x17ed, 0x2540, 0x2541, 0x2542, 0x2543, 0x2544, 0x2545, + 0x2546, 0x2547, 0x2548, 0x2549, 0x254a, 0x254b, 0x254c, 0x254d, + 0x254e, 0x254f, 0x2550, 0x2551, 0x2552, 0x2553, 0x2554, 0x2555, + 0x2628, 0x2766, 0x2767, 0x2768, 0x2769, 0x276a, 0x276b, 0x276c, + 0x276d, 0x276e, 0x276f, 0x2770, 0x2771, 0x2772, 0x2773, 0x2774, + 0x2775, 0x2776, 0x2777, 0x2778, 0x2779, 0x277a, 0x277b, 0x277c, + 0x277d, 0x3503, 0x3544, 0x3545, 0x3546, 0x3547, 0x3560, 0x3561, + 0x3562, 0x3563, 0x3564, 0x3565, 0x3566, 0x3567, 0x3568, 0x3569, + 0x356a, 0x356b, 0x356c, 0x356d, 0x356e, 0x356f, 0x3570, 0x3571, + 0x3572, 0x3573, 0x3574, 0x3575, 0x03f0, 0x103d, 0x103e, 0x103f, + 0x1040, 0x1041, 0x1042, 0x1043, 0x1044, 0x1045, 0x1046, 0x1047, + 0x1048, 0x1049, 0x104a, 0x104b, 0x104c, 0x104d, 0x104e, 0x104f, + 0x1050, 0x1051, 0x1052, 0x1053, 0x1054, 0x1055, 0x1056, 0x1057, + 0x1058, 0x1059, 0x105a, 0x105b, 0x105c, 0x105d, 0x105e, 0x105f, + 0x1060, 0x1061, 0x1270, 0x1271, 0x21b8, 0x21b9, 0x21ba, 0x21bb, + 0x21bc, 0x21bd, 0x21be, 0x21bf, 0x21f0, 0x21f1, 0x21f2, 0x21f3, + 0x21f4, 0x21f5, 0x21f6, 0x21f7, 0x21f8, 0x21f9, 0x21fa, 0x21fb, + 0x21fc, 0x21fd, 0x21fe, 0x21ff, 0x2340, 0x2341, 0x2342, 0x2343, + 0x2344, 0x2345, 0x2346, 0x2347, 0x2348, 0x2349, 0x234a, 0x234b, + 0x234c, 0x234d, 0x234e, 0x234f, 0x2350, 0x2351, 0x2352, 0x2353, + 0x2354, 0x2355, 0x2356, 0x2357, 0x265c, 0x2f88, 0x2f89, 0x2f8a, + 0x2f8b, 0x2f8c, 0x2f8d, 0x2f8e, 0x2f8f, 0x2f90, 0x2f91, 0x2f92, + 0x2f93, 0x2f94, 0x2f95, 0x2f96, 0x2f97, 0x2f98, 0x2f99, 0x2f9a, + 0x2f9b, 0x2f9c, 0x2f9d, 0x2f9e, 0x2f9f, 0x2fa0, 0x2fa1, 0x2fa2, + 0x2fa3, 0x2fa4, 0x2fa5, 0x2fa6, 0x2fa7, 0x2fa8, 0x2fa9, 0x2faa, + 0x2fab, 0x2fac, 0x2fad, 0x2fae, 0x2faf, 0x2fb0, 0x2fb1, 0x2fb2, + 0x2fb3, 0x2fb4, 0x2fb5, 0x2fb6, 0x2fb7, 0x2fb8, 0x2fb9, 0x2fba, + 0x2fbb, 0x4c52, 0x4c53, 0x4e28, 0x4e29, 0x4e2a, 0x4e2b, 0x4e2c, + 0x4e2d, 0x4e2e, 0x4e2f, 0x4e30, 0x4e31, 0x4e32, 0x4e33, 0x4e34, + 0x4e35, 0x4e36, 0x4e37, 0x4e38, 0x4e39, 0x4e3a, 0x4e3b, 0x4e3c, + 0x4e3d, 0x4e3e, 0x4e3f, 0x4e80, 0x4e81, 0x4e82, 0x4e83, 0x4e84, + 0x4e85, 0x4e86, 0x4e87, 0x4e88, 0x4e89, 0x4e8a, 0x4e8b, 0x4e8c, + 0x4e8d, 0x4e8e, 0x4e8f, 0x4e90, 0x4e91, 0x4e92, 0x4e93, 0x4e94, + 0x4e95, 0x4e96, 0x4e97, 0x4e98, 0x4e99, 0x4e9a, 0x4e9b, 0x4e9c, + 0x4e9d, 0x4e9e, 0x4e9f, 0x4ea0, 0x4ea1, 0x4ea2, 0x4ea3, 0x4ea4, + 0x4ea5, 0x4ea6, 0x4ea7, 0x4ea8, 0x4ea9, 0x4eaa, 0x4eab, 0x4eac, + 0x4ead, 0x4eae, 0x4eaf, 0x4eb0, 0x4eb1, 0x4eb2, 0x4eb3, 0x4eb4, + 0x4eb5, 0x4eb6, 0x4eb7, 0x4eb8, 0x4eb9, 0x4eba, 0x4ebb, 0x4ebc, + 0x4ebd, 0x4ebe, 0x4ebf, 0x4ec0, 0x4ec1, 0x4ec2, 0x4ec3, 0x4ec4, + 0x4ec5, 0x4ec6, 0x4ec7, 0x4ec8, 0x4ec9, 0x4eca, 0x4ecb, 0x6a04, + 0x6a05, 0x07e2, 0x07e3, 0x07e4, 0x07e5, 0x07e6, 0x07e7, 0x07e8, + 0x07e9, 0x07ea, 0x07eb, 0x07ec, 0x07ed, 0x07ee, 0x07ef, 0x07f0, + 0x07f1, 0x07f2, 0x07f3, 0x07f4, 0x07f5, 0x07f6, 0x07f7, 0x07f8, + 0x07f9, 0x07fa, 0x07fb, 0x07fc, 0x07fd, 0x07fe, 0x07ff, 0x2000, + 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, + 0x2009, 0x200a, 0x200b, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, + 0x2011, 0x2012, 0x2013, 0x2014, 0x2015, 0x2016, 0x2017, 0x2018, + 0x2019, 0x201a, 0x201b, 0x201c, 0x201d, 0x201e, 0x201f, 0x2020, + 0x2021, 0x2022, 0x2023, 0x2024, 0x2025, 0x2026, 0x2027, 0x2028, + 0x2029, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e, 0x202f, 0x2030, + 0x2031, 0x2032, 0x2033, 0x2034, 0x2035, 0x2036, 0x2037, 0x2038, + 0x2039, 0x203a, 0x203b, 0x203c, 0x203d, 0x203e, 0x203f, 0x2040, + 0x2041, 0x2042, 0x2043, 0x2044, 0x2045, 0x2046, 0x2047, 0x2048, + 0x2049, 0x204a, 0x204b, 0x204c, 0x204d, 0x204e, 0x204f, 0x2050, + 0x2051, 0x2052, 0x2053, 0x2054, 0x2055, 0x2056, 0x2057, 0x2058, + 0x2059, 0x205a, 0x205b, 0x205c, 0x205d, 0x205e, 0x205f, 0x2060, + 0x2061, 0x2062, 0x2063, 0x2064, 0x2065, 0x2066, 0x2067, 0x2068, + 0x2069, 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0x2070, + 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, + 0x2079, 0x4cba, 0x4cbb, 0x5d88, 0x5d89, 0x5d8a, 0x5d8b, 0x5d8c, + 0x5d8d, 0x5d8e, 0x5d8f, 0x5db0, 0x5db1, 0x5db2, 0x5db3, 0x5db4, + 0x5db5, 0x5db6, 0x5db7, 0x5db8, 0x5db9, 0x5dba, 0x5dbb, 0x5dbc, + 0x5dbd, 0x5dbe, 0x5dbf, 0x5e40, 0x5e41, 0x5e42, 0x5e43, 0x5e44, + 0x5e45, 0x5e46, 0x5e47, 0x5e48, 0x5e49, 0x5e4a, 0x5e4b, 0x5e4c, + 0x5e4d, 0x5e4e, 0x5e4f, 0x5e50, 0x5e51, 0x5e52, 0x5e53, 0x5e54, + 0x5e55, 0x5e56, 0x5e57, 0x5e58, 0x5e59, 0x5e5a, 0x5e5b, 0x5e5c, + 0x5e5d, 0x5e5e, 0x5e5f, 0x5e60, 0x5e61, 0x5e62, 0x5e63, 0x5e64, + 0x5e65, 0x5e66, 0x5e67, 0x5e68, 0x5e69, 0x5e6a, 0x5e6b, 0x5e6c, + 0x5e6d, 0x5e6e, 0x5e6f, 0x5e70, 0x5e71, 0x5e72, 0x5e73, 0x5e74, + 0x5e75, 0x5e76, 0x5e77, 0x5e78, 0x5e79, 0x5e7a, 0x5e7b, 0x5e7c, + 0x5e7d, 0x5e7e, 0x5e7f, 0x5e80, 0x5e81, 0x5e82, 0x5e83, 0x5e84, + 0x5e85, 0x5e86, 0x5e87, 0x5e88, 0x5e89, 0x5e8a, 0x5e8b, 0x5e8c, + 0x5e8d, 0x5e8e, 0x5e8f, 0x5e90, 0x5e91, 0x5e92, 0x5e93, 0x5e94, + 0x5e95, 0x5e96, 0x5e97, 0x5e98, 0x5e99, 0x5e9a, 0x5e9b, 0x5e9c, + 0x5e9d, 0x5e9e, 0x5e9f, 0x5ea0, 0x5ea1, 0x5ea2, 0x5ea3, 0x5ea4, + 0x5ea5, 0x5ea6, 0x5ea7, 0x5ea8, 0x5ea9, 0x5eaa, 0x5eab, 0x5eac, + 0x5ead, 0x5eae, 0x5eaf, 0x5eb0, 0x5eb1, 0x5eb2, 0x5eb3, 0x5eb4, + 0x5eb5, 0x5eb6, 0x5eb7, 0x5eb8, 0x5eb9, 0x5eba, 0x5ebb, 0x5ebc, + 0x5ebd, 0x5ebe, 0x5ebf, 0x5ec0, 0x5ec1, 0x5ec2, 0x5ec3, 0x5ec4, + 0x5ec5, 0x5ec6, 0x5ec7, 0x5ec8, 0x5ec9, 0x5eca, 0x5ecb, 0x5ecc, + 0x5ecd, 0x5ece, 0x5ecf, 0x5ed0, 0x5ed1, 0x5ed2, 0x5ed3, 0x5ed4, + 0x5ed5, 0x5ed6, 0x5ed7, 0x5ed8, 0x5ed9, 0x5eda, 0x5edb, 0x5edc, + 0x5edd, 0x5ede, 0x5edf, 0x5ee0, 0x5ee1, 0x5ee2, 0x5ee3, 0x5ee4, + 0x5ee5, 0x5ee6, 0x5ee7, 0x5ee8, 0x5ee9, 0x5eea, 0x5eeb, 0x5eec, + 0x5eed, 0x5eee, 0x5eef, 0x5ef0, 0x5ef1, 0x5ef2, 0x5ef3, 0x5ef4, + 0x5ef5, 0x5ef6, 0x5ef7, 0x5ef8, 0x5ef9, 0x5efa, 0x5efb, 0x5efc, + 0x5efd, 0x5efe, 0x5eff, 0x5f00, 0x5f01, 0x5f02, 0x5f03, 0x5f04, + 0x5f05, 0x5f06, 0x5f07, 0x5f08, 0x5f09, 0x5f0a, 0x5f0b, 0x5f0c, + 0x5f0d, 0x5f0e, 0x5f0f, 0x0000, +}; + +const UINT8 table0_mv_bits[1100] = { + 1, 4, 4, 4, 5, 5, 5, 6, + 6, 6, 7, 7, 7, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, + 16, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 8, +}; + +const UINT8 table0_mvx[1099] = { + 32, 32, 31, 32, 33, 31, 33, 31, + 33, 32, 34, 32, 30, 32, 31, 34, + 35, 32, 34, 33, 29, 33, 30, 30, + 31, 31, 35, 29, 33, 35, 33, 34, + 31, 29, 30, 34, 30, 36, 28, 32, + 34, 37, 30, 27, 32, 25, 39, 32, + 34, 32, 35, 35, 35, 31, 35, 29, + 32, 29, 30, 29, 37, 27, 36, 38, + 37, 33, 32, 31, 29, 31, 28, 36, + 33, 30, 34, 33, 33, 28, 27, 25, + 31, 26, 39, 32, 32, 31, 33, 39, + 31, 38, 28, 36, 21, 23, 43, 36, + 34, 41, 30, 25, 28, 31, 30, 34, + 38, 35, 61, 34, 28, 30, 37, 37, + 35, 27, 36, 3, 59, 38, 37, 32, + 31, 29, 26, 33, 37, 33, 27, 27, + 35, 34, 34, 40, 42, 33, 32, 29, + 4, 5, 28, 24, 25, 35, 39, 38, + 32, 23, 27, 32, 30, 35, 26, 34, + 60, 36, 29, 22, 26, 41, 7, 30, + 38, 30, 36, 29, 30, 41, 26, 25, + 32, 34, 24, 39, 1, 25, 39, 32, + 28, 29, 32, 38, 26, 36, 28, 63, + 28, 39, 23, 21, 26, 35, 31, 35, + 57, 31, 29, 29, 28, 30, 27, 35, + 2, 38, 40, 34, 37, 29, 38, 43, + 26, 32, 33, 42, 24, 40, 28, 32, + 32, 32, 36, 32, 43, 25, 21, 31, + 30, 31, 41, 29, 33, 37, 26, 37, + 27, 59, 23, 33, 35, 31, 31, 37, + 38, 39, 32, 23, 32, 27, 37, 36, + 31, 40, 25, 27, 38, 31, 36, 28, + 31, 36, 25, 45, 3, 34, 38, 39, + 40, 38, 30, 32, 19, 24, 25, 26, + 45, 20, 24, 33, 33, 31, 41, 34, + 39, 47, 40, 58, 59, 41, 33, 3, + 17, 61, 42, 30, 26, 29, 36, 61, + 33, 37, 62, 28, 25, 38, 25, 38, + 17, 23, 34, 33, 21, 33, 49, 27, + 32, 23, 27, 22, 24, 22, 39, 43, + 27, 37, 6, 42, 47, 26, 30, 31, + 41, 39, 33, 22, 45, 36, 32, 45, + 19, 22, 30, 5, 5, 17, 29, 22, + 31, 31, 43, 37, 27, 32, 32, 32, + 33, 34, 43, 35, 29, 26, 22, 32, + 19, 32, 25, 31, 41, 49, 28, 34, + 28, 39, 34, 19, 37, 38, 29, 21, + 36, 42, 24, 48, 16, 28, 49, 22, + 34, 31, 38, 39, 44, 11, 35, 30, + 33, 33, 23, 28, 33, 46, 15, 13, + 24, 41, 24, 34, 34, 30, 26, 24, + 14, 60, 21, 29, 39, 23, 35, 37, + 63, 45, 33, 34, 47, 41, 22, 42, + 35, 35, 23, 32, 35, 43, 32, 7, + 31, 41, 20, 31, 16, 13, 63, 25, + 30, 32, 35, 30, 30, 31, 42, 47, + 39, 38, 40, 40, 51, 55, 56, 18, + 21, 39, 39, 33, 17, 41, 23, 24, + 43, 25, 31, 20, 19, 45, 1, 34, + 31, 22, 35, 15, 46, 46, 35, 31, + 28, 29, 29, 23, 41, 27, 14, 53, + 53, 27, 24, 32, 57, 32, 17, 42, + 37, 29, 33, 1, 25, 32, 32, 63, + 26, 40, 44, 36, 31, 39, 20, 20, + 44, 23, 33, 34, 35, 33, 33, 28, + 41, 23, 41, 41, 29, 25, 26, 49, + 29, 24, 37, 49, 50, 51, 51, 26, + 39, 25, 26, 15, 39, 18, 42, 17, + 4, 31, 32, 32, 60, 1, 42, 32, + 0, 12, 19, 35, 21, 41, 17, 26, + 20, 45, 46, 32, 37, 22, 47, 29, + 31, 27, 29, 30, 21, 33, 35, 18, + 25, 33, 50, 51, 42, 2, 15, 51, + 53, 33, 25, 29, 55, 37, 38, 33, + 38, 59, 38, 33, 39, 13, 32, 40, + 61, 61, 32, 9, 44, 3, 31, 29, + 25, 31, 27, 23, 9, 25, 9, 29, + 20, 30, 30, 42, 18, 28, 25, 28, + 28, 21, 29, 43, 29, 43, 26, 44, + 44, 21, 38, 21, 24, 45, 45, 35, + 39, 22, 35, 36, 34, 34, 45, 34, + 29, 31, 46, 25, 46, 16, 17, 31, + 20, 32, 47, 47, 47, 32, 49, 49, + 49, 31, 1, 27, 28, 39, 39, 21, + 36, 23, 51, 2, 40, 51, 32, 53, + 24, 30, 24, 30, 21, 40, 57, 57, + 31, 41, 58, 32, 12, 4, 32, 34, + 59, 31, 32, 13, 9, 35, 26, 35, + 37, 61, 37, 63, 26, 29, 41, 38, + 23, 20, 41, 26, 41, 42, 42, 42, + 26, 26, 26, 26, 1, 26, 37, 37, + 37, 23, 34, 42, 27, 43, 34, 27, + 31, 24, 33, 16, 3, 31, 24, 33, + 24, 4, 44, 44, 11, 44, 31, 13, + 13, 44, 45, 13, 25, 22, 38, 26, + 38, 38, 39, 32, 30, 39, 30, 22, + 32, 26, 30, 47, 47, 47, 19, 47, + 30, 31, 35, 8, 23, 47, 47, 27, + 35, 47, 31, 48, 35, 19, 36, 49, + 49, 33, 31, 39, 27, 39, 49, 49, + 50, 50, 50, 39, 31, 51, 51, 39, + 28, 33, 33, 21, 40, 31, 52, 53, + 40, 53, 9, 33, 31, 53, 54, 54, + 54, 55, 55, 34, 15, 56, 25, 56, + 21, 21, 40, 40, 25, 40, 58, 36, + 5, 41, 41, 12, 60, 41, 41, 37, + 22, 61, 18, 29, 29, 30, 61, 30, + 61, 62, 62, 30, 30, 63, 18, 13, + 30, 23, 19, 20, 20, 41, 13, 2, + 5, 5, 1, 5, 32, 6, 32, 35, + 20, 35, 27, 35, 35, 36, 36, 13, + 36, 41, 41, 41, 3, 30, 42, 27, + 20, 30, 27, 28, 30, 21, 33, 33, + 14, 24, 30, 42, 24, 33, 25, 42, + 43, 14, 43, 43, 14, 43, 7, 36, + 37, 37, 37, 37, 7, 14, 25, 43, + 43, 44, 15, 37, 7, 7, 3, 1, + 8, 15, 15, 8, 44, 44, 44, 45, + 45, 45, 45, 8, 8, 45, 21, 45, + 28, 28, 28, 21, 28, 28, 22, 37, + 46, 46, 37, 8, 29, 37, 29, 22, + 46, 37, 22, 29, 47, 47, 38, 38, + 16, 38, 38, 33, 38, 22, 47, 47, + 29, 25, 16, 0, 48, 1, 34, 48, + 48, 34, 25, 26, 26, 49, 49, 26, + 1, 49, 4, 26, 4, 49, 1, 9, + 49, 49, 49, 10, 49, 17, 38, 17, + 17, 50, 38, 50, 50, 22, 38, 51, + 38, 38, 51, 39, 39, 18, 22, 39, + 51, 22, 52, 52, 52, 39, 53, 53, + 10, 23, 18, 29, 10, 53, 29, 54, + 11, 54, 11, 11, 55, 1, 18, 55, + 55, 55, 55, 55, 55, 29, 34, 18, + 29, 56, 56, 34, 57, 34, 34, 29, + 29, 57, 57, 35, 35, 35, 35, 35, + 39, 35, 59, 59, 18, 59, 39, 30, + 18, 40, 60, 60, 61, 30, 18, 61, + 61, 19, 19, +}; + +const UINT8 table0_mvy[1099] = { + 32, 31, 32, 33, 32, 31, 31, 33, + 33, 34, 32, 30, 32, 35, 34, 31, + 32, 29, 33, 30, 32, 34, 33, 31, + 30, 35, 31, 31, 29, 33, 35, 30, + 29, 33, 34, 34, 30, 32, 32, 36, + 29, 32, 35, 32, 28, 32, 32, 27, + 35, 37, 34, 29, 30, 36, 35, 34, + 25, 30, 29, 35, 33, 31, 31, 32, + 31, 28, 39, 28, 29, 37, 31, 33, + 27, 36, 28, 36, 37, 33, 33, 31, + 27, 32, 31, 38, 26, 25, 25, 33, + 39, 31, 34, 30, 32, 32, 32, 34, + 36, 32, 28, 33, 30, 38, 37, 27, + 33, 28, 32, 37, 35, 38, 29, 34, + 27, 29, 29, 32, 32, 34, 35, 3, + 26, 36, 31, 38, 30, 26, 35, 34, + 37, 26, 25, 32, 32, 39, 23, 37, + 32, 32, 29, 32, 29, 36, 29, 30, + 41, 31, 30, 21, 39, 25, 34, 38, + 32, 35, 39, 32, 33, 33, 32, 27, + 29, 25, 28, 27, 26, 31, 30, 35, + 24, 24, 31, 34, 32, 30, 35, 40, + 28, 38, 5, 35, 29, 36, 36, 32, + 38, 30, 33, 31, 35, 26, 23, 38, + 32, 41, 28, 25, 37, 40, 37, 39, + 32, 36, 33, 39, 25, 26, 28, 31, + 28, 42, 23, 31, 33, 31, 39, 1, + 59, 22, 27, 4, 33, 34, 33, 24, + 41, 3, 35, 41, 41, 28, 36, 36, + 28, 33, 35, 21, 23, 21, 22, 37, + 27, 27, 43, 29, 60, 39, 27, 25, + 59, 34, 27, 27, 26, 40, 37, 27, + 61, 26, 39, 33, 31, 22, 37, 25, + 30, 25, 24, 61, 31, 34, 25, 38, + 32, 32, 30, 3, 61, 43, 29, 23, + 28, 32, 28, 32, 31, 34, 5, 33, + 32, 33, 33, 42, 37, 23, 38, 31, + 40, 26, 32, 26, 37, 38, 36, 24, + 29, 30, 20, 22, 29, 24, 32, 41, + 2, 34, 25, 33, 29, 31, 39, 35, + 36, 24, 32, 30, 33, 27, 44, 60, + 30, 36, 19, 34, 31, 24, 16, 35, + 32, 38, 21, 33, 31, 31, 21, 35, + 5, 17, 29, 38, 38, 18, 58, 19, + 43, 41, 30, 41, 43, 39, 29, 7, + 29, 17, 28, 19, 28, 31, 25, 19, + 40, 26, 21, 33, 39, 23, 40, 30, + 39, 34, 35, 32, 32, 24, 33, 30, + 40, 47, 39, 37, 32, 33, 24, 23, + 45, 47, 27, 23, 42, 32, 32, 33, + 36, 37, 37, 17, 18, 22, 40, 38, + 32, 31, 35, 24, 17, 25, 17, 23, + 33, 34, 51, 42, 31, 36, 36, 29, + 21, 22, 37, 44, 43, 25, 47, 33, + 45, 27, 31, 58, 31, 32, 31, 38, + 43, 20, 47, 45, 54, 1, 26, 34, + 38, 14, 22, 24, 33, 34, 32, 32, + 37, 21, 23, 49, 35, 23, 28, 39, + 39, 23, 55, 33, 30, 30, 63, 16, + 42, 28, 13, 33, 33, 35, 19, 46, + 43, 17, 19, 36, 39, 24, 31, 32, + 33, 26, 28, 62, 33, 63, 33, 39, + 19, 49, 17, 31, 43, 13, 15, 29, + 25, 35, 33, 23, 49, 41, 28, 29, + 34, 38, 7, 61, 11, 50, 13, 41, + 19, 47, 25, 26, 15, 42, 41, 29, + 45, 27, 17, 35, 32, 29, 32, 24, + 13, 26, 26, 31, 24, 33, 28, 30, + 31, 11, 45, 46, 33, 33, 35, 57, + 32, 32, 35, 45, 34, 11, 37, 42, + 39, 37, 31, 49, 21, 27, 29, 47, + 53, 40, 51, 16, 26, 1, 40, 30, + 41, 44, 34, 25, 27, 31, 35, 35, + 31, 15, 49, 1, 35, 40, 5, 58, + 21, 29, 22, 59, 45, 31, 9, 26, + 9, 29, 11, 32, 30, 3, 13, 20, + 18, 20, 11, 3, 29, 40, 31, 53, + 30, 17, 20, 37, 31, 42, 47, 47, + 54, 38, 9, 34, 13, 37, 21, 25, + 27, 43, 42, 45, 40, 25, 27, 46, + 22, 25, 53, 20, 2, 14, 39, 15, + 22, 44, 34, 21, 38, 33, 27, 48, + 34, 52, 35, 47, 49, 54, 2, 13, + 23, 52, 29, 45, 22, 49, 54, 21, + 40, 42, 31, 30, 29, 34, 0, 25, + 23, 51, 24, 59, 28, 38, 29, 31, + 2, 13, 31, 8, 31, 33, 12, 45, + 41, 7, 14, 30, 25, 18, 43, 20, + 43, 35, 44, 1, 49, 42, 42, 18, + 41, 38, 41, 44, 53, 11, 20, 25, + 45, 46, 47, 48, 39, 52, 46, 49, + 63, 55, 44, 38, 13, 13, 57, 22, + 51, 16, 12, 28, 35, 57, 25, 20, + 26, 28, 28, 29, 32, 31, 62, 34, + 35, 35, 19, 49, 48, 39, 40, 18, + 43, 46, 11, 6, 48, 19, 49, 41, + 10, 23, 58, 17, 21, 23, 34, 30, + 60, 0, 44, 34, 26, 37, 46, 43, + 49, 59, 4, 34, 59, 37, 22, 25, + 28, 46, 6, 40, 59, 42, 36, 61, + 28, 30, 31, 43, 10, 22, 23, 47, + 20, 52, 55, 36, 25, 16, 1, 11, + 27, 29, 5, 63, 18, 41, 31, 34, + 38, 1, 5, 13, 28, 31, 17, 38, + 39, 41, 36, 37, 22, 39, 33, 43, + 43, 15, 17, 49, 30, 21, 22, 20, + 10, 17, 25, 54, 57, 3, 34, 8, + 36, 25, 31, 14, 15, 19, 29, 25, + 18, 39, 53, 22, 27, 20, 29, 33, + 41, 42, 35, 62, 50, 29, 53, 50, + 35, 55, 42, 61, 63, 4, 7, 42, + 21, 46, 47, 49, 27, 46, 17, 55, + 41, 50, 63, 4, 56, 18, 8, 10, + 18, 51, 63, 36, 55, 18, 5, 55, + 9, 29, 17, 21, 30, 27, 1, 59, + 7, 11, 12, 15, 5, 42, 24, 41, + 43, 7, 27, 22, 25, 31, 30, 37, + 22, 39, 53, 29, 36, 37, 48, 0, + 5, 13, 17, 31, 32, 26, 46, 28, + 44, 45, 46, 53, 49, 51, 3, 41, + 3, 22, 42, 33, 5, 45, 7, 22, + 40, 53, 24, 14, 25, 27, 10, 12, + 34, 16, 17, 53, 20, 26, 39, 45, + 18, 45, 35, 33, 31, 49, 4, 39, + 42, 11, 51, 5, 13, 26, 27, 17, + 52, 30, 0, 22, 12, 34, 62, 36, + 38, 41, 47, 30, 63, 38, 41, 43, + 59, 33, 45, 37, 38, 40, 47, 24, + 48, 49, 30, 1, 10, 22, 49, 15, + 39, 59, 31, 32, 33, 18, 13, 15, + 31, 21, 27, 44, 42, 39, 46, 17, + 26, 32, 30, 31, 0, 30, 34, 9, + 12, 13, 25, 31, 32, 55, 43, 35, + 61, 33, 35, 46, 25, 47, 48, 62, + 63, 38, 61, 1, 2, 5, 7, 9, + 46, 10, 34, 35, 36, 55, 51, 7, + 40, 23, 34, 37, 5, 13, 42, 18, + 25, 27, 28, +}; + +/* motion vector table 1 */ +const UINT16 table1_mv_code[1100] = { + 0x0000, 0x0007, 0x0009, 0x000f, 0x000a, 0x0011, 0x001a, 0x001c, + 0x0011, 0x0031, 0x0025, 0x002d, 0x002f, 0x006f, 0x0075, 0x0041, + 0x004c, 0x004e, 0x005c, 0x0060, 0x0062, 0x0066, 0x0068, 0x0069, + 0x006b, 0x00a6, 0x00c1, 0x00cb, 0x00cc, 0x00ce, 0x00da, 0x00e8, + 0x00ee, 0x0087, 0x0090, 0x009e, 0x009f, 0x00ba, 0x00ca, 0x00d8, + 0x00db, 0x00df, 0x0104, 0x0109, 0x010c, 0x0143, 0x0145, 0x014a, + 0x0156, 0x015c, 0x01b3, 0x01d3, 0x01da, 0x0103, 0x0109, 0x010b, + 0x0122, 0x0127, 0x0134, 0x0161, 0x0164, 0x0176, 0x0184, 0x018d, + 0x018e, 0x018f, 0x0190, 0x0193, 0x0196, 0x019d, 0x019e, 0x019f, + 0x01a9, 0x01b2, 0x01b4, 0x01ba, 0x01bb, 0x01bc, 0x0201, 0x0202, + 0x0205, 0x0207, 0x020d, 0x0210, 0x0211, 0x0215, 0x021b, 0x021f, + 0x0281, 0x0285, 0x0290, 0x029c, 0x029d, 0x02a2, 0x02a7, 0x02a8, + 0x02aa, 0x02b0, 0x02b1, 0x02b4, 0x02bc, 0x02bf, 0x0320, 0x0326, + 0x0327, 0x0329, 0x032a, 0x0336, 0x0360, 0x0362, 0x0363, 0x0372, + 0x03b2, 0x03bc, 0x03bd, 0x0203, 0x0205, 0x021a, 0x0249, 0x024a, + 0x024c, 0x02c7, 0x02ca, 0x02ce, 0x02ef, 0x030d, 0x0322, 0x0325, + 0x0338, 0x0373, 0x037a, 0x0409, 0x0415, 0x0416, 0x0418, 0x0428, + 0x042d, 0x042f, 0x0434, 0x0508, 0x0509, 0x0510, 0x0511, 0x051c, + 0x051e, 0x0524, 0x0541, 0x0543, 0x0546, 0x0547, 0x054d, 0x0557, + 0x055f, 0x056a, 0x056c, 0x056d, 0x056f, 0x0576, 0x0577, 0x057a, + 0x057b, 0x057c, 0x057d, 0x0600, 0x0601, 0x0603, 0x0614, 0x0616, + 0x0617, 0x061c, 0x061f, 0x0642, 0x0648, 0x0649, 0x064a, 0x064b, + 0x0657, 0x0668, 0x0669, 0x066b, 0x066e, 0x067f, 0x06c2, 0x06c8, + 0x06cb, 0x06de, 0x06df, 0x06e2, 0x06e3, 0x06ef, 0x0748, 0x074b, + 0x076e, 0x076f, 0x077c, 0x0409, 0x0423, 0x0428, 0x0429, 0x042a, + 0x042b, 0x0432, 0x0433, 0x0496, 0x049a, 0x04d5, 0x04db, 0x0581, + 0x0582, 0x058b, 0x058c, 0x058d, 0x0598, 0x0599, 0x059a, 0x059e, + 0x05dd, 0x0619, 0x0632, 0x0633, 0x0648, 0x0672, 0x06a1, 0x06a2, + 0x06a3, 0x06af, 0x06e2, 0x06e3, 0x06e4, 0x0800, 0x0801, 0x0802, + 0x0803, 0x081a, 0x081b, 0x0829, 0x082f, 0x0832, 0x083e, 0x083f, + 0x0852, 0x0853, 0x0858, 0x086b, 0x0877, 0x0878, 0x0879, 0x087a, + 0x087b, 0x0a00, 0x0a01, 0x0a0d, 0x0a0e, 0x0a0f, 0x0a24, 0x0a37, + 0x0a3a, 0x0a3b, 0x0a3e, 0x0a46, 0x0a47, 0x0a4a, 0x0a4b, 0x0a5f, + 0x0a79, 0x0a7a, 0x0a7b, 0x0a80, 0x0a81, 0x0a84, 0x0a85, 0x0a99, + 0x0aa5, 0x0aa6, 0x0ab8, 0x0aba, 0x0abb, 0x0abc, 0x0abd, 0x0ac8, + 0x0ace, 0x0acf, 0x0ad7, 0x0adc, 0x0aeb, 0x0c04, 0x0c25, 0x0c26, + 0x0c27, 0x0c2a, 0x0c2b, 0x0c3a, 0x0c3b, 0x0c3c, 0x0c3d, 0x0ca0, + 0x0cad, 0x0cd4, 0x0cd5, 0x0cfc, 0x0cfd, 0x0d86, 0x0d92, 0x0d93, + 0x0d94, 0x0d95, 0x0db0, 0x0db8, 0x0db9, 0x0dba, 0x0dbb, 0x0dc0, + 0x0dc2, 0x0dc3, 0x0dda, 0x0ddb, 0x0ddc, 0x0ddd, 0x0e92, 0x0e93, + 0x0e94, 0x0e95, 0x0ec7, 0x0ecc, 0x0ece, 0x0ecf, 0x0ed8, 0x0ed9, + 0x0eda, 0x0edb, 0x0808, 0x0809, 0x080a, 0x0810, 0x0811, 0x0844, + 0x0845, 0x0861, 0x0862, 0x0863, 0x086c, 0x0922, 0x0923, 0x092e, + 0x092f, 0x0936, 0x0937, 0x09b1, 0x09b2, 0x09b3, 0x09b4, 0x09b5, + 0x09b8, 0x09b9, 0x09ba, 0x09bb, 0x09bc, 0x09bd, 0x09be, 0x09bf, + 0x0b00, 0x0b15, 0x0b2c, 0x0b2d, 0x0b2e, 0x0b2f, 0x0b36, 0x0bb9, + 0x0c28, 0x0c2a, 0x0c2b, 0x0c2c, 0x0c2d, 0x0c2e, 0x0c2f, 0x0c30, + 0x0c31, 0x0c38, 0x0c60, 0x0c61, 0x0c62, 0x0c63, 0x0c8d, 0x0c8e, + 0x0c8f, 0x0c92, 0x0cbe, 0x0cbf, 0x0ce6, 0x0ce7, 0x0d40, 0x0d41, + 0x0d57, 0x0d58, 0x0d59, 0x0d5a, 0x0d5b, 0x0d5c, 0x0d5d, 0x0d98, + 0x0d99, 0x0d9a, 0x0d9b, 0x0d9c, 0x0d9d, 0x0dad, 0x0dae, 0x0daf, + 0x0dc0, 0x0dc1, 0x0dc2, 0x0dc3, 0x0dca, 0x0dcb, 0x0dec, 0x0ded, + 0x0dee, 0x0def, 0x1018, 0x1022, 0x1023, 0x1030, 0x1031, 0x1032, + 0x1033, 0x1050, 0x1051, 0x105c, 0x1074, 0x1075, 0x1076, 0x1077, + 0x1078, 0x1079, 0x107a, 0x107b, 0x10b2, 0x10b3, 0x10b8, 0x10b9, + 0x10ba, 0x10bb, 0x10d4, 0x10ea, 0x10eb, 0x10ec, 0x10ed, 0x1404, + 0x1405, 0x1406, 0x1407, 0x1410, 0x1411, 0x1412, 0x1413, 0x1414, + 0x1415, 0x1416, 0x1417, 0x1418, 0x1419, 0x1466, 0x1467, 0x1468, + 0x1469, 0x146a, 0x146b, 0x146c, 0x146d, 0x147e, 0x147f, 0x1488, + 0x1489, 0x148a, 0x148b, 0x14b6, 0x14b7, 0x14b8, 0x14b9, 0x14ba, + 0x14bb, 0x14bc, 0x14bd, 0x14f0, 0x14f1, 0x14f8, 0x14f9, 0x14fa, + 0x14fb, 0x14fc, 0x14fd, 0x14fe, 0x14ff, 0x152a, 0x152b, 0x152c, + 0x152d, 0x152e, 0x152f, 0x1530, 0x1531, 0x1548, 0x1549, 0x154e, + 0x154f, 0x1558, 0x1559, 0x155a, 0x155b, 0x1572, 0x159a, 0x159b, + 0x15ac, 0x15ba, 0x15bb, 0x15d0, 0x15d1, 0x15d2, 0x15d3, 0x15d4, + 0x15d5, 0x181d, 0x181e, 0x181f, 0x1840, 0x1841, 0x1842, 0x1843, + 0x1844, 0x1845, 0x1846, 0x1847, 0x1848, 0x1849, 0x1861, 0x1862, + 0x1863, 0x1864, 0x1865, 0x1866, 0x1867, 0x1868, 0x1869, 0x186a, + 0x186b, 0x186c, 0x186d, 0x186e, 0x191b, 0x191c, 0x191d, 0x191e, + 0x191f, 0x1942, 0x1943, 0x1944, 0x1945, 0x1946, 0x1947, 0x1958, + 0x1959, 0x19ed, 0x19ee, 0x19ef, 0x19f0, 0x19f1, 0x19f2, 0x19f3, + 0x19f4, 0x19f5, 0x19f6, 0x19f7, 0x1b0e, 0x1b0f, 0x1b62, 0x1b63, + 0x1b64, 0x1b65, 0x1b66, 0x1b67, 0x1b68, 0x1b69, 0x1b6a, 0x1b6b, + 0x1b6c, 0x1b6d, 0x1b6e, 0x1b6f, 0x1b82, 0x1ba8, 0x1ba9, 0x1baa, + 0x1bab, 0x1bac, 0x1bad, 0x1bae, 0x1baf, 0x1bb0, 0x1bb1, 0x1bb2, + 0x1bb3, 0x1d80, 0x1d81, 0x1d82, 0x1d83, 0x1d84, 0x1d85, 0x1d86, + 0x1d87, 0x1d88, 0x1d89, 0x1d8a, 0x1d8b, 0x1d8c, 0x1d8d, 0x1007, + 0x1008, 0x1009, 0x100a, 0x100b, 0x100c, 0x100d, 0x100e, 0x100f, + 0x1016, 0x1080, 0x1081, 0x1082, 0x1083, 0x1084, 0x1085, 0x1086, + 0x1087, 0x10c0, 0x123a, 0x123b, 0x123c, 0x123d, 0x123e, 0x123f, + 0x1240, 0x1241, 0x1242, 0x1243, 0x1350, 0x1352, 0x1353, 0x1358, + 0x1359, 0x135a, 0x135b, 0x135c, 0x135d, 0x135e, 0x135f, 0x1360, + 0x1361, 0x1602, 0x1603, 0x160c, 0x160d, 0x160e, 0x160f, 0x1620, + 0x1621, 0x1622, 0x1623, 0x1624, 0x1625, 0x1626, 0x1627, 0x1628, + 0x1629, 0x166e, 0x166f, 0x167c, 0x167d, 0x167e, 0x167f, 0x1770, + 0x1771, 0x1852, 0x1853, 0x1872, 0x1873, 0x1874, 0x1875, 0x1876, + 0x1877, 0x1878, 0x1879, 0x187a, 0x187b, 0x187c, 0x187d, 0x187e, + 0x187f, 0x1918, 0x1919, 0x1926, 0x1927, 0x1970, 0x1971, 0x1972, + 0x1973, 0x1974, 0x1975, 0x1976, 0x1977, 0x1978, 0x1979, 0x197a, + 0x197b, 0x1aa0, 0x1aa1, 0x1aa2, 0x1aa3, 0x1aa4, 0x1aa5, 0x1aa6, + 0x1aa7, 0x1aa8, 0x1aa9, 0x1aaa, 0x1aab, 0x1aac, 0x1aad, 0x1b3c, + 0x1b3d, 0x1b3e, 0x1b3f, 0x1b50, 0x1b51, 0x1b52, 0x1b53, 0x1b54, + 0x1b55, 0x1b56, 0x1b57, 0x1b58, 0x1b59, 0x2032, 0x2033, 0x2034, + 0x2035, 0x2036, 0x2037, 0x2038, 0x2039, 0x203a, 0x203b, 0x203c, + 0x203d, 0x203e, 0x203f, 0x2040, 0x2041, 0x2042, 0x2043, 0x20ba, + 0x20bb, 0x20cc, 0x20cd, 0x20ce, 0x20cf, 0x20e0, 0x20e1, 0x20e2, + 0x20e3, 0x20e4, 0x20e5, 0x20e6, 0x20e7, 0x21aa, 0x21ab, 0x21c0, + 0x21c1, 0x21c2, 0x21c3, 0x21c4, 0x21c5, 0x21c6, 0x21c7, 0x21c8, + 0x21c9, 0x21ca, 0x21cb, 0x21cc, 0x21cd, 0x21ce, 0x21cf, 0x21d0, + 0x21d1, 0x21d2, 0x21d3, 0x2894, 0x2895, 0x2896, 0x2897, 0x2898, + 0x2899, 0x289a, 0x289b, 0x289c, 0x289d, 0x289e, 0x289f, 0x28c0, + 0x28c1, 0x28c2, 0x28c3, 0x28c4, 0x28c5, 0x28c6, 0x28c7, 0x28c8, + 0x28c9, 0x28ca, 0x28cb, 0x2930, 0x2931, 0x2932, 0x2933, 0x2934, + 0x2935, 0x2936, 0x2937, 0x2938, 0x2939, 0x293a, 0x293b, 0x293c, + 0x293d, 0x293e, 0x293f, 0x2960, 0x2961, 0x2962, 0x2963, 0x2964, + 0x2965, 0x2966, 0x2967, 0x2968, 0x2969, 0x296a, 0x296b, 0x2a40, + 0x2a41, 0x2a42, 0x2a43, 0x2a44, 0x2a45, 0x2a46, 0x2a47, 0x2a48, + 0x2a49, 0x2a4a, 0x2a4b, 0x2a4c, 0x2a4d, 0x2a4e, 0x2a4f, 0x2a50, + 0x2a51, 0x2a52, 0x2a53, 0x2ae6, 0x2ae7, 0x2b24, 0x2b25, 0x2b26, + 0x2b27, 0x2b28, 0x2b29, 0x2b2a, 0x2b2b, 0x2b2c, 0x2b2d, 0x2b2e, + 0x2b2f, 0x2b30, 0x2b31, 0x2b32, 0x2b33, 0x2b5a, 0x2b5b, 0x3014, + 0x3015, 0x3016, 0x3017, 0x3020, 0x3021, 0x3022, 0x3023, 0x3024, + 0x3025, 0x3026, 0x3027, 0x3028, 0x3029, 0x302a, 0x302b, 0x302c, + 0x302d, 0x302e, 0x302f, 0x3030, 0x3031, 0x3032, 0x3033, 0x3034, + 0x3035, 0x3036, 0x3037, 0x3038, 0x3039, 0x30c0, 0x30c1, 0x30de, + 0x30df, 0x3218, 0x3219, 0x321a, 0x321b, 0x321c, 0x321d, 0x321e, + 0x321f, 0x3220, 0x3221, 0x3222, 0x3223, 0x3224, 0x3225, 0x3226, + 0x3227, 0x3228, 0x3229, 0x322a, 0x322b, 0x322c, 0x322d, 0x322e, + 0x322f, 0x3230, 0x3231, 0x3232, 0x3233, 0x3234, 0x3235, 0x3378, + 0x3379, 0x337a, 0x337b, 0x337c, 0x337d, 0x337e, 0x337f, 0x33c0, + 0x33c1, 0x33c2, 0x33c3, 0x33c4, 0x33c5, 0x33c6, 0x33c7, 0x33c8, + 0x33c9, 0x33ca, 0x33cb, 0x33cc, 0x33cd, 0x33ce, 0x33cf, 0x33d0, + 0x33d1, 0x33d2, 0x33d3, 0x33d4, 0x33d5, 0x33d6, 0x33d7, 0x33d8, + 0x33d9, 0x3706, 0x3707, 0x3730, 0x3731, 0x3732, 0x3733, 0x3734, + 0x3735, 0x3736, 0x3737, 0x3738, 0x3739, 0x373a, 0x373b, 0x373c, + 0x373d, 0x373e, 0x373f, 0x3740, 0x3741, 0x3742, 0x3743, 0x3744, + 0x3745, 0x3746, 0x3747, 0x3748, 0x3749, 0x374a, 0x374b, 0x374c, + 0x374d, 0x374e, 0x374f, 0x3b34, 0x3b35, 0x3b36, 0x3b37, 0x3be8, + 0x3be9, 0x3bea, 0x3beb, 0x3bec, 0x3bed, 0x3bee, 0x3bef, 0x3bf0, + 0x3bf1, 0x3bf2, 0x3bf3, 0x3bf4, 0x3bf5, 0x3bf6, 0x3bf7, 0x3bf8, + 0x3bf9, 0x3bfa, 0x3bfb, 0x3bfc, 0x3bfd, 0x3bfe, 0x3bff, 0x2000, + 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, + 0x2009, 0x200a, 0x200b, 0x200c, 0x200d, 0x202e, 0x202f, 0x2182, + 0x2183, 0x21b4, 0x21b5, 0x21b6, 0x21b7, 0x21b8, 0x21b9, 0x21ba, + 0x21bb, 0x21bc, 0x21bd, 0x21be, 0x21bf, 0x2460, 0x2461, 0x2462, + 0x2463, 0x2464, 0x2465, 0x2466, 0x2467, 0x2468, 0x2469, 0x246a, + 0x246b, 0x246c, 0x246d, 0x246e, 0x246f, 0x2470, 0x2471, 0x2472, + 0x2473, 0x26a2, 0x26a3, 0x000b, +}; + +const UINT8 table1_mv_bits[1100] = { + 2, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 7, 7, 7, 7, 7, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 4, +}; + +const UINT8 table1_mvx[1099] = { + 32, 31, 32, 31, 33, 32, 33, 33, + 31, 34, 30, 32, 32, 34, 35, 32, + 34, 33, 29, 30, 30, 32, 31, 31, + 33, 35, 35, 33, 31, 29, 29, 33, + 34, 30, 31, 28, 36, 30, 34, 32, + 32, 37, 32, 32, 25, 27, 39, 32, + 32, 32, 38, 35, 36, 32, 37, 61, + 26, 32, 34, 35, 3, 35, 27, 28, + 29, 34, 28, 37, 31, 36, 32, 27, + 31, 30, 29, 39, 33, 29, 33, 35, + 25, 25, 29, 33, 31, 31, 31, 33, + 32, 30, 32, 32, 41, 39, 33, 36, + 32, 28, 34, 36, 38, 24, 60, 31, + 23, 28, 32, 33, 59, 32, 40, 30, + 5, 34, 32, 38, 32, 30, 43, 4, + 32, 32, 42, 31, 31, 32, 26, 38, + 26, 22, 21, 37, 61, 63, 37, 31, + 32, 33, 2, 1, 23, 33, 41, 27, + 35, 30, 38, 23, 33, 3, 28, 34, + 34, 27, 41, 29, 39, 35, 36, 29, + 32, 27, 30, 32, 24, 61, 37, 26, + 59, 25, 35, 27, 36, 37, 30, 31, + 34, 40, 3, 28, 34, 39, 32, 31, + 32, 30, 24, 28, 35, 36, 26, 32, + 31, 33, 29, 33, 39, 25, 30, 24, + 35, 59, 29, 34, 25, 30, 21, 35, + 43, 40, 32, 29, 5, 28, 31, 62, + 33, 33, 25, 31, 21, 31, 43, 31, + 34, 33, 20, 40, 39, 31, 31, 57, + 38, 32, 42, 33, 32, 31, 32, 29, + 30, 44, 5, 31, 22, 34, 36, 17, + 38, 58, 38, 35, 32, 60, 35, 24, + 32, 38, 16, 45, 42, 32, 31, 29, + 4, 30, 17, 40, 46, 48, 63, 32, + 42, 19, 41, 22, 28, 36, 45, 33, + 33, 32, 29, 7, 41, 42, 18, 33, + 33, 32, 22, 37, 1, 26, 22, 23, + 49, 28, 26, 27, 32, 33, 27, 23, + 28, 36, 15, 6, 34, 27, 31, 26, + 23, 2, 33, 32, 34, 41, 28, 32, + 41, 0, 36, 38, 34, 31, 47, 32, + 17, 31, 39, 33, 37, 51, 30, 47, + 32, 50, 32, 19, 63, 30, 25, 27, + 33, 62, 24, 31, 27, 30, 37, 31, + 45, 32, 39, 20, 46, 47, 35, 19, + 34, 1, 49, 21, 21, 14, 51, 26, + 23, 31, 36, 35, 58, 29, 29, 21, + 20, 42, 13, 28, 12, 40, 31, 33, + 39, 60, 32, 44, 33, 31, 28, 37, + 29, 32, 30, 49, 43, 28, 39, 25, + 32, 48, 2, 15, 20, 25, 31, 28, + 21, 24, 25, 15, 31, 17, 37, 43, + 18, 32, 33, 24, 33, 36, 13, 33, + 31, 39, 11, 31, 33, 32, 39, 37, + 32, 32, 29, 17, 44, 46, 36, 35, + 26, 37, 58, 32, 34, 38, 8, 38, + 38, 22, 29, 25, 16, 35, 32, 35, + 33, 43, 18, 46, 38, 50, 33, 18, + 53, 60, 13, 32, 36, 33, 51, 36, + 43, 45, 27, 42, 29, 24, 30, 25, + 31, 52, 31, 35, 38, 9, 22, 34, + 4, 17, 28, 55, 42, 25, 17, 20, + 47, 34, 33, 16, 40, 25, 16, 30, + 53, 29, 10, 11, 14, 26, 33, 4, + 35, 44, 26, 16, 31, 26, 34, 38, + 29, 31, 30, 24, 22, 61, 32, 9, + 45, 34, 31, 19, 9, 31, 46, 31, + 35, 54, 29, 57, 30, 50, 3, 31, + 63, 34, 47, 41, 51, 18, 31, 14, + 37, 38, 31, 24, 32, 31, 50, 33, + 31, 54, 27, 9, 33, 23, 19, 32, + 29, 29, 33, 28, 47, 49, 30, 47, + 33, 27, 25, 54, 44, 45, 50, 58, + 51, 48, 33, 59, 33, 34, 57, 13, + 26, 33, 13, 48, 30, 11, 7, 56, + 34, 55, 26, 0, 26, 35, 1, 51, + 33, 53, 31, 45, 12, 29, 29, 51, + 31, 48, 2, 6, 34, 30, 28, 33, + 60, 40, 27, 46, 31, 9, 35, 29, + 31, 39, 55, 46, 19, 37, 62, 34, + 30, 16, 19, 49, 41, 41, 39, 37, + 14, 5, 13, 35, 55, 30, 40, 40, + 42, 8, 20, 25, 45, 35, 33, 36, + 54, 38, 27, 37, 62, 40, 15, 59, + 49, 31, 29, 34, 34, 39, 24, 29, + 25, 29, 21, 29, 10, 61, 33, 49, + 35, 34, 3, 38, 39, 29, 7, 41, + 1, 35, 4, 23, 15, 23, 11, 37, + 28, 35, 30, 30, 24, 1, 43, 56, + 8, 34, 42, 24, 45, 30, 20, 23, + 8, 38, 22, 33, 17, 52, 34, 22, + 53, 43, 44, 1, 27, 31, 41, 43, + 41, 30, 31, 36, 30, 5, 55, 31, + 33, 30, 40, 23, 15, 29, 34, 34, + 59, 34, 30, 11, 13, 38, 5, 0, + 30, 42, 5, 30, 29, 34, 10, 44, + 30, 63, 35, 12, 3, 26, 15, 17, + 25, 34, 43, 39, 34, 56, 29, 23, + 30, 12, 30, 10, 35, 9, 24, 58, + 10, 12, 54, 33, 37, 20, 41, 35, + 29, 18, 61, 30, 40, 24, 39, 53, + 62, 26, 29, 33, 34, 53, 49, 21, + 27, 11, 63, 20, 26, 23, 7, 13, + 6, 47, 29, 30, 9, 51, 22, 34, + 21, 25, 33, 56, 57, 30, 38, 51, + 51, 38, 63, 28, 40, 35, 33, 18, + 33, 33, 24, 58, 58, 34, 49, 29, + 43, 4, 1, 4, 42, 35, 35, 30, + 17, 5, 56, 61, 25, 37, 36, 55, + 28, 35, 29, 50, 48, 52, 2, 42, + 34, 40, 46, 46, 43, 35, 29, 48, + 20, 29, 31, 41, 7, 30, 35, 19, + 14, 21, 8, 39, 39, 40, 46, 55, + 34, 6, 30, 34, 37, 25, 37, 33, + 22, 44, 52, 17, 35, 29, 36, 35, + 40, 37, 28, 30, 50, 14, 28, 55, + 6, 23, 19, 14, 30, 3, 30, 28, + 28, 61, 61, 47, 45, 48, 40, 40, + 34, 34, 25, 30, 29, 35, 4, 26, + 53, 50, 26, 41, 27, 59, 27, 38, + 39, 3, 50, 43, 47, 23, 33, 55, + 35, 21, 23, 35, 61, 33, 46, 52, + 35, 34, 24, 30, 43, 16, 37, 21, + 2, 24, 45, 34, 30, 55, 55, 1, + 29, 29, 26, 28, 25, 31, 36, 22, + 17, 30, 52, 2, 44, 44, 57, 26, + 62, 41, 39, 57, 26, 46, 49, 11, + 16, 19, 5, 59, 38, 39, 58, 38, + 25, 49, 50, 22, 28, 59, 9, 59, + 7, 28, 55, 17, 4, 35, 50, 21, + 29, 44, 47, 18, 24, 19, 25, 42, + 35, 3, 51, 35, 16, 35, 30, 63, + 57, 39, 39, 25, 35, 38, 9, 16, + 36, 45, 31, 60, 14, 34, 42, 24, + 0, 37, 18, 61, 57, 37, 28, 53, + 20, 46, 14, 47, 38, 38, 38, 9, + 34, 39, 43, 17, 39, 59, 5, 27, + 0, 12, 27, +}; + +const UINT8 table1_mvy[1099] = { + 32, 32, 31, 31, 32, 33, 31, 33, + 33, 32, 32, 30, 34, 31, 32, 29, + 33, 30, 32, 33, 31, 35, 34, 30, + 34, 31, 33, 29, 29, 31, 33, 35, + 30, 30, 35, 32, 32, 34, 34, 28, + 25, 32, 36, 27, 32, 32, 32, 37, + 39, 3, 32, 30, 31, 26, 31, 32, + 32, 38, 29, 29, 32, 34, 31, 31, + 34, 35, 33, 33, 28, 33, 1, 33, + 27, 29, 30, 31, 28, 29, 37, 35, + 31, 33, 35, 27, 36, 37, 25, 25, + 61, 35, 4, 5, 32, 33, 36, 30, + 23, 30, 28, 34, 31, 32, 32, 39, + 32, 34, 21, 39, 32, 59, 32, 28, + 32, 36, 60, 33, 24, 36, 32, 32, + 41, 2, 32, 38, 26, 22, 33, 30, + 31, 32, 32, 30, 31, 32, 29, 3, + 40, 38, 32, 32, 33, 26, 31, 34, + 28, 38, 34, 31, 3, 31, 35, 38, + 27, 35, 33, 28, 29, 27, 29, 27, + 43, 29, 37, 63, 31, 33, 34, 30, + 31, 30, 37, 30, 35, 35, 26, 41, + 37, 31, 33, 28, 26, 30, 42, 24, + 7, 27, 33, 29, 36, 28, 34, 57, + 23, 41, 36, 23, 35, 34, 25, 30, + 25, 33, 25, 25, 29, 24, 33, 39, + 33, 33, 0, 37, 31, 36, 21, 32, + 61, 24, 35, 61, 31, 5, 31, 59, + 39, 21, 32, 30, 34, 22, 40, 32, + 29, 16, 31, 5, 62, 2, 20, 39, + 39, 32, 33, 1, 31, 24, 36, 32, + 36, 32, 28, 26, 6, 31, 38, 34, + 58, 35, 32, 33, 33, 17, 43, 26, + 31, 40, 31, 34, 32, 32, 31, 19, + 30, 32, 29, 33, 38, 38, 32, 59, + 40, 18, 38, 32, 35, 34, 32, 17, + 1, 15, 30, 28, 31, 28, 34, 29, + 32, 27, 35, 27, 49, 22, 37, 34, + 37, 26, 32, 32, 22, 28, 45, 29, + 30, 31, 43, 46, 41, 30, 26, 13, + 34, 32, 27, 38, 42, 42, 33, 47, + 33, 60, 27, 42, 25, 32, 22, 32, + 48, 32, 45, 33, 33, 41, 27, 25, + 19, 31, 35, 19, 36, 42, 27, 17, + 31, 44, 28, 33, 33, 31, 23, 31, + 40, 33, 31, 34, 30, 32, 33, 36, + 35, 47, 37, 41, 31, 23, 41, 29, + 30, 35, 32, 25, 32, 28, 58, 2, + 37, 33, 14, 33, 49, 20, 39, 36, + 21, 9, 23, 33, 35, 24, 39, 37, + 11, 33, 30, 31, 31, 28, 51, 40, + 35, 29, 25, 33, 46, 35, 37, 30, + 30, 8, 63, 28, 15, 40, 33, 45, + 49, 25, 32, 4, 47, 51, 36, 39, + 53, 10, 24, 29, 30, 31, 25, 40, + 38, 38, 33, 56, 23, 27, 32, 37, + 26, 29, 43, 36, 33, 24, 55, 43, + 9, 29, 34, 34, 24, 33, 18, 33, + 33, 30, 31, 50, 24, 60, 30, 39, + 34, 30, 39, 28, 22, 38, 2, 26, + 63, 32, 57, 21, 39, 33, 28, 18, + 30, 34, 22, 33, 29, 41, 30, 34, + 35, 21, 13, 34, 35, 39, 30, 46, + 32, 42, 32, 31, 33, 26, 11, 33, + 22, 31, 25, 31, 53, 27, 43, 25, + 40, 50, 21, 36, 38, 30, 12, 31, + 34, 20, 15, 29, 32, 62, 30, 13, + 17, 32, 19, 31, 20, 31, 30, 7, + 1, 17, 34, 37, 31, 31, 44, 34, + 26, 40, 16, 37, 52, 48, 30, 20, + 18, 33, 38, 29, 7, 25, 30, 54, + 45, 47, 46, 41, 29, 29, 16, 30, + 14, 26, 38, 34, 34, 29, 34, 30, + 29, 30, 57, 30, 4, 46, 33, 29, + 39, 44, 30, 31, 50, 33, 31, 32, + 19, 32, 40, 31, 37, 47, 1, 35, + 16, 31, 0, 35, 33, 1, 17, 34, + 9, 34, 33, 31, 49, 43, 42, 51, + 34, 29, 23, 29, 14, 30, 45, 49, + 11, 24, 31, 28, 35, 41, 30, 44, + 18, 29, 34, 35, 36, 25, 26, 21, + 31, 30, 34, 19, 34, 44, 36, 38, + 25, 31, 28, 23, 37, 3, 55, 41, + 30, 22, 41, 24, 33, 26, 35, 35, + 30, 55, 51, 47, 48, 38, 24, 15, + 21, 50, 25, 46, 30, 29, 10, 34, + 42, 45, 29, 42, 22, 3, 33, 27, + 34, 1, 34, 28, 34, 36, 35, 23, + 23, 13, 58, 3, 26, 63, 25, 31, + 34, 61, 38, 39, 25, 61, 29, 37, + 30, 41, 26, 48, 28, 33, 50, 35, + 30, 37, 29, 29, 40, 6, 39, 28, + 28, 19, 8, 22, 45, 34, 35, 10, + 58, 17, 37, 39, 30, 18, 54, 14, + 29, 16, 59, 30, 35, 23, 35, 30, + 47, 36, 29, 55, 20, 12, 31, 35, + 14, 29, 18, 34, 34, 24, 29, 26, + 22, 2, 27, 23, 8, 30, 55, 38, + 60, 31, 4, 34, 49, 34, 27, 34, + 33, 30, 31, 54, 42, 35, 38, 46, + 44, 26, 27, 9, 39, 25, 21, 29, + 28, 42, 13, 0, 5, 34, 37, 28, + 24, 29, 63, 26, 22, 27, 29, 25, + 33, 25, 61, 0, 35, 25, 36, 15, + 27, 40, 53, 33, 3, 10, 16, 37, + 38, 18, 30, 46, 27, 9, 6, 29, + 62, 8, 42, 28, 29, 3, 25, 16, + 26, 29, 35, 28, 27, 51, 61, 48, + 37, 9, 34, 7, 49, 45, 20, 29, + 21, 5, 5, 29, 28, 34, 29, 24, + 10, 24, 35, 36, 38, 55, 11, 36, + 38, 53, 54, 26, 30, 49, 20, 27, + 30, 39, 33, 41, 49, 22, 38, 38, + 4, 30, 8, 9, 3, 24, 22, 50, + 37, 36, 31, 27, 2, 9, 42, 63, + 25, 19, 44, 1, 28, 28, 48, 30, + 34, 41, 41, 38, 12, 27, 15, 0, + 16, 34, 35, 38, 28, 29, 40, 42, + 51, 52, 45, 54, 59, 59, 42, 44, + 37, 26, 46, 24, 15, 39, 22, 46, + 19, 35, 38, 17, 37, 23, 52, 55, + 50, 37, 26, 11, 37, 12, 24, 30, + 16, 13, 22, 13, 36, 35, 40, 41, + 34, 41, 26, 53, 51, 5, 21, 30, + 2, 63, 41, 20, 1, 56, 21, 24, + 25, 5, 28, 35, 26, 28, 30, 18, + 29, 23, 40, 34, 20, 42, 39, 34, + 28, 61, 38, 27, 62, 9, 36, 17, + 9, 49, 24, 25, 54, 34, 39, 37, + 3, 1, 25, 38, 38, 44, 35, 36, + 12, 60, 36, 38, 40, 25, 43, 39, + 53, 28, 39, 57, 46, 10, 52, 27, + 35, 42, 45, 59, 15, 60, 38, 24, + 23, 39, 12, 29, 24, 0, 20, 16, + 28, 43, 35, 28, 1, 49, 4, 21, + 42, 39, 29, 3, 44, 21, 53, 55, + 11, 5, 3, 39, 53, 28, 25, 19, + 34, 28, 21, +}; + +static MVTable mv_tables[2] = { + { + 1099, + table0_mv_code, + table0_mv_bits, + table0_mvx, + table0_mvy, + }, + { + 1099, + table1_mv_code, + table1_mv_bits, + table1_mvx, + table1_mvy, + } +}; diff --git a/libavcodec/resample.c b/libavcodec/resample.c new file mode 100644 index 0000000000..29445964f6 --- /dev/null +++ b/libavcodec/resample.c @@ -0,0 +1,301 @@ +/* + * Sample rate convertion for both audio and video + * Copyright (c) 2000 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <math.h> +#include "avcodec.h" + +#define NDEBUG +#include <assert.h> + +typedef struct { + /* fractional resampling */ + UINT32 incr; /* fractional increment */ + UINT32 frac; + int last_sample; + /* integer down sample */ + int iratio; /* integer divison ratio */ + int icount, isum; + int inv; +} ReSampleChannelContext; + +struct ReSampleContext { + ReSampleChannelContext channel_ctx[2]; + float ratio; + /* channel convert */ + int input_channels, output_channels, filter_channels; +}; + + +#define FRAC_BITS 16 +#define FRAC (1 << FRAC_BITS) + +static void init_mono_resample(ReSampleChannelContext *s, float ratio) +{ + ratio = 1.0 / ratio; + s->iratio = (int)floor(ratio); + if (s->iratio == 0) + s->iratio = 1; + s->incr = (int)((ratio / s->iratio) * FRAC); + s->frac = 0; + s->last_sample = 0; + s->icount = s->iratio; + s->isum = 0; + s->inv = (FRAC / s->iratio); +} + +/* fractional audio resampling */ +static int fractional_resample(ReSampleChannelContext *s, short *output, short *input, int nb_samples) +{ + unsigned int frac, incr; + int l0, l1; + short *q, *p, *pend; + + l0 = s->last_sample; + incr = s->incr; + frac = s->frac; + + p = input; + pend = input + nb_samples; + q = output; + + l1 = *p++; + for(;;) { + /* interpolate */ + *q++ = (l0 * (FRAC - frac) + l1 * frac) >> FRAC_BITS; + frac = frac + s->incr; + while (frac >= FRAC) { + if (p >= pend) + goto the_end; + frac -= FRAC; + l0 = l1; + l1 = *p++; + } + } + the_end: + s->last_sample = l1; + s->frac = frac; + return q - output; +} + +static int integer_downsample(ReSampleChannelContext *s, short *output, short *input, int nb_samples) +{ + short *q, *p, *pend; + int c, sum; + + p = input; + pend = input + nb_samples; + q = output; + + c = s->icount; + sum = s->isum; + + for(;;) { + sum += *p++; + if (--c == 0) { + *q++ = (sum * s->inv) >> FRAC_BITS; + c = s->iratio; + sum = 0; + } + if (p >= pend) + break; + } + s->isum = sum; + s->icount = c; + return q - output; +} + +/* n1: number of samples */ +static void stereo_to_mono(short *output, short *input, int n1) +{ + short *p, *q; + int n = n1; + + p = input; + q = output; + while (n >= 4) { + q[0] = (p[0] + p[1]) >> 1; + q[1] = (p[2] + p[3]) >> 1; + q[2] = (p[4] + p[5]) >> 1; + q[3] = (p[6] + p[7]) >> 1; + q += 4; + p += 8; + n -= 4; + } + while (n > 0) { + q[0] = (p[0] + p[1]) >> 1; + q++; + p += 2; + n--; + } +} + +/* n1: number of samples */ +static void mono_to_stereo(short *output, short *input, int n1) +{ + short *p, *q; + int n = n1; + int v; + + p = input; + q = output; + while (n >= 4) { + v = p[0]; q[0] = v; q[1] = v; + v = p[1]; q[2] = v; q[3] = v; + v = p[2]; q[4] = v; q[5] = v; + v = p[3]; q[6] = v; q[7] = v; + q += 8; + p += 4; + n -= 4; + } + while (n > 0) { + v = p[0]; q[0] = v; q[1] = v; + q += 2; + p += 1; + n--; + } +} + +/* XXX: should use more abstract 'N' channels system */ +static void stereo_split(short *output1, short *output2, short *input, int n) +{ + int i; + + for(i=0;i<n;i++) { + *output1++ = *input++; + *output2++ = *input++; + } +} + +static void stereo_mux(short *output, short *input1, short *input2, int n) +{ + int i; + + for(i=0;i<n;i++) { + *output++ = *input1++; + *output++ = *input2++; + } +} + +static int mono_resample(ReSampleChannelContext *s, short *output, short *input, int nb_samples) +{ + short buf1[nb_samples]; + short *buftmp; + + /* first downsample by an integer factor with averaging filter */ + if (s->iratio > 1) { + buftmp = buf1; + nb_samples = integer_downsample(s, buftmp, input, nb_samples); + } else { + buftmp = input; + } + + /* then do a fractional resampling with linear interpolation */ + if (s->incr != FRAC) { + nb_samples = fractional_resample(s, output, buftmp, nb_samples); + } else { + memcpy(output, buftmp, nb_samples * sizeof(short)); + } + return nb_samples; +} + +ReSampleContext *audio_resample_init(int output_channels, int input_channels, + int output_rate, int input_rate) +{ + ReSampleContext *s; + int i; + + if (output_channels > 2 || input_channels > 2) + return NULL; + + s = av_mallocz(sizeof(ReSampleContext)); + if (!s) + return NULL; + + s->ratio = (float)output_rate / (float)input_rate; + + s->input_channels = input_channels; + s->output_channels = output_channels; + + s->filter_channels = s->input_channels; + if (s->output_channels < s->filter_channels) + s->filter_channels = s->output_channels; + + for(i=0;i<s->filter_channels;i++) { + init_mono_resample(&s->channel_ctx[i], s->ratio); + } + return s; +} + +/* resample audio. 'nb_samples' is the number of input samples */ +/* XXX: optimize it ! */ +/* XXX: do it with polyphase filters, since the quality here is + HORRIBLE. Return the number of samples available in output */ +int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples) +{ + int i, nb_samples1; + short bufin[2][nb_samples]; + short bufout[2][(int)(nb_samples * s->ratio) + 16]; /* make some zoom to avoid round pb */ + short *buftmp2[2], *buftmp3[2]; + + if (s->input_channels == s->output_channels && s->ratio == 1.0) { + /* nothing to do */ + memcpy(output, input, nb_samples * s->input_channels * sizeof(short)); + return nb_samples; + } + + if (s->input_channels == 2 && + s->output_channels == 1) { + buftmp2[0] = bufin[0]; + buftmp3[0] = output; + stereo_to_mono(buftmp2[0], input, nb_samples); + } else if (s->output_channels == 2 && s->input_channels == 1) { + buftmp2[0] = input; + buftmp3[0] = bufout[0]; + } else if (s->output_channels == 2) { + buftmp2[0] = bufin[0]; + buftmp2[1] = bufin[1]; + buftmp3[0] = bufout[0]; + buftmp3[1] = bufout[1]; + stereo_split(buftmp2[0], buftmp2[1], input, nb_samples); + } else { + buftmp2[0] = input; + buftmp3[0] = output; + } + + /* resample each channel */ + nb_samples1 = 0; /* avoid warning */ + for(i=0;i<s->filter_channels;i++) { + nb_samples1 = mono_resample(&s->channel_ctx[i], buftmp3[i], buftmp2[i], nb_samples); + } + + if (s->output_channels == 2 && s->input_channels == 1) { + mono_to_stereo(output, buftmp3[0], nb_samples1); + } else if (s->output_channels == 2) { + stereo_mux(output, buftmp3[0], buftmp3[1], nb_samples1); + } + + return nb_samples1; +} + +void audio_resample_close(ReSampleContext *s) +{ + free(s); +} diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c new file mode 100644 index 0000000000..b03cd02937 --- /dev/null +++ b/libavcodec/rv10.c @@ -0,0 +1,487 @@ +/* + * RV10 codec + * Copyright (c) 2000,2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "common.h" +#include "dsputil.h" +#include "avcodec.h" +#include "mpegvideo.h" + +//#define DEBUG + +static const UINT16 rv_lum_code[256] = +{ + 0x3e7f, 0x0f00, 0x0f01, 0x0f02, 0x0f03, 0x0f04, 0x0f05, 0x0f06, + 0x0f07, 0x0f08, 0x0f09, 0x0f0a, 0x0f0b, 0x0f0c, 0x0f0d, 0x0f0e, + 0x0f0f, 0x0f10, 0x0f11, 0x0f12, 0x0f13, 0x0f14, 0x0f15, 0x0f16, + 0x0f17, 0x0f18, 0x0f19, 0x0f1a, 0x0f1b, 0x0f1c, 0x0f1d, 0x0f1e, + 0x0f1f, 0x0f20, 0x0f21, 0x0f22, 0x0f23, 0x0f24, 0x0f25, 0x0f26, + 0x0f27, 0x0f28, 0x0f29, 0x0f2a, 0x0f2b, 0x0f2c, 0x0f2d, 0x0f2e, + 0x0f2f, 0x0f30, 0x0f31, 0x0f32, 0x0f33, 0x0f34, 0x0f35, 0x0f36, + 0x0f37, 0x0f38, 0x0f39, 0x0f3a, 0x0f3b, 0x0f3c, 0x0f3d, 0x0f3e, + 0x0f3f, 0x0380, 0x0381, 0x0382, 0x0383, 0x0384, 0x0385, 0x0386, + 0x0387, 0x0388, 0x0389, 0x038a, 0x038b, 0x038c, 0x038d, 0x038e, + 0x038f, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, + 0x0397, 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, + 0x039f, 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, + 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, + 0x00cf, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, + 0x0057, 0x0020, 0x0021, 0x0022, 0x0023, 0x000c, 0x000d, 0x0004, + 0x0000, 0x0005, 0x000e, 0x000f, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, + 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, + 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x03ac, 0x03ad, 0x03ae, 0x03af, + 0x03b0, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, + 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, + 0x0f40, 0x0f41, 0x0f42, 0x0f43, 0x0f44, 0x0f45, 0x0f46, 0x0f47, + 0x0f48, 0x0f49, 0x0f4a, 0x0f4b, 0x0f4c, 0x0f4d, 0x0f4e, 0x0f4f, + 0x0f50, 0x0f51, 0x0f52, 0x0f53, 0x0f54, 0x0f55, 0x0f56, 0x0f57, + 0x0f58, 0x0f59, 0x0f5a, 0x0f5b, 0x0f5c, 0x0f5d, 0x0f5e, 0x0f5f, + 0x0f60, 0x0f61, 0x0f62, 0x0f63, 0x0f64, 0x0f65, 0x0f66, 0x0f67, + 0x0f68, 0x0f69, 0x0f6a, 0x0f6b, 0x0f6c, 0x0f6d, 0x0f6e, 0x0f6f, + 0x0f70, 0x0f71, 0x0f72, 0x0f73, 0x0f74, 0x0f75, 0x0f76, 0x0f77, + 0x0f78, 0x0f79, 0x0f7a, 0x0f7b, 0x0f7c, 0x0f7d, 0x0f7e, 0x0f7f, +}; + +static const UINT8 rv_lum_bits[256] = +{ + 14, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 7, 7, 7, 7, 7, 7, 7, + 7, 6, 6, 6, 6, 5, 5, 4, + 2, 4, 5, 5, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, +}; + +static const UINT16 rv_chrom_code[256] = +{ + 0xfe7f, 0x3f00, 0x3f01, 0x3f02, 0x3f03, 0x3f04, 0x3f05, 0x3f06, + 0x3f07, 0x3f08, 0x3f09, 0x3f0a, 0x3f0b, 0x3f0c, 0x3f0d, 0x3f0e, + 0x3f0f, 0x3f10, 0x3f11, 0x3f12, 0x3f13, 0x3f14, 0x3f15, 0x3f16, + 0x3f17, 0x3f18, 0x3f19, 0x3f1a, 0x3f1b, 0x3f1c, 0x3f1d, 0x3f1e, + 0x3f1f, 0x3f20, 0x3f21, 0x3f22, 0x3f23, 0x3f24, 0x3f25, 0x3f26, + 0x3f27, 0x3f28, 0x3f29, 0x3f2a, 0x3f2b, 0x3f2c, 0x3f2d, 0x3f2e, + 0x3f2f, 0x3f30, 0x3f31, 0x3f32, 0x3f33, 0x3f34, 0x3f35, 0x3f36, + 0x3f37, 0x3f38, 0x3f39, 0x3f3a, 0x3f3b, 0x3f3c, 0x3f3d, 0x3f3e, + 0x3f3f, 0x0f80, 0x0f81, 0x0f82, 0x0f83, 0x0f84, 0x0f85, 0x0f86, + 0x0f87, 0x0f88, 0x0f89, 0x0f8a, 0x0f8b, 0x0f8c, 0x0f8d, 0x0f8e, + 0x0f8f, 0x0f90, 0x0f91, 0x0f92, 0x0f93, 0x0f94, 0x0f95, 0x0f96, + 0x0f97, 0x0f98, 0x0f99, 0x0f9a, 0x0f9b, 0x0f9c, 0x0f9d, 0x0f9e, + 0x0f9f, 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, + 0x03c7, 0x03c8, 0x03c9, 0x03ca, 0x03cb, 0x03cc, 0x03cd, 0x03ce, + 0x03cf, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, + 0x00e7, 0x0030, 0x0031, 0x0032, 0x0033, 0x0008, 0x0009, 0x0002, + 0x0000, 0x0003, 0x000a, 0x000b, 0x0034, 0x0035, 0x0036, 0x0037, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x03d0, 0x03d1, 0x03d2, 0x03d3, 0x03d4, 0x03d5, 0x03d6, 0x03d7, + 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, + 0x0fa0, 0x0fa1, 0x0fa2, 0x0fa3, 0x0fa4, 0x0fa5, 0x0fa6, 0x0fa7, + 0x0fa8, 0x0fa9, 0x0faa, 0x0fab, 0x0fac, 0x0fad, 0x0fae, 0x0faf, + 0x0fb0, 0x0fb1, 0x0fb2, 0x0fb3, 0x0fb4, 0x0fb5, 0x0fb6, 0x0fb7, + 0x0fb8, 0x0fb9, 0x0fba, 0x0fbb, 0x0fbc, 0x0fbd, 0x0fbe, 0x0fbf, + 0x3f40, 0x3f41, 0x3f42, 0x3f43, 0x3f44, 0x3f45, 0x3f46, 0x3f47, + 0x3f48, 0x3f49, 0x3f4a, 0x3f4b, 0x3f4c, 0x3f4d, 0x3f4e, 0x3f4f, + 0x3f50, 0x3f51, 0x3f52, 0x3f53, 0x3f54, 0x3f55, 0x3f56, 0x3f57, + 0x3f58, 0x3f59, 0x3f5a, 0x3f5b, 0x3f5c, 0x3f5d, 0x3f5e, 0x3f5f, + 0x3f60, 0x3f61, 0x3f62, 0x3f63, 0x3f64, 0x3f65, 0x3f66, 0x3f67, + 0x3f68, 0x3f69, 0x3f6a, 0x3f6b, 0x3f6c, 0x3f6d, 0x3f6e, 0x3f6f, + 0x3f70, 0x3f71, 0x3f72, 0x3f73, 0x3f74, 0x3f75, 0x3f76, 0x3f77, + 0x3f78, 0x3f79, 0x3f7a, 0x3f7b, 0x3f7c, 0x3f7d, 0x3f7e, 0x3f7f, +}; + +static const UINT8 rv_chrom_bits[256] = +{ + 16, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 8, 8, 8, 8, 8, 8, 8, + 8, 6, 6, 6, 6, 4, 4, 3, + 2, 3, 4, 4, 6, 6, 6, 6, + 8, 8, 8, 8, 8, 8, 8, 8, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, +}; + +static VLC rv_dc_lum, rv_dc_chrom; + +int rv_decode_dc(MpegEncContext *s, int n) +{ + int code; + + if (n < 4) { + code = get_vlc(&s->gb, &rv_dc_lum); + if (code < 0) { + /* XXX: I don't understand why they use LONGER codes than + necessary. The following code would be completely useless + if they had thought about it !!! */ + code = get_bits(&s->gb, 7); + if (code == 0x7c) { + code = (INT8)(get_bits(&s->gb, 7) + 1); + } else if (code == 0x7d) { + code = -128 + get_bits(&s->gb, 7); + } else if (code == 0x7e) { + if (get_bits(&s->gb, 1) == 0) + code = (INT8)(get_bits(&s->gb, 8) + 1); + else + code = (INT8)(get_bits(&s->gb, 8)); + } else if (code == 0x7f) { + get_bits(&s->gb, 11); + code = 1; + } + } else { + code -= 128; + } + } else { + code = get_vlc(&s->gb, &rv_dc_chrom); + /* same remark */ + if (code < 0) { + code = get_bits(&s->gb, 9); + if (code == 0x1fc) { + code = (INT8)(get_bits(&s->gb, 7) + 1); + } else if (code == 0x1fd) { + code = -128 + get_bits(&s->gb, 7); + } else if (code == 0x1fe) { + get_bits(&s->gb, 9); + code = 1; + } else { + return 0xffff; + } + } else { + code -= 128; + } + } + return -code; +} + +/* write RV 1.0 compatible frame header */ +void rv10_encode_picture_header(MpegEncContext *s, int picture_number) +{ + align_put_bits(&s->pb); + + put_bits(&s->pb, 1, 1); /* marker */ + + put_bits(&s->pb, 1, (s->pict_type == P_TYPE)); + + put_bits(&s->pb, 1, 0); /* not PB frame */ + + put_bits(&s->pb, 5, s->qscale); + + if (s->pict_type == I_TYPE) { + /* specific MPEG like DC coding not used */ + } + /* if multiple packets per frame are sent, the position at which + to display the macro blocks is coded here */ + put_bits(&s->pb, 6, 0); /* mb_x */ + put_bits(&s->pb, 6, 0); /* mb_y */ + put_bits(&s->pb, 12, s->mb_width * s->mb_height); + + put_bits(&s->pb, 3, 0); /* ignored */ +} + +static int get_num(GetBitContext *gb) +{ + int n, n1; + + n = get_bits(gb, 16); + if (n >= 0x4000) { + return n - 0x4000; + } else { + n1 = get_bits(gb, 16); + return (n << 16) | n1; + } +} + +/* read RV 1.0 compatible frame header */ +static int rv10_decode_picture_header(MpegEncContext *s) +{ + int mb_count, pb_frame, marker, h, full_frame; + + /* skip packet header */ + h = get_bits(&s->gb, 8); + if ((h & 0xc0) == 0xc0) { + int len, pos; + full_frame = 1; + len = get_num(&s->gb); + pos = get_num(&s->gb); + } else { + int seq, frame_size, pos; + full_frame = 0; + seq = get_bits(&s->gb, 8); + frame_size = get_num(&s->gb); + pos = get_num(&s->gb); + } + /* picture number */ + get_bits(&s->gb, 8); + + marker = get_bits(&s->gb, 1); + + if (get_bits(&s->gb, 1)) + s->pict_type = P_TYPE; + else + s->pict_type = I_TYPE; + + pb_frame = get_bits(&s->gb, 1); + +#ifdef DEBUG + printf("pict_type=%d pb_frame=%d\n", s->pict_type, pb_frame); +#endif + + if (pb_frame) + return -1; + + s->qscale = get_bits(&s->gb, 5); + + if (s->pict_type == I_TYPE) { + if (s->rv10_version == 3) { + /* specific MPEG like DC coding not used */ + s->last_dc[0] = get_bits(&s->gb, 8); + s->last_dc[1] = get_bits(&s->gb, 8); + s->last_dc[2] = get_bits(&s->gb, 8); +#ifdef DEBUG + printf("DC:%d %d %d\n", + s->last_dc[0], + s->last_dc[1], + s->last_dc[2]); +#endif + } + } + /* if multiple packets per frame are sent, the position at which + to display the macro blocks is coded here */ + if (!full_frame) { + s->mb_x = get_bits(&s->gb, 6); /* mb_x */ + s->mb_y = get_bits(&s->gb, 6); /* mb_y */ + mb_count = get_bits(&s->gb, 12); + } else { + s->mb_x = 0; + s->mb_y = 0; + mb_count = s->mb_width * s->mb_height; + } + + get_bits(&s->gb, 3); /* ignored */ + s->f_code = 1; + s->unrestricted_mv = 1; +#if 0 + s->h263_long_vectors = 1; +#endif + return mb_count; +} + +static int rv10_decode_init(AVCodecContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + static int done; + + s->out_format = FMT_H263; + + s->width = avctx->width; + s->height = avctx->height; + + s->h263_rv10 = 1; + s->rv10_version = avctx->sub_id; + + if (MPV_common_init(s) < 0) + return -1; + + h263_decode_init_vlc(s); + + /* init rv vlc */ + if (!done) { + init_vlc(&rv_dc_lum, 9, 256, + rv_lum_bits, 1, 1, + rv_lum_code, 2, 2); + init_vlc(&rv_dc_chrom, 9, 256, + rv_chrom_bits, 1, 1, + rv_chrom_code, 2, 2); + done = 1; + } + + return 0; +} + +static int rv10_decode_end(AVCodecContext *avctx) +{ + MpegEncContext *s = avctx->priv_data; + + MPV_common_end(s); + return 0; +} + +static int rv10_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size) +{ + MpegEncContext *s = avctx->priv_data; + int i, mb_count, mb_pos, left; + DCTELEM block[6][64]; + AVPicture *pict = data; + +#ifdef DEBUG + printf("*****frame %d size=%d\n", avctx->frame_number, buf_size); +#endif + + /* no supplementary picture */ + if (buf_size == 0) { + *data_size = 0; + return 0; + } + + init_get_bits(&s->gb, buf, buf_size); + + mb_count = rv10_decode_picture_header(s); + if (mb_count < 0) { +#ifdef DEBUG + printf("HEADER ERROR\n"); +#endif + return -1; + } + + if (s->mb_x >= s->mb_width || + s->mb_y >= s->mb_height) { +#ifdef DEBUG + printf("POS ERROR %d %d\n", s->mb_x, s->mb_y); +#endif + return -1; + } + mb_pos = s->mb_y * s->mb_width + s->mb_x; + left = s->mb_width * s->mb_height - mb_pos; + if (mb_count > left) { +#ifdef DEBUG + printf("COUNT ERROR\n"); +#endif + return -1; + } + + if (s->mb_x == 0 && s->mb_y == 0) { + MPV_frame_start(s); + } + +#ifdef DEBUG + printf("qscale=%d\n", s->qscale); +#endif + + /* default quantization values */ + s->y_dc_scale = 8; + s->c_dc_scale = 8; + s->rv10_first_dc_coded[0] = 0; + s->rv10_first_dc_coded[1] = 0; + s->rv10_first_dc_coded[2] = 0; + + /* decode each macroblock */ + for(i=0;i<mb_count;i++) { +#ifdef DEBUG + printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); +#endif + + memset(block, 0, sizeof(block)); + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + if (h263_decode_mb(s, block) < 0) { +#ifdef DEBUG + printf("ERROR\n"); +#endif + return -1; + } + MPV_decode_mb(s, block); + if (++s->mb_x == s->mb_width) { + s->mb_x = 0; + s->mb_y++; + } + } + + if (s->mb_x == 0 && + s->mb_y == s->mb_height) { + MPV_frame_end(s); + + pict->data[0] = s->current_picture[0]; + pict->data[1] = s->current_picture[1]; + pict->data[2] = s->current_picture[2]; + pict->linesize[0] = s->linesize; + pict->linesize[1] = s->linesize / 2; + pict->linesize[2] = s->linesize / 2; + + avctx->quality = s->qscale; + *data_size = sizeof(AVPicture); + } else { + *data_size = 0; + } + return buf_size; +} + +AVCodec rv10_decoder = { + "rv10", + CODEC_TYPE_VIDEO, + CODEC_ID_RV10, + sizeof(MpegEncContext), + rv10_decode_init, + NULL, + rv10_decode_end, + rv10_decode_frame, +}; diff --git a/libavcodec/utils.c b/libavcodec/utils.c new file mode 100644 index 0000000000..9497f90117 --- /dev/null +++ b/libavcodec/utils.c @@ -0,0 +1,306 @@ +/* + * utils for libavcodec + * Copyright (c) 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include "common.h" +#include "dsputil.h" +#include "avcodec.h" + +/* memory alloc */ +void *av_mallocz(int size) +{ + void *ptr; + ptr = malloc(size); + if (!ptr) + return NULL; + memset(ptr, 0, size); + return ptr; +} + +/* encoder management */ +AVCodec *first_avcodec; + +void register_avcodec(AVCodec *format) +{ + AVCodec **p; + p = &first_avcodec; + while (*p != NULL) p = &(*p)->next; + *p = format; + format->next = NULL; +} + +int avcodec_open(AVCodecContext *avctx, AVCodec *codec) +{ + int ret; + + avctx->codec = codec; + avctx->frame_number = 0; + avctx->priv_data = av_mallocz(codec->priv_data_size); + if (!avctx->priv_data) + return -ENOMEM; + ret = avctx->codec->init(avctx); + if (ret < 0) { + free(avctx->priv_data); + avctx->priv_data = NULL; + return ret; + } + return 0; +} + +int avcodec_encode_audio(AVCodecContext *avctx, UINT8 *buf, int buf_size, + const short *samples) +{ + int ret; + + ret = avctx->codec->encode(avctx, buf, buf_size, (void *)samples); + avctx->frame_number++; + return ret; +} + +int avcodec_encode_video(AVCodecContext *avctx, UINT8 *buf, int buf_size, + const AVPicture *pict) +{ + int ret; + + ret = avctx->codec->encode(avctx, buf, buf_size, (void *)pict); + avctx->frame_number++; + return ret; +} + +/* decode a frame. return -1 if error, otherwise return the number of + bytes used. If no frame could be decompressed, *got_picture_ptr is + zero. Otherwise, it is non zero */ +int avcodec_decode_video(AVCodecContext *avctx, AVPicture *picture, + int *got_picture_ptr, + UINT8 *buf, int buf_size) +{ + int ret; + + ret = avctx->codec->decode(avctx, picture, got_picture_ptr, + buf, buf_size); + avctx->frame_number++; + return ret; +} + +/* decode an audio frame. return -1 if error, otherwise return the + *number of bytes used. If no frame could be decompressed, + *frame_size_ptr is zero. Otherwise, it is the decompressed frame + *size in BYTES. */ +int avcodec_decode_audio(AVCodecContext *avctx, INT16 *samples, + int *frame_size_ptr, + UINT8 *buf, int buf_size) +{ + int ret; + + ret = avctx->codec->decode(avctx, samples, frame_size_ptr, + buf, buf_size); + avctx->frame_number++; + return ret; +} + +int avcodec_close(AVCodecContext *avctx) +{ + if (avctx->codec->close) + avctx->codec->close(avctx); + free(avctx->priv_data); + avctx->priv_data = NULL; + avctx->codec = NULL; + return 0; +} + +AVCodec *avcodec_find_encoder(enum CodecID id) +{ + AVCodec *p; + p = first_avcodec; + while (p) { + if (p->encode != NULL && p->id == id) + return p; + p = p->next; + } + return NULL; +} + +AVCodec *avcodec_find_decoder(enum CodecID id) +{ + AVCodec *p; + p = first_avcodec; + while (p) { + if (p->decode != NULL && p->id == id) + return p; + p = p->next; + } + return NULL; +} + +AVCodec *avcodec_find_decoder_by_name(const char *name) +{ + AVCodec *p; + p = first_avcodec; + while (p) { + if (p->decode != NULL && strcmp(name,p->name) == 0) + return p; + p = p->next; + } + return NULL; +} + +AVCodec *avcodec_find(enum CodecID id) +{ + AVCodec *p; + p = first_avcodec; + while (p) { + if (p->id == id) + return p; + p = p->next; + } + return NULL; +} + +void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) +{ + const char *codec_name; + AVCodec *p; + char buf1[32]; + + if (encode) + p = avcodec_find_encoder(enc->codec_id); + else + p = avcodec_find_decoder(enc->codec_id); + + if (p) { + codec_name = p->name; + } else if (enc->codec_name[0] != '\0') { + codec_name = enc->codec_name; + } else { + /* output avi tags */ + if (enc->codec_type == CODEC_TYPE_VIDEO) { + snprintf(buf1, sizeof(buf1), "%c%c%c%c", + enc->codec_tag & 0xff, + (enc->codec_tag >> 8) & 0xff, + (enc->codec_tag >> 16) & 0xff, + (enc->codec_tag >> 24) & 0xff); + } else { + snprintf(buf1, sizeof(buf1), "0x%04x", enc->codec_tag); + } + codec_name = buf1; + } + + switch(enc->codec_type) { + case CODEC_TYPE_VIDEO: + snprintf(buf, buf_size, + "Video: %s%s", + codec_name, enc->flags & CODEC_FLAG_HQ ? " (hq)" : ""); + if (enc->width) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %dx%d, %0.2f fps", + enc->width, enc->height, + (float)enc->frame_rate / FRAME_RATE_BASE); + } + break; + case CODEC_TYPE_AUDIO: + snprintf(buf, buf_size, + "Audio: %s", + codec_name); + if (enc->sample_rate) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %d Hz, %s", + enc->sample_rate, + enc->channels == 2 ? "stereo" : "mono"); + } + break; + default: + abort(); + } + if (enc->bit_rate != 0) { + snprintf(buf + strlen(buf), buf_size - strlen(buf), + ", %d kb/s", enc->bit_rate / 1000); + } +} + +/* must be called before any other functions */ +void avcodec_init(void) +{ + dsputil_init(); +} + +/* simple call to use all the codecs */ +void avcodec_register_all(void) +{ + register_avcodec(&ac3_encoder); + register_avcodec(&mp2_encoder); + register_avcodec(&mpeg1video_encoder); + register_avcodec(&h263_encoder); + register_avcodec(&h263p_encoder); + register_avcodec(&rv10_encoder); + register_avcodec(&mjpeg_encoder); + register_avcodec(&opendivx_encoder); + register_avcodec(&msmpeg4_encoder); + register_avcodec(&pcm_codec); + register_avcodec(&rawvideo_codec); + /* decoders */ + register_avcodec(&h263_decoder); + register_avcodec(&opendivx_decoder); + register_avcodec(&msmpeg4_decoder); + register_avcodec(&mpeg_decoder); + register_avcodec(&h263i_decoder); + register_avcodec(&rv10_decoder); +} + +static int encode_init(AVCodecContext *s) +{ + return 0; +} + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size) +{ + return -1; +} + +static int encode_frame(AVCodecContext *avctx, + unsigned char *frame, int buf_size, void *data) +{ + return -1; +} + +/* dummy pcm codec */ +AVCodec pcm_codec = { + "pcm", + CODEC_TYPE_AUDIO, + CODEC_ID_PCM, + 0, + encode_init, + encode_frame, + NULL, + decode_frame, +}; + +AVCodec rawvideo_codec = { + "rawvideo", + CODEC_TYPE_VIDEO, + CODEC_ID_RAWVIDEO, + 0, + encode_init, + encode_frame, + NULL, + decode_frame, +}; |