From 2393e65431bc3457e675f9030a817b2adac4001f Mon Sep 17 00:00:00 2001 From: Michael Niedermayer Date: Mon, 15 Dec 2003 09:21:28 +0000 Subject: 100l (forgot alpha & ppc) Originally committed as revision 2611 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/alpha/mpegvideo_alpha.c | 77 +++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 13 deletions(-) (limited to 'libavcodec/alpha') diff --git a/libavcodec/alpha/mpegvideo_alpha.c b/libavcodec/alpha/mpegvideo_alpha.c index 6b720373c2..f64fb7472a 100644 --- a/libavcodec/alpha/mpegvideo_alpha.c +++ b/libavcodec/alpha/mpegvideo_alpha.c @@ -21,7 +21,7 @@ #include "../dsputil.h" #include "../mpegvideo.h" -static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, +static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, n_coeffs; @@ -35,19 +35,15 @@ static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, /* This mask kills spill from negative subwords to the next subword. */ correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ - if (s->mb_intra) { - if (!s->h263_aic) { - if (n < 4) - block0 = block[0] * s->y_dc_scale; - else - block0 = block[0] * s->c_dc_scale; - } else { - qadd = 0; - } - n_coeffs = 63; // does not always use zigzag table + if (!s->h263_aic) { + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; } else { - n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; + qadd = 0; } + n_coeffs = 63; // does not always use zigzag table for(i = 0; i <= n_coeffs; block += 4, i += 4) { uint64_t levels, negmask, zeros, add; @@ -90,7 +86,62 @@ static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, orig_block[0] = block0; } +static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block, + int n, int qscale) +{ + int i, n_coeffs; + uint64_t qmul, qadd; + uint64_t correction; + DCTELEM *orig_block = block; + DCTELEM block0; + + qadd = WORD_VEC((qscale - 1) | 1); + qmul = qscale << 1; + /* This mask kills spill from negative subwords to the next subword. */ + correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ + + n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; + + for(i = 0; i <= n_coeffs; block += 4, i += 4) { + uint64_t levels, negmask, zeros, add; + + levels = ldq(block); + if (levels == 0) + continue; + +#ifdef __alpha_max__ + /* I don't think the speed difference justifies runtime + detection. */ + negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */ + negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */ +#else + negmask = cmpbge(WORD_VEC(0x7fff), levels); + negmask &= (negmask >> 1) | (1 << 7); + negmask = zap(-1, negmask); +#endif + + zeros = cmpbge(0, levels); + zeros &= zeros >> 1; + /* zeros |= zeros << 1 is not needed since qadd <= 255, so + zapping the lower byte suffices. */ + + levels *= qmul; + levels -= correction & (negmask << 16); + + /* Negate qadd for negative levels. */ + add = qadd ^ negmask; + add += WORD_VEC(0x0001) & negmask; + /* Set qadd to 0 for levels == 0. */ + add = zap(add, zeros); + + levels += add; + + stq(levels, block); + } +} + void MPV_common_init_axp(MpegEncContext *s) { - s->dct_unquantize_h263 = dct_unquantize_h263_axp; + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp; } -- cgit v1.2.1