summaryrefslogtreecommitdiff
path: root/libavcodec/ppc
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2011-02-21 09:07:13 -0500
committer: Ronald S. Bultje <rsbultje@gmail.com> 2011-02-21 10:23:44 -0500
commit: f8bed30d8b176fa030f6737765338bb4a2bcabc9 (patch)
tree: f55e179732f7db45c5fcfecd80757676bdbf7fcc /libavcodec/ppc
parent: 8d9ac969cb4ac3e3e18f6425703af4d7aec6c513 (diff)
download: ffmpeg-f8bed30d8b176fa030f6737765338bb4a2bcabc9.tar.gz
VC1: merge idct8x8, coeff adjustments and put_pixels.
Merging these functions allows merging some loops, which makes the results (particularly after SIMD optimizations) much faster.
Diffstat (limited to 'libavcodec/ppc')
-rw-r--r-- libavcodec/ppc/vc1dsp_altivec.c 64
1 file changed, 61 insertions, 3 deletions
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index 5a0dddbe1d..b5cc71c3cf 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -130,7 +130,8 @@ do { \
/** Do inverse transform on 8x8 block
*/
-static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
+static void vc1_inv_trans_8x8_altivec(DCTELEM block[64],
+ int sign, int rangered)
{
vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
@@ -144,7 +145,9 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
const vector unsigned int vec_2 = vec_splat_u32(2);
const vector signed int vec_1s = vec_splat_s32(1);
const vector unsigned int vec_1 = vec_splat_u32(1);
-
+    const vector unsigned short rangered_shift = vec_splat_u16(1); /* per-lane shift count of 1 */
+    const vector signed short signed_bias = vec_sl(vec_splat_s16(4), /* 4 << 4 = 64 per lane; signed splat so vec_sl yields vector signed short */
+                                                   vec_splat_u16(4));
src0 = vec_ld( 0, block);
src1 = vec_ld( 16, block);
@@ -214,6 +217,27 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
src6 = vec_pack(sE, s6);
src7 = vec_pack(sF, s7);
+    if (rangered) { /* optional post-processing of the packed rows before they are stored */
+        if (!sign) { /* only when sign == 0: subtract signed_bias from every lane */
+            src0 = vec_sub(src0, signed_bias); /* vec_sub returns its result; it must be assigned back */
+            src1 = vec_sub(src1, signed_bias);
+            src2 = vec_sub(src2, signed_bias);
+            src3 = vec_sub(src3, signed_bias);
+            src4 = vec_sub(src4, signed_bias);
+            src5 = vec_sub(src5, signed_bias);
+            src6 = vec_sub(src6, signed_bias);
+            src7 = vec_sub(src7, signed_bias);
+        }
+        src0 = vec_sl(src0, rangered_shift); /* likewise vec_sl: shift each lane left by rangered_shift */
+        src1 = vec_sl(src1, rangered_shift);
+        src2 = vec_sl(src2, rangered_shift);
+        src3 = vec_sl(src3, rangered_shift);
+        src4 = vec_sl(src4, rangered_shift);
+        src5 = vec_sl(src5, rangered_shift);
+        src6 = vec_sl(src6, rangered_shift);
+        src7 = vec_sl(src7, rangered_shift);
+    }
+
vec_st(src0, 0, block);
vec_st(src1, 16, block);
vec_st(src2, 32, block);
@@ -224,6 +248,36 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
vec_st(src7,112, block);
}
+static void vc1_inv_trans_8x8_add_altivec(uint8_t *dest, int stride, DCTELEM *b)
+{ /* 8x8 inverse transform of b followed by ff_add_pixels_clamped_c into dest. */
+ vc1_inv_trans_8x8_altivec(b, 0, 0); /* in-place transform with sign=0, rangered=0 */
+ ff_add_pixels_clamped_c(b, dest, stride); /* helper defined elsewhere; receives the transformed block, dest and stride */
+}
+
+static void vc1_inv_trans_8x8_put_signed_altivec(uint8_t *dest, int stride, DCTELEM *b)
+{ /* 8x8 inverse transform of b followed by ff_put_signed_pixels_clamped_c into dest. */
+ vc1_inv_trans_8x8_altivec(b, 1, 0); /* in-place transform with sign=1, rangered=0 */
+ ff_put_signed_pixels_clamped_c(b, dest, stride); /* helper defined elsewhere; receives the transformed block, dest and stride */
+}
+
+static void vc1_inv_trans_8x8_put_signed_rangered_altivec(uint8_t *dest, int stride, DCTELEM *b)
+{ /* Range-reduced variant of the signed put: same helper, transform called with rangered=1. */
+ vc1_inv_trans_8x8_altivec(b, 1, 1); /* in-place transform with sign=1, rangered=1 */
+ ff_put_signed_pixels_clamped_c(b, dest, stride); /* helper defined elsewhere; receives the transformed block, dest and stride */
+}
+
+static void vc1_inv_trans_8x8_put_altivec(uint8_t *dest, int stride, DCTELEM *b)
+{ /* 8x8 inverse transform of b followed by ff_put_pixels_clamped_c into dest. */
+ vc1_inv_trans_8x8_altivec(b, 0, 0); /* in-place transform with sign=0, rangered=0 */
+ ff_put_pixels_clamped_c(b, dest, stride); /* helper defined elsewhere; receives the transformed block, dest and stride */
+}
+
+static void vc1_inv_trans_8x8_put_rangered_altivec(uint8_t *dest, int stride, DCTELEM *b)
+{ /* Range-reduced variant of the unsigned put: same helper, transform called with rangered=1. */
+ vc1_inv_trans_8x8_altivec(b, 0, 1); /* in-place transform with sign=0, rangered=1 */
+ ff_put_pixels_clamped_c(b, dest, stride); /* helper defined elsewhere; receives the transformed block, dest and stride */
+}
+
/** Do inverse transform on 8x4 part of block
*/
static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block)
@@ -342,7 +396,11 @@ void ff_vc1dsp_init_altivec(VC1DSPContext* dsp)
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
return;
- dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
+ dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_altivec;
+ dsp->vc1_inv_trans_8x8_put_signed[0] = vc1_inv_trans_8x8_put_signed_altivec;
+ dsp->vc1_inv_trans_8x8_put_signed[1] = vc1_inv_trans_8x8_put_signed_rangered_altivec;
+ dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_altivec;
+ dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_rangered_altivec;
dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec;
dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec;