From 34e1b0754638ea6e0356239aad01e74de3a582a5 Mon Sep 17 00:00:00 2001
From: Christophe Gisquet
Date: Tue, 17 Apr 2012 18:44:48 +0000
Subject: rv40: don't always do the full prev_type search

120->100 cycles.

Signed-off-by: Ronald S. Bultje
---
 libavcodec/rv40.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'libavcodec/rv40.c')

diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index b65a2005f0..18dd51e871 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -228,8 +228,6 @@ static int rv40_decode_mb_info(RV34DecContext *r)
     int q, i;
     int prev_type = 0;
     int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
-    int blocks[RV34_MB_TYPES] = {0};
-    int count = 0;
 
     if(!r->s.mb_skip_run)
         r->s.mb_skip_run = svq3_get_ue_golomb(gb) + 1;
@@ -237,22 +235,27 @@ static int rv40_decode_mb_info(RV34DecContext *r)
     if(--r->s.mb_skip_run)
         return RV34_MB_SKIP;
 
-    if(r->avail_cache[6-1])
-        blocks[r->mb_type[mb_pos - 1]]++;
     if(r->avail_cache[6-4]){
+        int blocks[RV34_MB_TYPES] = {0};
+        int count = 0;
+        if(r->avail_cache[6-1])
+            blocks[r->mb_type[mb_pos - 1]]++;
         blocks[r->mb_type[mb_pos - s->mb_stride]]++;
         if(r->avail_cache[6-2])
             blocks[r->mb_type[mb_pos - s->mb_stride + 1]]++;
         if(r->avail_cache[6-5])
             blocks[r->mb_type[mb_pos - s->mb_stride - 1]]++;
-    }
-
-    for(i = 0; i < RV34_MB_TYPES; i++){
-        if(blocks[i] > count){
-            count = blocks[i];
-            prev_type = i;
+        for(i = 0; i < RV34_MB_TYPES; i++){
+            if(blocks[i] > count){
+                count = blocks[i];
+                prev_type = i;
+                if(count>1)
+                    break;
+            }
         }
-    }
+    } else if (r->avail_cache[6-1])
+        prev_type = r->mb_type[mb_pos - 1];
+
     if(s->pict_type == AV_PICTURE_TYPE_P){
         prev_type = block_num_to_ptype_vlc_num[prev_type];
         q = get_vlc2(gb, ptype_vlc[prev_type].table, PTYPE_VLC_BITS, 1);
-- 
cgit v1.2.1

From d834ab0f2a5d5689cdd323e935534fda918319ef Mon Sep 17 00:00:00 2001
From: Christophe Gisquet
Date: Tue, 17 Apr 2012 18:44:51 +0000
Subject: rv40: perform bitwise checks in loop filter

Down from 95 kcycles to 93 (including all called functions).

Signed-off-by: Ronald S. Bultje
---
 libavcodec/rv40.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'libavcodec/rv40.c')

diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index 18dd51e871..695b0299a6 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -431,7 +431,7 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
             y_v_deblock &= ~MASK_Y_LEFT_COL;
         if(!row)
             y_h_deblock &= ~MASK_Y_TOP_ROW;
-        if(row == s->mb_height - 1 || (mb_strong[POS_CUR] || mb_strong[POS_BOTTOM]))
+        if(row == s->mb_height - 1 || (mb_strong[POS_CUR] | mb_strong[POS_BOTTOM]))
             y_h_deblock &= ~(MASK_Y_TOP_ROW << 16);
         /* Calculating chroma patterns is similar and easier since there is
          * no motion vector pattern for them.
@@ -448,7 +448,7 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
                 c_v_deblock[i] &= ~MASK_C_LEFT_COL;
             if(!row)
                 c_h_deblock[i] &= ~MASK_C_TOP_ROW;
-            if(row == s->mb_height - 1 || mb_strong[POS_CUR] || mb_strong[POS_BOTTOM])
+            if(row == s->mb_height - 1 || (mb_strong[POS_CUR] | mb_strong[POS_BOTTOM]))
                 c_h_deblock[i] &= ~(MASK_C_TOP_ROW << 4);
         }
 
@@ -469,7 +469,7 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
                                               0, 0, 0);
                 }
                 // filter left block edge in ordinary mode (with low filtering strength)
-                if(y_v_deblock & (MASK_CUR << ij) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
+                if(y_v_deblock & (MASK_CUR << ij) && (i || !(mb_strong[POS_CUR] | mb_strong[POS_LEFT]))){
                     if(!i)
                         clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0;
                     else
@@ -480,14 +480,14 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
                                               alpha, beta, betaY, 0, 0, 1);
                 }
                 // filter top edge of the current macroblock when filtering strength is high
-                if(!j && y_h_deblock & (MASK_CUR << i) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
+                if(!j && y_h_deblock & (MASK_CUR << i) && (mb_strong[POS_CUR] | mb_strong[POS_TOP])){
                     rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither,
                                               clip_cur,
                                               mvmasks[POS_TOP] & (MASK_TOP << i) ? clip[POS_TOP] : 0,
                                               alpha, beta, betaY, 0, 1, 0);
                 }
                 // filter left block edge in edge mode (with high filtering strength)
-                if(y_v_deblock & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
+                if(y_v_deblock & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] | mb_strong[POS_LEFT])){
                     clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0;
                     rv40_adaptive_loop_filter(&r->rdsp, Y, s->linesize, dither,
                                               clip_cur,
@@ -509,7 +509,7 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
                                                   clip_cur,
                                                   alpha, beta, betaC, 1, 0, 0);
                     }
-                    if((c_v_deblock[k] & (MASK_CUR << ij)) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
+                    if((c_v_deblock[k] & (MASK_CUR << ij)) && (i || !(mb_strong[POS_CUR] | mb_strong[POS_LEFT]))){
                         if(!i)
                             clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0;
                         else
@@ -519,14 +519,14 @@ static void rv40_loop_filter(RV34DecContext *r, int row)
                                                   clip_left,
                                                   alpha, beta, betaC, 1, 0, 1);
                     }
-                    if(!j && c_h_deblock[k] & (MASK_CUR << ij) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
+                    if(!j && c_h_deblock[k] & (MASK_CUR << ij) && (mb_strong[POS_CUR] | mb_strong[POS_TOP])){
                         int clip_top = uvcbp[POS_TOP][k] & (MASK_CUR << (ij+2)) ? clip[POS_TOP] : 0;
                         rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, i*8,
                                                   clip_cur,
                                                   clip_top,
                                                   alpha, beta, betaC, 1, 1, 0);
                     }
-                    if(c_v_deblock[k] & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
+                    if(c_v_deblock[k] & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] | mb_strong[POS_LEFT])){
                         clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0;
                         rv40_adaptive_loop_filter(&r->rdsp, C, s->uvlinesize, j*8,
                                                   clip_cur,
-- 
cgit v1.2.1