diff options
-rw-r--r-- | libavcodec/cabac.c           | 54
-rw-r--r-- | libavcodec/cabac.h           |  5
-rw-r--r-- | libavcodec/cabac_functions.h |  8
-rw-r--r-- | libavcodec/h264_cabac.c      |  9
-rw-r--r-- | libavcodec/x86/cabac.h       | 18
-rw-r--r-- | libavcodec/x86/h264_i386.h   | 24
6 files changed, 70 insertions, 48 deletions
diff --git a/libavcodec/cabac.c b/libavcodec/cabac.c index 4afcafb52b..bd7d9494b3 100644 --- a/libavcodec/cabac.c +++ b/libavcodec/cabac.c @@ -31,6 +31,29 @@ #include "cabac.h" #include "cabac_functions.h" +uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = { + 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, + 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + static const uint8_t lps_range[64][4]= { {128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205}, {116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166}, @@ -50,8 +73,6 @@ static const uint8_t lps_range[64][4]= { { 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2}, }; -uint8_t ff_h264_mlps_state[4*64]; -uint8_t ff_h264_lps_range[4*2*64]; static uint8_t h264_mps_state[2 * 64]; static const uint8_t mps_state[64]= { @@ -76,27 +97,11 @@ static const uint8_t lps_state[64]= { 36,36,37,37,37,38,38,63, }; -const uint8_t ff_h264_norm_shift[512]= { - 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, - 
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +static const uint8_t last_coeff_flag_offset_8x8[63] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 }; /** @@ -153,6 +158,9 @@ void ff_init_cabac_states(CABACContext *c){ ff_h264_mlps_state[128-2*i-2]= 0; } } + for(i=0; i< 63; i++){ + ff_h264_last_coeff_flag_offset_8x8[i] = last_coeff_flag_offset_8x8[i]; + } } #ifdef TEST diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index 5a99f0b2fe..1f1c943262 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -31,6 +31,11 @@ #include "put_bits.h" +#define H264_NORM_SHIFT_OFFSET 0 +#define H264_LPS_RANGE_OFFSET 512 +#define H264_MLPS_STATE_OFFSET 1024 +#define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280 + #define CABAC_BITS 16 #define CABAC_MASK ((1<<CABAC_BITS)-1) diff --git a/libavcodec/cabac_functions.h b/libavcodec/cabac_functions.h 
index 4c74cf7b23..484ba85db6 100644 --- a/libavcodec/cabac_functions.h +++ b/libavcodec/cabac_functions.h @@ -36,9 +36,11 @@ # include "x86/cabac.h" #endif -extern const uint8_t ff_h264_norm_shift[512]; -extern uint8_t ff_h264_mlps_state[4*64]; -extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS +extern uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63]; +static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET; +static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET; +static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET; +static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET; static void refill(CABACContext *c){ #if CABAC_BITS == 16 diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index 9e1cf1b520..f27e72faf0 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -1560,13 +1560,6 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, return base_ctx[cat] + ctx; } -DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 -}; - static av_always_inline void decode_cabac_residual_internal(H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, @@ -1670,7 +1663,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block, last_coeff_ctx_base-significant_coeff_ctx_base); } #else - DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); + DECODE_SIGNIFICANCE( 63, sig_off[last], ff_h264_last_coeff_flag_offset_8x8[last] ); } else { if (is_dc && chroma422) { // dc 422 DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); diff --git a/libavcodec/x86/cabac.h 
b/libavcodec/x86/cabac.h index c1fc0d1139..32ce2b2762 100644 --- a/libavcodec/x86/cabac.h +++ b/libavcodec/x86/cabac.h @@ -24,6 +24,7 @@ #include "libavcodec/cabac.h" #include "libavutil/attributes.h" #include "libavutil/x86_cpu.h" +#include "libavutil/internal.h" #include "config.h" #if HAVE_FAST_CMOV @@ -51,16 +52,16 @@ "xor "tmp" , "ret" \n\t" #endif /* HAVE_FAST_CMOV */ -#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end) \ +#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off) \ "movzbl "statep" , "ret" \n\t"\ "mov "range" , "tmp" \n\t"\ "and $0xC0 , "range" \n\t"\ - "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ + "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\ "sub "range" , "tmp" \n\t"\ BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \ - "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ + "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\ "shl %%cl , "range" \n\t"\ - "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ + "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\ "shl %%cl , "low" \n\t"\ "mov "tmpbyte" , "statep" \n\t"\ "test "lowword" , "lowword" \n\t"\ @@ -76,7 +77,7 @@ "shr $15 , %%ecx \n\t"\ "bswap "tmp" \n\t"\ "shr $15 , "tmp" \n\t"\ - "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ + "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\ "sub $0xFFFF , "tmp" \n\t"\ "neg %%ecx \n\t"\ "add $7 , %%ecx \n\t"\ @@ -94,11 +95,14 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, __asm__ volatile( BRANCHLESS_GET_CABAC("%0", "(%4)", "%1", "%w1", "%2", "%3", "%b3", - "%a6(%5)", "%a7(%5)") + "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10") : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp) : "r"(state), "r"(c), "i"(offsetof(CABACContext, bytestream)), - "i"(offsetof(CABACContext, 
bytestream_end)) + "i"(offsetof(CABACContext, bytestream_end)), + "i"(H264_NORM_SHIFT_OFFSET), + "i"(H264_LPS_RANGE_OFFSET), + "i"(H264_MLPS_STATE_OFFSET) : "%"REG_c, "memory" ); return bit & 1; diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h index e849a3d90c..add795e285 100644 --- a/libavcodec/x86/h264_i386.h +++ b/libavcodec/x86/h264_i386.h @@ -45,12 +45,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, int minusindex= 4-(intptr_t)index; int bit; x86_reg coeff_count; + __asm__ volatile( "3: \n\t" BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", "%5", "%k0", "%b0", - "%a11(%6)", "%a12(%6)") + "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") "test $1, %4 \n\t" " jz 4f \n\t" @@ -58,7 +59,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", "%5", "%k0", "%b0", - "%a11(%6)", "%a12(%6)") + "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15") "sub %10, %1 \n\t" "mov %2, %0 \n\t" @@ -86,7 +87,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, "+&r"(c->low), "=&r"(bit), "+&r"(c->range) : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), "i"(offsetof(CABACContext, bytestream)), - "i"(offsetof(CABACContext, bytestream_end)) + "i"(offsetof(CABACContext, bytestream_end)), + "i"(H264_NORM_SHIFT_OFFSET), + "i"(H264_LPS_RANGE_OFFSET), + "i"(H264_MLPS_STATE_OFFSET) : "%"REG_c, "memory" ); return coeff_count; @@ -100,6 +104,7 @@ static int decode_significance_8x8_x86(CABACContext *c, x86_reg coeff_count; x86_reg last=0; x86_reg state; + __asm__ volatile( "mov %1, %6 \n\t" "3: \n\t" @@ -110,18 +115,19 @@ static int decode_significance_8x8_x86(CABACContext *c, BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", "%5", "%k0", "%b0", - "%a12(%7)", "%a13(%7)") + "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") "mov %1, %k6 \n\t" "test $1, %4 \n\t" " jz 4f \n\t" - "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t" + "movzbl 
"MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t" + "add %11, %6 \n\t" BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", "%5", "%k0", "%b0", - "%a12(%7)", "%a13(%7)") + "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16") "mov %2, %0 \n\t" "mov %1, %k6 \n\t" @@ -147,7 +153,11 @@ static int decode_significance_8x8_x86(CABACContext *c, : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base), "i"(offsetof(CABACContext, bytestream)), - "i"(offsetof(CABACContext, bytestream_end)) + "i"(offsetof(CABACContext, bytestream_end)), + "i"(H264_NORM_SHIFT_OFFSET), + "i"(H264_LPS_RANGE_OFFSET), + "i"(H264_MLPS_STATE_OFFSET), + "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) : "%"REG_c, "memory" ); return coeff_count; |