diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2005-06-02 21:15:20 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2005-06-02 21:15:20 +0000 |
commit | 43efd19a88bee63d50f568be5ede9d8c1f1dc091 (patch) | |
tree | 99e1880bf2fd4fe75a2d0ba3d478a073d3be3f64 | |
parent | 20ffdcf9c09f6b9cb4265735ea8a323439b66a8f (diff) | |
download | ffmpeg-43efd19a88bee63d50f568be5ede9d8c1f1dc091.tar.gz |
decode H.264 with 8x8 transform.
deblocking is still incorrect with 8x8+cavlc
Originally committed as revision 4339 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/dsputil.c | 1 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 2 | ||||
-rw-r--r-- | libavcodec/h264.c | 543 | ||||
-rw-r--r-- | libavcodec/h264data.h | 139 | ||||
-rw-r--r-- | libavcodec/h264idct.c | 71 |
5 files changed, 693 insertions, 63 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 16daa01e45..9a7854910f 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3685,6 +3685,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) } c->h264_idct_add= ff_h264_idct_add_c; + c->h264_idct8_add= ff_h264_idct8_add_c; c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 80ae97ac43..c1c70e415a 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -50,6 +50,7 @@ void ff_fdct_mmx(DCTELEM *block); void ff_fdct_mmx2(DCTELEM *block); void ff_fdct_sse2(DCTELEM *block); +void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); @@ -325,6 +326,7 @@ typedef struct DSPContext { #define RECON_SHIFT 6 void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); } DSPContext; void dsputil_static_init(void); diff --git a/libavcodec/h264.c b/libavcodec/h264.c index f7475d09ef..c084dd235a 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -109,6 +109,7 @@ typedef struct PPS{ int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag int constrained_intra_pred; ///< constrained_intra_pred_flag int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag + int transform_8x8_mode; ///< transform_8x8_mode_flag }PPS; /** @@ -174,6 +175,7 @@ typedef struct H264Context{ int8_t intra4x4_pred_mode_cache[5*8]; int8_t (*intra4x4_pred_mode)[8]; void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp? 
+ void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride); void (*pred8x8 [4+3])(uint8_t *src, int stride); void (*pred16x16[4+3])(uint8_t *src, int stride); unsigned int topleft_samples_available; @@ -204,6 +206,11 @@ typedef struct H264Context{ int mv_cache_clean[2]; /** + * number of neighbors (top and/or left) that used 8x8 dct + */ + int neighbor_transform_size; + + /** * block_offset[ 0..23] for frame macroblocks * block_offset[24..47] for field macroblocks */ @@ -229,6 +236,8 @@ typedef struct H264Context{ */ PPS pps; //FIXME move to Picture perhaps? (->no) do we need that? + uint16_t (*dequant8_coeff)[64]; + int slice_num; uint8_t *slice_table_base; uint8_t *slice_table; ///< slice_table_base + mb_stride + 1 @@ -318,7 +327,7 @@ typedef struct H264Context{ * Cabac */ CABACContext cabac; - uint8_t cabac_state[399]; + uint8_t cabac_state[460]; int cabac_init_idc; /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ @@ -841,6 +850,8 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){ } } #endif + + h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); } static inline void write_back_intra_pred_mode(H264Context *h){ @@ -2185,11 +2196,7 @@ static void pred8x8_horizontal_c(uint8_t *src, int stride){ static void pred8x8_128_dc_c(uint8_t *src, int stride){ int i; - for(i=0; i<4; i++){ - ((uint32_t*)(src+i*stride))[0]= - ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U; - } - for(i=4; i<8; i++){ + for(i=0; i<8; i++){ ((uint32_t*)(src+i*stride))[0]= ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U; } @@ -2298,6 +2305,248 @@ static void pred8x8_plane_c(uint8_t *src, int stride){ } } +#define SRC(x,y) src[(x)+(y)*stride] +#define PL(y) \ + const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; +#define PREDICT_8x8_LOAD_LEFT \ + const int l0 = ((has_topleft ? 
SRC(-1,-1) : SRC(-1,0)) \ + + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \ + PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \ + const int l7 = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2; + +#define PT(x) \ + const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; +#define PREDICT_8x8_LOAD_TOP \ + const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \ + + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \ + PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \ + const int t7 = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \ + + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2; \ + +#define PTR(x) \ + t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2; +#define PREDICT_8x8_LOAD_TOPRIGHT \ + int t8, t9, t10, t11, t12, t13, t14, t15; \ + if(has_topright) { \ + PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \ + t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \ + } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1); + +#define PREDICT_8x8_LOAD_TOPLEFT \ + const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2; + +#define PREDICT_8x8_DC(v) \ + int y; \ + for( y = 0; y < 8; y++ ) { \ + ((uint32_t*)src)[0] = \ + ((uint32_t*)src)[1] = v; \ + src += stride; \ + } + +static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_DC(0x80808080); +} +static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_LEFT; + const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101; + PREDICT_8x8_DC(dc); +} +static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101; + PREDICT_8x8_DC(dc); +} +static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOP; + const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7 + +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101; + PREDICT_8x8_DC(dc); +} +static void pred8x8l_horizontal_c(uint8_t *src, int 
has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_LEFT; +#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\ + ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y + ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); +#undef ROW +} +static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + int y; + PREDICT_8x8_LOAD_TOP; + src[0] = t0; + src[1] = t1; + src[2] = t2; + src[3] = t3; + src[4] = t4; + src[5] = t5; + src[6] = t6; + src[7] = t7; + for( y = 1; y < 8; y++ ) + *(uint64_t*)(src+y*stride) = *(uint64_t*)src; +} +static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_TOPRIGHT; + SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2; + SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2; + SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2; + SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2; + SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2; + SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2; + SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2; + SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; + SRC(7,7)= (t14 + 3*t15 + 2) >> 2; +} +static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2; + 
SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2; + SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2; + SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2; + SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2; + SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2; + SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2; + SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2; + SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; + +} +static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2; + SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2; + SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2; + SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2; + SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2; + SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2; + SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1; + SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2; + SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1; + SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1; + SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1; + SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= 
(t2 + 2*t3 + t4 + 2) >> 2; + SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1; + SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1; + SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1; + SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(7,0)= (t6 + t7 + 1) >> 1; +} +static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_LEFT; + PREDICT_8x8_LOAD_TOPLEFT; + SRC(0,7)= (l6 + l7 + 1) >> 1; + SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2; + SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1; + SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2; + SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1; + SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2; + SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1; + SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2; + SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1; + SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2; + SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1; + SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2; + SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1; + SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2; + SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1; + SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2; + SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2; + SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2; + SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2; + SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2; + SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; + SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; +} +static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_TOP; + PREDICT_8x8_LOAD_TOPRIGHT; + SRC(0,0)= (t0 + t1 + 1) >> 1; + SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2; + SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1; + 
SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2; + SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1; + SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2; + SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1; + SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2; + SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1; + SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2; + SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1; + SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2; + SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1; + SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2; + SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1; + SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2; + SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1; + SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2; + SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1; + SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2; + SRC(7,6)= (t10 + t11 + 1) >> 1; + SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; +} +static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride) +{ + PREDICT_8x8_LOAD_LEFT; + SRC(0,0)= (l0 + l1 + 1) >> 1; + SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; + SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1; + SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2; + SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1; + SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2; + SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1; + SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2; + SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1; + SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2; + SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1; + SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2; + SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1; + SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2; + SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)= + 
SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)= + SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= + SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; +} +#undef PREDICT_8x8_LOAD_LEFT +#undef PREDICT_8x8_LOAD_TOP +#undef PREDICT_8x8_LOAD_TOPLEFT +#undef PREDICT_8x8_LOAD_TOPRIGHT +#undef PREDICT_8x8_DC +#undef PTR +#undef PT +#undef PL +#undef SRC + static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int src_x_offset, int src_y_offset, @@ -2609,6 +2858,19 @@ static void init_pred_ptrs(H264Context *h){ h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c; h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c; + h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c; + h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c; + h->pred8x8l[DC_PRED ]= pred8x8l_dc_c; + h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c; + h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c; + h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c; + h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c; + h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c; + h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c; + h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c; + h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c; + h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c; + h->pred8x8[DC_PRED8x8 ]= pred8x8_dc_c; h->pred8x8[VERT_PRED8x8 ]= pred8x8_vertical_c; h->pred8x8[HOR_PRED8x8 ]= pred8x8_horizontal_c; @@ -2642,6 +2904,8 @@ static void free_tables(H264Context *h){ av_freep(&h->mb2b_xy); av_freep(&h->mb2b8_xy); + av_freep(&h->dequant8_coeff); + av_freep(&h->s.obmc_scratchpad); } @@ -2652,7 +2916,7 @@ static void free_tables(H264Context *h){ static int alloc_tables(H264Context *h){ MpegEncContext * const s = &h->s; const int big_mb_num= s->mb_stride * (s->mb_height+1); - int x,y; + int x,y,q; CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) @@ -2685,6 +2949,17 @@ static int 
alloc_tables(H264Context *h){ } } + CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t)); + for(q=0; q<52; q++){ + int shift = div6[q]; + int idx = rem6[q]; + if(shift >= 2) // qp<12 are shifted during dequant + shift -= 2; + for(x=0; x<64; x++) + h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][ + dequant8_coeff_init_scan[(x>>1)&12 | x&3] ] << shift; + } + s->obmc_scratchpad = NULL; return 0; @@ -2832,6 +3107,9 @@ b= t; if(deblock_top){ XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg); XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); + if(s->mb_x < s->mb_width){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1); + } } if(!(s->flags&CODEC_FLAG_GRAY)){ @@ -3011,6 +3289,16 @@ static void hl_decode_mb(H264Context *h){ if(IS_INTRA4x4(mb_type)){ if(!s->encoding){ + if(IS_8x8DCT(mb_type)){ + for(i=0; i<16; i+=4){ + uint8_t * const ptr= dest_y + block_offset[i]; + const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; + h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, + (h->topright_samples_available<<(i+1))&0x8000, linesize); + if(h->non_zero_count_cache[ scan8[i] ]) + s->dsp.h264_idct8_add(ptr, h->mb + i*16, linesize); + } + }else for(i=0; i<16; i++){ uint8_t * const ptr= dest_y + block_offset[i]; uint8_t *topright; @@ -3023,10 +3311,7 @@ static void hl_decode_mb(H264Context *h){ if(!topright_avail){ tr= ptr[3 - linesize]*0x01010101; topright= (uint8_t*) &tr; - }else if(i==5 && h->deblocking_filter){ - tr= *(uint32_t*)h->top_borders[h->mb_aff_frame ? IS_INTERLACED(mb_type) ? bottom : 1 : 0][mb_x+1]; - topright= (uint8_t*) &tr; - }else + }else topright= ptr + 4 - linesize; }else topright= NULL; @@ -3071,10 +3356,13 @@ static void hl_decode_mb(H264Context *h){ if(!IS_INTRA4x4(mb_type)){ if(s->codec_id == CODEC_ID_H264){ - for(i=0; i<16; i++){ + const int di = IS_8x8DCT(mb_type) ? 
4 : 1; + void (*idct)(uint8_t *dst, DCTELEM *block, int stride) = + IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add; + for(i=0; i<16; i+=di){ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below uint8_t * const ptr= dest_y + block_offset[i]; - s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize); + idct(ptr, h->mb + i*16, linesize); } } }else{ @@ -4075,6 +4363,16 @@ static inline int get_level_prefix(GetBitContext *gb){ return log-1; } +static inline int get_dct8x8_allowed(H264Context *h){ + int i; + for(i=0; i<4; i++){ + if(!IS_SUB_8X8(h->sub_mb_type[i]) + || !h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])) + return 0; + } + return 1; +} + /** * decodes a residual block. * @param n block index @@ -4082,9 +4380,8 @@ static inline int get_level_prefix(GetBitContext *gb){ * @param max_coeff number of coefficients in the block * @return <0 if an error occured */ -static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, int qp, int max_coeff){ +static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){ MpegEncContext * const s = &h->s; - const uint16_t *qmul= dequant_coeff[qp]; static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}; int level[16], run[16]; int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones; @@ -4272,6 +4569,7 @@ static int decode_mb_cavlc(H264Context *h){ MpegEncContext * const s = &h->s; const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; int mb_type, partition_count, cbp; + int dct8x8_allowed= h->pps.transform_8x8_mode; s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong? 
@@ -4328,7 +4626,6 @@ decode_intra_mb: if(h->mb_field_decoding_flag) mb_type |= MB_TYPE_INTERLACED; - s->current_picture.mb_type[mb_xy]= mb_type; h->slice_table[ mb_xy ]= h->slice_num; if(IS_INTRA_PCM(mb_type)){ @@ -4366,6 +4663,7 @@ decode_intra_mb: // All coeffs are present memset(h->non_zero_count[mb_xy], 16, 16); + s->current_picture.mb_type[mb_xy]= mb_type; return 0; } @@ -4376,9 +4674,14 @@ decode_intra_mb: // init_top_left_availability(h); if(IS_INTRA4x4(mb_type)){ int i; + int di = 1; + if(dct8x8_allowed && get_bits1(&s->gb)){ + mb_type |= MB_TYPE_8x8DCT; + di = 4; + } // fill_intra4x4_pred_table(h); - for(i=0; i<16; i++){ + for(i=0; i<16; i+=di){ const int mode_coded= !get_bits1(&s->gb); const int predicted_mode= pred_intra_mode(h, i); int mode; @@ -4393,7 +4696,10 @@ decode_intra_mb: mode= predicted_mode; } - h->intra4x4_pred_mode_cache[ scan8[i] ] = mode; + if(di==4) + fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); + else + h->intra4x4_pred_mode_cache[ scan8[i] ] = mode; } write_back_intra_pred_mode(h); if( check_intra4x4_pred_mode(h) < 0) @@ -4454,6 +4760,9 @@ decode_intra_mb: } } + if(dct8x8_allowed) + dct8x8_allowed = get_dct8x8_allowed(h); + for(list=0; list<2; list++){ const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list]; if(ref_count == 0) continue; @@ -4501,7 +4810,7 @@ decode_intra_mb: } }else if(IS_DIRECT(mb_type)){ pred_direct_motion(h, &mb_type); - s->current_picture.mb_type[mb_xy]= mb_type; + dct8x8_allowed &= h->sps.direct_8x8_inference_flag; }else{ int list, mx, my, i; //FIXME we should set ref_idx_l? to 0 if we use that later ... 
@@ -4597,6 +4906,12 @@ decode_intra_mb: cbp= golomb_to_inter_cbp[cbp]; } + if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){ + if(get_bits1(&s->gb)) + mb_type |= MB_TYPE_8x8DCT; + } + s->current_picture.mb_type[mb_xy]= mb_type; + if(cbp || IS_INTRA16x16(mb_type)){ int i8x8, i4x4, chroma_idx; int chroma_qp, dquant; @@ -4628,7 +4943,7 @@ decode_intra_mb: h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); if(IS_INTRA16x16(mb_type)){ - if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, s->qscale, 16) < 0){ + if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, dequant_coeff[s->qscale], 16) < 0){ return -1; //FIXME continue if partitioned and other return -1 too } @@ -4638,7 +4953,7 @@ decode_intra_mb: for(i8x8=0; i8x8<4; i8x8++){ for(i4x4=0; i4x4<4; i4x4++){ const int index= i4x4 + 4*i8x8; - if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, s->qscale, 15) < 0 ){ + if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, dequant_coeff[s->qscale], 15) < 0 ){ return -1; } } @@ -4649,11 +4964,27 @@ decode_intra_mb: }else{ for(i8x8=0; i8x8<4; i8x8++){ if(cbp & (1<<i8x8)){ - for(i4x4=0; i4x4<4; i4x4++){ - const int index= i4x4 + 4*i8x8; + if(IS_8x8DCT(mb_type)){ + DCTELEM *buf = &h->mb[64*i8x8]; + for(i4x4=0; i4x4<4; i4x4++){ + if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4, + h->dequant8_coeff[s->qscale], 16) <0 ) + return -1; + } + if(s->qscale < 12){ + int i; + for(i=0; i<64; i++) + buf[i] = (buf[i] + 2) >> 2; + } + uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; + nnz[0] |= nnz[1] | nnz[8] | nnz[9]; + }else{ + for(i4x4=0; i4x4<4; i4x4++){ + const int index= i4x4 + 4*i8x8; - if( decode_residual(h, gb, h->mb + 16*index, index, scan, s->qscale, 16) <0 ){ - return -1; + if( decode_residual(h, gb, h->mb + 16*index, index, scan, dequant_coeff[s->qscale], 16) <0 ){ + return -1; + } } } }else{ @@ -4665,7 
+4996,7 @@ decode_intra_mb: if(cbp&0x30){ for(chroma_idx=0; chroma_idx<2; chroma_idx++) - if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, chroma_qp, 4) < 0){ + if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, dequant_coeff[chroma_qp], 4) < 0){ return -1; } } @@ -4674,7 +5005,7 @@ decode_intra_mb: for(chroma_idx=0; chroma_idx<2; chroma_idx++){ for(i4x4=0; i4x4<4; i4x4++){ const int index= 16 + 4*chroma_idx + i4x4; - if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, chroma_qp, 15) < 0){ + if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, dequant_coeff[chroma_qp], 15) < 0){ return -1; } } @@ -5012,6 +5343,10 @@ static int decode_cabac_b_mb_sub_type( H264Context *h ) { return type; } +static inline int decode_cabac_mb_transform_size( H264Context *h ) { + return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); +} + static int decode_cabac_mb_ref( H264Context *h, int list, int n ) { int refa = h->ref_cache[list][scan8[n] - 1]; int refb = h->ref_cache[list][scan8[n] - 8]; @@ -5107,15 +5442,30 @@ static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) { return ctx + 4 * cat; } -static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int qp, int max_coeff) { +static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) { const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; - const uint16_t *qmul= dequant_coeff[qp]; static const int significant_coeff_flag_field_offset[2] = { 105, 277 }; static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 }; - static const int significant_coeff_flag_offset[5] = { 0, 15, 29, 44, 47 }; - static const int coeff_abs_level_m1_offset[5] = {227+ 0, 227+10, 227+20, 227+30, 227+39 }; - - int index[16]; + static const 
int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 }; + static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 }; + static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 }; + static const int identity[15] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 + }; + static const int significant_coeff_flag_offset_8x8[63] = { + 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, + 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7, + 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11, + 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 + }; + static const int last_coeff_flag_offset_8x8[63] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 + }; + + int index[64]; int i, last; int coeff_count = 0; @@ -5123,27 +5473,45 @@ static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat int abslevel1 = 1; int abslevelgt1 = 0; + const int* significant_coeff_ctx_offset; + const int* last_coeff_ctx_offset; + const int significant_coeff_ctx_base = significant_coeff_flag_offset[cat] + + significant_coeff_flag_field_offset[h->mb_field_decoding_flag]; + const int last_coeff_ctx_base = last_significant_coeff_flag_offset[cat] + + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag]; + /* cat: 0-> DC 16x16 n = 0 * 1-> AC 16x16 n = luma4x4idx * 2-> Luma4x4 n = luma4x4idx * 3-> DC Chroma n = iCbCr * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx + * 5-> Luma8x8 n = 4 * luma8x8idx */ /* read coded block flag */ - if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) { - if( cat == 1 || cat == 2 ) - h->non_zero_count_cache[scan8[n]] = 0; - else if( cat == 4 ) - h->non_zero_count_cache[scan8[16+n]] = 0; + if( cat == 5 ) { + significant_coeff_ctx_offset = significant_coeff_flag_offset_8x8; + last_coeff_ctx_offset = 
last_coeff_flag_offset_8x8; + } else { + if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) { + if( cat == 1 || cat == 2 ) + h->non_zero_count_cache[scan8[n]] = 0; + else if( cat == 4 ) + h->non_zero_count_cache[scan8[16+n]] = 0; - return 0; + return 0; + } + + significant_coeff_ctx_offset = + last_coeff_ctx_offset = identity; } for(last= 0; last < max_coeff - 1; last++) { - if( get_cabac( &h->cabac, &h->cabac_state[significant_coeff_flag_field_offset[h->mb_field_decoding_flag]+significant_coeff_flag_offset[cat]+last] )) { + int sig_ctx = significant_coeff_ctx_base + significant_coeff_ctx_offset[last]; + if( get_cabac( &h->cabac, &h->cabac_state[sig_ctx] )) { + int last_ctx = last_coeff_ctx_base + last_coeff_ctx_offset[last]; index[coeff_count++] = last; - if( get_cabac( &h->cabac, &h->cabac_state[last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag]+significant_coeff_flag_offset[cat]+last] ) ) { + if( get_cabac( &h->cabac, &h->cabac_state[last_ctx] ) ) { last= max_coeff; break; } @@ -5160,9 +5528,11 @@ static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat h->non_zero_count_cache[scan8[n]] = coeff_count; else if( cat == 3 ) h->cbp_table[mb_xy] |= 0x40 << n; - else { - assert( cat == 4 ); + else if( cat == 4 ) h->non_zero_count_cache[scan8[16+n]] = coeff_count; + else { + assert( cat == 5 ); + fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1); } for( i = coeff_count - 1; i >= 0; i-- ) { @@ -5247,6 +5617,7 @@ static int decode_mb_cabac(H264Context *h) { MpegEncContext * const s = &h->s; const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; int mb_type, partition_count, cbp = 0; + int dct8x8_allowed= h->pps.transform_8x8_mode; s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?) 
@@ -5305,7 +5676,6 @@ decode_intra_mb: if(h->mb_field_decoding_flag) mb_type |= MB_TYPE_INTERLACED; - s->current_picture.mb_type[mb_xy]= mb_type; h->slice_table[ mb_xy ]= h->slice_num; if(IS_INTRA_PCM(mb_type)) { @@ -5351,19 +5721,29 @@ decode_intra_mb: h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0); // All coeffs are present memset(h->non_zero_count[mb_xy], 16, 16); + s->current_picture.mb_type[mb_xy]= mb_type; return 0; } fill_caches(h, mb_type, 0); if( IS_INTRA( mb_type ) ) { + int i; if( IS_INTRA4x4( mb_type ) ) { - int i; - for( i = 0; i < 16; i++ ) { - int pred = pred_intra_mode( h, i ); - h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred ); + if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) { + mb_type |= MB_TYPE_8x8DCT; + for( i = 0; i < 16; i+=4 ) { + int pred = pred_intra_mode( h, i ); + int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred ); + fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 ); + } + } else { + for( i = 0; i < 16; i++ ) { + int pred = pred_intra_mode( h, i ); + h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred ); //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] ); + } } write_back_intra_pred_mode(h); if( check_intra4x4_pred_mode(h) < 0 ) return -1; @@ -5420,6 +5800,9 @@ decode_intra_mb: } } + if(dct8x8_allowed) + dct8x8_allowed = get_dct8x8_allowed(h); + for(list=0; list<2; list++){ for(i=0; i<4; i++){ if(IS_DIRECT(h->sub_mb_type[i])){ @@ -5484,9 +5867,9 @@ decode_intra_mb: } } else if( IS_DIRECT(mb_type) ) { pred_direct_motion(h, &mb_type); - s->current_picture.mb_type[mb_xy]= mb_type; fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4); + dct8x8_allowed &= h->sps.direct_8x8_inference_flag; } else { int list, mx, my, i, mpx, mpy; if(IS_16X16(mb_type)){ @@ -5585,6 +5968,12 @@ decode_intra_mb: 
h->cbp_table[mb_xy] = cbp; + if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) { + if( decode_cabac_mb_transform_size( h ) ) + mb_type |= MB_TYPE_8x8DCT; + } + s->current_picture.mb_type[mb_xy]= mb_type; + if( cbp || IS_INTRA16x16( mb_type ) ) { const uint8_t *scan, *dc_scan; int dqp; @@ -5608,12 +5997,12 @@ decode_intra_mb: if( IS_INTRA16x16( mb_type ) ) { int i; //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); - if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, s->qscale, 16) < 0) + if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, dequant_coeff[s->qscale], 16) < 0) return -1; if( cbp&15 ) { for( i = 0; i < 16; i++ ) { //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); - if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, s->qscale, 15) < 0 ) + if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, dequant_coeff[s->qscale], 15) < 0 ) return -1; } } else { @@ -5623,10 +6012,20 @@ decode_intra_mb: int i8x8, i4x4; for( i8x8 = 0; i8x8 < 4; i8x8++ ) { if( cbp & (1<<i8x8) ) { + if( IS_8x8DCT(mb_type) ) { + if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, + zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 ) + return -1; + if(s->qscale < 12){ + int i; + for(i=0; i<64; i++) + h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2; + } + } else for( i4x4 = 0; i4x4 < 4; i4x4++ ) { const int index = 4*i8x8 + i4x4; //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); - if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, s->qscale, 16) < 0 ) + if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, dequant_coeff[s->qscale], 16) < 0 ) return -1; } } else { @@ -5640,7 +6039,7 @@ decode_intra_mb: int c; for( c = 0; c < 2; c++ ) { //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); - if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, h->chroma_qp, 4) < 0) + if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, dequant_coeff[h->chroma_qp], 4) < 0) return -1; } } @@ 
-5651,7 +6050,7 @@ decode_intra_mb: for( i = 0; i < 4; i++ ) { const int index = 16 + 4 * c + i; //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); - if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->chroma_qp, 15) < 0) + if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, dequant_coeff[h->chroma_qp], 15) < 0) return -1; } } @@ -6018,6 +6417,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { bS[i] = 4; } else if( h->non_zero_count_cache[b_idx] != 0 || + /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */ h->non_zero_count_cache[bn_idx] != 0 ) { bS[i] = 2; } else { @@ -6073,6 +6473,9 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 int bS[4]; int qp; + if( (edge&1) && IS_8x8DCT(s->current_picture.mb_type[mb_xy]) ) + continue; + if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0) && !IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && IS_INTERLACED(s->current_picture.mb_type[mbn_xy]) @@ -6220,7 +6623,7 @@ static int decode_slice(H264Context *h){ s->gb.buffer + get_bits_count(&s->gb)/8, ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8); /* calculate pre-state */ - for( i= 0; i < 399; i++ ) { + for( i= 0; i < 460; i++ ) { int pre; if( h->slice_type == I_TYPE ) pre = clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 ); @@ -6488,6 +6891,18 @@ static inline int decode_seq_parameter_set(H264Context *h){ sps->profile_idc= profile_idc; sps->level_idc= level_idc; + if(sps->profile_idc >= 100){ //high profile + if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc + get_bits1(&s->gb); //residual_color_transform_flag + get_ue_golomb(&s->gb); //bit_depth_luma_minus8 + get_ue_golomb(&s->gb); //bit_depth_chroma_minus8 + get_bits1(&s->gb); //qpprime_y_zero_transform_bypass_flag + if(get_bits1(&s->gb)){ 
//seq_scaling_matrix_present_flag + av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n"); + return -1; + } + } + sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4; sps->poc_type= get_ue_golomb(&s->gb); @@ -6562,7 +6977,7 @@ static inline int decode_seq_parameter_set(H264Context *h){ return 0; } -static inline int decode_picture_parameter_set(H264Context *h){ +static inline int decode_picture_parameter_set(H264Context *h, int bit_length){ MpegEncContext * const s = &h->s; int pps_id= get_ue_golomb(&s->gb); PPS *pps= &h->pps_buffer[pps_id]; @@ -6623,9 +7038,18 @@ static inline int decode_picture_parameter_set(H264Context *h){ pps->deblocking_filter_parameters_present= get_bits1(&s->gb); pps->constrained_intra_pred= get_bits1(&s->gb); pps->redundant_pic_cnt_present = get_bits1(&s->gb); + + if(get_bits_count(&s->gb) < bit_length){ + pps->transform_8x8_mode= get_bits1(&s->gb); + if(get_bits1(&s->gb)){ //pic_scaling_matrix_present_flag + av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n"); + return -1; + } + get_se_golomb(&s->gb); //second_chroma_qp_index_offset + } if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s\n", + av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n", pps_id, pps->sps_id, pps->cabac ? "CABAC" : "CAVLC", pps->slice_group_count, @@ -6634,7 +7058,8 @@ static inline int decode_picture_parameter_set(H264Context *h){ pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset, pps->deblocking_filter_parameters_present ? "LPAR" : "", pps->constrained_intra_pred ? "CONSTR" : "", - pps->redundant_pic_cnt_present ? "REDU" : "" + pps->redundant_pic_cnt_present ? "REDU" : "", + pps->transform_8x8_mode ? 
"8x8DCT" : "" ); } @@ -6800,7 +7225,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ case NAL_PPS: init_get_bits(&s->gb, ptr, bit_length); - decode_picture_parameter_set(h); + decode_picture_parameter_set(h, bit_length); break; case NAL_PICTURE_DELIMITER: diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h index 21d2260e88..f72cb2d485 100644 --- a/libavcodec/h264data.h +++ b/libavcodec/h264data.h @@ -306,8 +306,50 @@ static const uint8_t chroma_dc_scan[4]={ (0+1*2)*16, (1+1*2)*16, //FIXME }; +static const uint8_t zigzag_scan8x8[64]={ + 0+0*8, 1+0*8, 0+1*8, 0+2*8, + 1+1*8, 2+0*8, 3+0*8, 2+1*8, + 1+2*8, 0+3*8, 0+4*8, 1+3*8, + 2+2*8, 3+1*8, 4+0*8, 5+0*8, + 4+1*8, 3+2*8, 2+3*8, 1+4*8, + 0+5*8, 0+6*8, 1+5*8, 2+4*8, + 3+3*8, 4+2*8, 5+1*8, 6+0*8, + 7+0*8, 6+1*8, 5+2*8, 4+3*8, + 3+4*8, 2+5*8, 1+6*8, 0+7*8, + 1+7*8, 2+6*8, 3+5*8, 4+4*8, + 5+3*8, 6+2*8, 7+1*8, 7+2*8, + 6+3*8, 5+4*8, 4+5*8, 3+6*8, + 2+7*8, 3+7*8, 4+6*8, 5+5*8, + 6+4*8, 7+3*8, 7+4*8, 6+5*8, + 5+6*8, 4+7*8, 5+7*8, 6+6*8, + 7+5*8, 7+6*8, 6+7*8, 7+7*8, +}; + +// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] +static const uint8_t zigzag_scan8x8_cavlc[64]={ + 0+0*8, 1+1*8, 1+2*8, 2+2*8, + 4+1*8, 0+5*8, 3+3*8, 7+0*8, + 3+4*8, 1+7*8, 5+3*8, 6+3*8, + 2+7*8, 6+4*8, 5+6*8, 7+5*8, + 1+0*8, 2+0*8, 0+3*8, 3+1*8, + 3+2*8, 0+6*8, 4+2*8, 6+1*8, + 2+5*8, 2+6*8, 6+2*8, 5+4*8, + 3+7*8, 7+3*8, 4+7*8, 7+6*8, + 0+1*8, 3+0*8, 0+4*8, 4+0*8, + 2+3*8, 1+5*8, 5+1*8, 5+2*8, + 1+6*8, 3+5*8, 7+1*8, 4+5*8, + 4+6*8, 7+4*8, 5+7*8, 6+7*8, + 0+2*8, 2+1*8, 1+3*8, 5+0*8, + 1+4*8, 2+4*8, 6+0*8, 4+3*8, + 0+7*8, 4+4*8, 7+2*8, 3+6*8, + 5+5*8, 6+5*8, 6+6*8, 7+7*8, +}; + #define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16bit +#define MB_TYPE_8x8DCT 0x01000000 #define IS_REF0(a) ((a)&MB_TYPE_REF0) +#define IS_8x8DCT(a) ((a)&MB_TYPE_8x8DCT) + typedef struct IMbInfo{ uint16_t type; @@ -472,6 +514,18 @@ static const uint16_t dequant_coeff[52][16]={ //{4608,5888,4608,5888, 5888,7424,5888,7424, 
4608,5888,4608,5888, 5888,7424,5888,7424, }, }; +static const int dequant8_coeff_init_scan[16] = { + 0,3,4,3, 3,1,5,1, 4,5,2,5, 3,1,5,1 +}; +static const int dequant8_coeff_init[6][6]={ + {20,18,32,19,25,24}, + {22,19,35,21,28,26}, + {26,23,42,24,33,31}, + {28,25,45,26,35,33}, + {32,28,51,30,40,38}, + {36,32,58,34,46,43}, +}; + #define QUANT_SHIFT 22 static const int quant_coeff[52][16]={ @@ -561,7 +615,7 @@ static const int tc0_table[52][3] = { /* Cabac pre state table */ -static const int cabac_context_init_I[399][2] = +static const int cabac_context_init_I[460][2] = { /* 0 - 10 */ { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, @@ -707,10 +761,30 @@ static const int cabac_context_init_I[399][2] = { 31, -7 }, { 35, -15 }, { 34, -3 }, { 34, 3 }, { 36, -1 }, { 34, 5 }, { 32, 11 }, { 35, 5 }, { 34, 12 }, { 39, 11 }, { 30, 29 }, { 34, 26 }, - { 29, 39 }, { 19, 66 } + { 29, 39 }, { 19, 66 }, + + /* 399 -> 435 */ + { 31, 21 }, { 31, 31 }, { 25, 50 }, + { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 }, + { -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 }, + { -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 }, + { -23, 68 }, { -24, 50 }, { -11, 74 }, { 23, -13 }, + { 26, -13 }, { 40, -15 }, { 49, -14 }, { 44, 3 }, + { 45, 6 }, { 44, 34 }, { 33, 54 }, { 19, 82 }, + { -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 }, + { 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 }, + { 0, 68 }, { -9, 92 }, + + /* 436 -> 459 */ + { -14, 106 }, { -13, 97 }, { -15, 90 }, { -12, 90 }, + { -18, 88 }, { -10, 73 }, { -9, 79 }, { -14, 86 }, + { -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 }, + { -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 }, + { 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 }, + { 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 } }; -static const int cabac_context_init_PB[3][399][2] = +static const int cabac_context_init_PB[3][460][2] = { /* i_cabac_init_idc == 0 */ { @@ -847,7 +921,25 @@ static const int cabac_context_init_PB[3][399][2] = { 23, 42 }, { 19, 57 }, { 22, 53 }, { 22, 61 }, { 11, 86 
}, - + /* 399 - 435 */ + { 12, 40 }, { 11, 51 }, { 14, 59 }, + { -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 }, + { -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 }, + { -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 }, + { -16, 66 }, { -22, 65 }, { -20, 63 }, { 9, -2 }, + { 26, -9 }, { 33, -9 }, { 39, -7 }, { 41, -2 }, + { 45, 3 }, { 49, 9 }, { 45, 27 }, { 36, 59 }, + { -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 }, + { -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 }, + { -8, 66 }, { -8, 76 }, + + /* 436 - 459 */ + { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, + { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 }, + { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, + { -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 }, + { 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 }, + { 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 }, }, /* i_cabac_init_idc == 1 */ @@ -985,6 +1077,25 @@ static const int cabac_context_init_PB[3][399][2] = { 18, 50 }, { 12, 70 }, { 21, 54 }, { 14, 71 }, { 11, 83 }, + /* 399 - 435 */ + { 24, 32 }, { 21, 49 }, { 21, 54 }, + { -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 }, + { -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 8 }, + { -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 }, + { -14, 66 }, { 0, 59 }, { 2, 59 }, { 17, -10 }, + { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, + { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, + { -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 }, + { -2, 52 }, { -9, 57 }, { -6, 53 }, { -4, 65 }, + { -4, 67 }, { -7, 82 }, + + /* 436 - 459 */ + { -3, 81 }, { -3, 76 }, { -7, 72 }, { -6, 78 }, + { -12, 72 }, { -14, 68 }, { -3, 70 }, { -6, 76 }, + { -5, 66 }, { -5, 62 }, { 0, 57 }, { -4, 61 }, + { -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 }, + { 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 }, + { 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 }, }, /* i_cabac_init_idc == 2 */ @@ -1121,5 +1232,25 @@ static const int cabac_context_init_PB[3][399][2] = { 22, 42 }, { 16, 60 }, { 15, 52 }, { 14, 60 }, { 3, 78 }, { -16, 123 }, { 21, 53 }, { 22, 56 }, { 25, 61 }, 
+ + /* 399 - 435 */ + { 21, 33 }, { 19, 50 }, { 17, 61 }, + { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, + { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, + { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, + { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, + { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, + { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, + { -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 }, + { -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 }, + { -6, 68 }, { -10, 79 }, + + /* 436 - 459 */ + { -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 }, + { -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 }, + { -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 }, + { -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 }, + { 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 }, + { 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 }, } }; diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c index 551197d37e..e59324d58f 100644 --- a/libavcodec/h264idct.c +++ b/libavcodec/h264idct.c @@ -68,3 +68,74 @@ void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){ void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){ idct_internal(dst, block, stride, 8, 3, 0); } + /* 8x8 transform-and-add (an IDCT, going by the function name). Reads the 64 coefficients in block[], runs two 1-D passes over them, shifts each result right by 6 and adds it to the 8x8 sample area at dst, whose rows are stride bytes apart. block[] is modified: block[0] is biased and pass 1 overwrites all 64 entries with intermediate values. */ +void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){ + int i; + DCTELEM (*src)[8] = (DCTELEM(*)[8])block; /* view block[] as src[r][c] = block[8*r + c] */ + uint8_t *cm = cropTbl + MAX_NEG_CROP; /* cropTbl is defined elsewhere; cm[] is indexed with (dst sample + residual), so it presumably clamps that sum to the valid sample range -- TODO confirm */ + + block[0] += 32; /* 32 = 1<<5: the src[0][0] term reaches every output of both passes with coefficient 1, so this bias makes the final >> 6 round to nearest instead of truncating */ + /* pass 1: 1-D transform of each src[i][0..7], written back in place */ + for( i = 0; i < 8; i++ ) + { + const int a0 = src[i][0] + src[i][4]; + const int a2 = src[i][0] - src[i][4]; + const int a4 = (src[i][2]>>1) - src[i][6]; + const int a6 = (src[i][6]>>1) + src[i][2]; + /* even part: b0,b2,b4,b6 depend only on inputs 0,2,4,6 */ + const int b0 = a0 + a6; + const int b2 = a2 + a4; + const int b4 = a2 - a4; + const int b6 = a0 - a6; + /* odd part: a1,a3,a5,a7 and then b1,b3,b5,b7 depend only on inputs 1,3,5,7 */ + const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1); + const int a3 = src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1); + const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1); + const int a7 = src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1); + + const int b1 = (a7>>2) + a1; 
+ const int b3 = a3 + (a5>>2); + const int b5 = (a3>>2) - a5; + const int b7 = a7 - (a1>>2); + /* outputs k and 7-k are the sum and the difference of one even term and one odd term */ + src[i][0] = b0 + b7; + src[i][7] = b0 - b7; + src[i][1] = b2 + b5; + src[i][6] = b2 - b5; + src[i][2] = b4 + b3; + src[i][5] = b4 - b3; + src[i][3] = b6 + b1; + src[i][4] = b6 - b1; + } + for( i = 0; i < 8; i++ ) /* pass 2: the same 1-D transform along the other index, src[0..7][i]; results are not stored back but added to column i of dst */ + { + const int a0 = src[0][i] + src[4][i]; + const int a2 = src[0][i] - src[4][i]; + const int a4 = (src[2][i]>>1) - src[6][i]; + const int a6 = (src[6][i]>>1) + src[2][i]; + + const int b0 = a0 + a6; + const int b2 = a2 + a4; + const int b4 = a2 - a4; + const int b6 = a0 - a6; + + const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1); + const int a3 = src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1); + const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1); + const int a7 = src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1); + + const int b1 = (a7>>2) + a1; + const int b3 = a3 + (a5>>2); + const int b5 = (a3>>2) - a5; + const int b7 = a7 - (a1>>2); + /* for each row k of this column: scale the result by >> 6, add it to the sample already in dst, and map the sum through cm[] */ + dst[i + 0*stride] = cm[ dst[i + 0*stride] + ((b0 + b7) >> 6) ]; + dst[i + 1*stride] = cm[ dst[i + 1*stride] + ((b2 + b5) >> 6) ]; + dst[i + 2*stride] = cm[ dst[i + 2*stride] + ((b4 + b3) >> 6) ]; + dst[i + 3*stride] = cm[ dst[i + 3*stride] + ((b6 + b1) >> 6) ]; + dst[i + 4*stride] = cm[ dst[i + 4*stride] + ((b6 - b1) >> 6) ]; + dst[i + 5*stride] = cm[ dst[i + 5*stride] + ((b4 - b3) >> 6) ]; + dst[i + 6*stride] = cm[ dst[i + 6*stride] + ((b2 - b5) >> 6) ]; + dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ]; + } +} |