diff options
-rw-r--r-- | libavcodec/dxva2_h264.c | 21 | ||||
-rw-r--r-- | libavcodec/h264.c | 95 | ||||
-rw-r--r-- | libavcodec/h264.h | 38 | ||||
-rw-r--r-- | libavcodec/h264_mb.c | 69 | ||||
-rw-r--r-- | libavcodec/h264_mb_template.c | 10 | ||||
-rw-r--r-- | libavcodec/h264_mc_template.c | 36 | ||||
-rw-r--r-- | libavcodec/h264_parser.c | 9 | ||||
-rw-r--r-- | libavcodec/h264_refs.c | 10 | ||||
-rw-r--r-- | libavcodec/h264_slice.c | 69 | ||||
-rw-r--r-- | libavcodec/svq3.c | 2 | ||||
-rw-r--r-- | libavcodec/vaapi_h264.c | 26 |
11 files changed, 220 insertions, 165 deletions
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c index 6deccc3e9a..c5dccfe749 100644 --- a/libavcodec/dxva2_h264.c +++ b/libavcodec/dxva2_h264.c @@ -211,6 +211,7 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, const DXVA_PicParams_H264 *pp, unsigned position, unsigned size) { const H264Context *h = avctx->priv_data; + H264SliceContext *sl = &h->slice_ctx[0]; struct dxva_context *ctx = avctx->hwaccel_context; unsigned list; @@ -225,8 +226,8 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, slice->slice_type = ff_h264_get_slice_type(h); if (h->slice_type_fixed) slice->slice_type += 5; - slice->luma_log2_weight_denom = h->luma_log2_weight_denom; - slice->chroma_log2_weight_denom = h->chroma_log2_weight_denom; + slice->luma_log2_weight_denom = sl->luma_log2_weight_denom; + slice->chroma_log2_weight_denom = sl->chroma_log2_weight_denom; if (h->list_count > 0) slice->num_ref_idx_l0_active_minus1 = h->ref_count[0] - 1; if (h->list_count > 1) @@ -250,15 +251,15 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, r->reference == PICT_BOTTOM_FIELD); for (plane = 0; plane < 3; plane++) { int w, o; - if (plane == 0 && h->luma_weight_flag[list]) { - w = h->luma_weight[i][list][0]; - o = h->luma_weight[i][list][1]; - } else if (plane >= 1 && h->chroma_weight_flag[list]) { - w = h->chroma_weight[i][list][plane-1][0]; - o = h->chroma_weight[i][list][plane-1][1]; + if (plane == 0 && sl->luma_weight_flag[list]) { + w = sl->luma_weight[i][list][0]; + o = sl->luma_weight[i][list][1]; + } else if (plane >= 1 && sl->chroma_weight_flag[list]) { + w = sl->chroma_weight[i][list][plane-1][0]; + o = sl->chroma_weight[i][list][plane-1][1]; } else { - w = 1 << (plane == 0 ? h->luma_log2_weight_denom : - h->chroma_log2_weight_denom); + w = 1 << (plane == 0 ? sl->luma_log2_weight_denom : + sl->chroma_log2_weight_denom); o = 0; } slice->Weights[list][i][plane][0] = w; diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 8ec46dfb1c..755e219cf5 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -91,7 +91,7 @@ static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type, pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4); h->mb_mbaff = h->mb_field_decoding_flag = 0; - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, &h->slice_ctx[0]); } void ff_h264_draw_horiz_band(H264Context *h, int y, int height) @@ -673,7 +673,17 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx) h->pixel_shift = 0; h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; + h->nb_slice_ctx = (avctx->active_thread_type & FF_THREAD_SLICE) ? H264_MAX_THREADS : 1; + h->slice_ctx = av_mallocz_array(h->nb_slice_ctx, sizeof(*h->slice_ctx)); + if (!h->slice_ctx) { + h->nb_slice_ctx = 0; + return AVERROR(ENOMEM); + } + h->thread_context[0] = h; + for (i = 0; i < h->nb_slice_ctx; i++) + h->slice_ctx[i].h264 = h->thread_context[0]; + h->outputed_poc = h->next_outputed_poc = INT_MIN; for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) h->last_pocs[i] = INT_MIN; @@ -716,12 +726,23 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx) static int decode_init_thread_copy(AVCodecContext *avctx) { H264Context *h = avctx->priv_data; + int i; if (!avctx->internal->is_copy) return 0; memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); + h->nb_slice_ctx = (avctx->active_thread_type & FF_THREAD_SLICE) ? H264_MAX_THREADS : 1; + h->slice_ctx = av_mallocz_array(h->nb_slice_ctx, sizeof(*h->slice_ctx)); + if (!h->slice_ctx) { + h->nb_slice_ctx = 0; + return AVERROR(ENOMEM); + } + + for (i = 0; i < h->nb_slice_ctx; i++) + h->slice_ctx[i].h264 = h; + h->avctx = avctx; h->rbsp_buffer[0] = NULL; h->rbsp_buffer[1] = NULL; @@ -980,47 +1001,47 @@ static void decode_postinit(H264Context *h, int setup_finished) ff_thread_finish_setup(h->avctx); } -int ff_pred_weight_table(H264Context *h) +int ff_pred_weight_table(H264Context *h, H264SliceContext *sl) { int list, i; int luma_def, chroma_def; - h->use_weight = 0; - h->use_weight_chroma = 0; - h->luma_log2_weight_denom = get_ue_golomb(&h->gb); + sl->use_weight = 0; + sl->use_weight_chroma = 0; + sl->luma_log2_weight_denom = get_ue_golomb(&h->gb); if (h->sps.chroma_format_idc) - h->chroma_log2_weight_denom = get_ue_golomb(&h->gb); + sl->chroma_log2_weight_denom = get_ue_golomb(&h->gb); - if (h->luma_log2_weight_denom > 7U) { - av_log(h->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is out of range\n", h->luma_log2_weight_denom); - h->luma_log2_weight_denom = 0; + if (sl->luma_log2_weight_denom > 7U) { + av_log(h->avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is out of range\n", sl->luma_log2_weight_denom); + sl->luma_log2_weight_denom = 0; } - if (h->chroma_log2_weight_denom > 7U) { - av_log(h->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %d is out of range\n", h->chroma_log2_weight_denom); - h->chroma_log2_weight_denom = 0; + if (sl->chroma_log2_weight_denom > 7U) { + av_log(h->avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %d is out of range\n", sl->chroma_log2_weight_denom); + sl->chroma_log2_weight_denom = 0; } - luma_def = 1 << h->luma_log2_weight_denom; - chroma_def = 1 << h->chroma_log2_weight_denom; + luma_def = 1 << sl->luma_log2_weight_denom; + chroma_def = 1 << sl->chroma_log2_weight_denom; for (list = 0; list < 2; list++) { - h->luma_weight_flag[list] = 0; - h->chroma_weight_flag[list] = 0; + sl->luma_weight_flag[list] = 0; + sl->chroma_weight_flag[list] = 0; for (i = 0; i < h->ref_count[list]; i++) { int luma_weight_flag, chroma_weight_flag; luma_weight_flag = get_bits1(&h->gb); if (luma_weight_flag) { - h->luma_weight[i][list][0] = get_se_golomb(&h->gb); - h->luma_weight[i][list][1] = get_se_golomb(&h->gb); - if (h->luma_weight[i][list][0] != luma_def || - h->luma_weight[i][list][1] != 0) { - h->use_weight = 1; - h->luma_weight_flag[list] = 1; + sl->luma_weight[i][list][0] = get_se_golomb(&h->gb); + sl->luma_weight[i][list][1] = get_se_golomb(&h->gb); + if (sl->luma_weight[i][list][0] != luma_def || + sl->luma_weight[i][list][1] != 0) { + sl->use_weight = 1; + sl->luma_weight_flag[list] = 1; } } else { - h->luma_weight[i][list][0] = luma_def; - h->luma_weight[i][list][1] = 0; + sl->luma_weight[i][list][0] = luma_def; + sl->luma_weight[i][list][1] = 0; } if (h->sps.chroma_format_idc) { @@ -1028,19 +1049,19 @@ int ff_pred_weight_table(H264Context *h) if (chroma_weight_flag) { int j; for (j = 0; j < 2; j++) { - h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb); - h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb); - if (h->chroma_weight[i][list][j][0] != chroma_def || - h->chroma_weight[i][list][j][1] != 0) { - h->use_weight_chroma = 1; - h->chroma_weight_flag[list] = 1; + sl->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb); + sl->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb); + if (sl->chroma_weight[i][list][j][0] != chroma_def || + sl->chroma_weight[i][list][j][1] != 0) { + sl->use_weight_chroma = 1; + sl->chroma_weight_flag[list] = 1; } } } else { int j; for (j = 0; j < 2; j++) { - h->chroma_weight[i][list][j][0] = chroma_def; - h->chroma_weight[i][list][j][1] = 0; + sl->chroma_weight[i][list][j][0] = chroma_def; + sl->chroma_weight[i][list][j][1] = 0; } } } @@ -1048,7 +1069,7 @@ int ff_pred_weight_table(H264Context *h) if (h->slice_type_nos != AV_PICTURE_TYPE_B) break; } - h->use_weight = h->use_weight || h->use_weight_chroma; + sl->use_weight = sl->use_weight || sl->use_weight_chroma; return 0; } @@ -1412,6 +1433,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, { AVCodecContext *const avctx = h->avctx; H264Context *hx; ///< thread context + H264SliceContext *sl; int buf_index; unsigned context_count; int next_avc; @@ -1469,6 +1491,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, } hx = h->thread_context[context_count]; + sl = &h->slice_ctx[context_count]; ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index); @@ -1546,7 +1569,7 @@ again: hx->intra_gb_ptr = hx->inter_gb_ptr = &hx->gb; - if ((err = ff_h264_decode_slice_header(hx, h))) + if ((err = ff_h264_decode_slice_header(hx, sl, h))) break; if (h->sei_recovery_frame_cnt >= 0) { @@ -1681,6 +1704,7 @@ again: h->nal_unit_type = hx->nal_unit_type; h->nal_ref_idc = hx->nal_ref_idc; hx = h; + sl = &h->slice_ctx[0]; goto again; } } @@ -1905,6 +1929,9 @@ av_cold void ff_h264_free_context(H264Context *h) ff_h264_free_tables(h, 1); // FIXME cleanup init stuff perhaps + av_freep(&h->slice_ctx); + h->nb_slice_ctx = 0; + for (i = 0; i < MAX_SPS_COUNT; i++) av_freep(h->sps_buffers + i); diff --git a/libavcodec/h264.h b/libavcodec/h264.h index 8496f0bcc1..f1af197f18 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -333,6 +333,22 @@ typedef struct H264Picture { int crop_top; } H264Picture; +typedef struct H264SliceContext { + struct H264Context *h264; + + // Weighted pred stuff + int use_weight; + int use_weight_chroma; + int luma_log2_weight_denom; + int chroma_log2_weight_denom; + int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag + int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag + // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss + int luma_weight[48][2][2]; + int chroma_weight[48][2][2][2]; + int implicit_weight[48][48][2]; +} H264SliceContext; + /** * H264Context */ @@ -351,6 +367,9 @@ typedef struct H264Context { H264Picture cur_pic; H264Picture last_pic_for_ec; + H264SliceContext *slice_ctx; + int nb_slice_ctx; + int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264 int chroma_qp[2]; // QPc @@ -459,15 +478,6 @@ typedef struct H264Context { DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4]; - // Weighted pred stuff - int use_weight; - int use_weight_chroma; - int luma_log2_weight_denom; - int chroma_log2_weight_denom; - // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss - int luma_weight[48][2][2]; - int chroma_weight[48][2][2][2]; - int implicit_weight[48][48][2]; int direct_spatial_mv_pred; int col_parity; @@ -737,8 +747,6 @@ typedef struct H264Context { int missing_fields; - int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag - int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag // Timestamp stuff int sei_buffering_period_present; ///< Buffering period SEI flag @@ -822,7 +830,7 @@ int ff_h264_alloc_tables(H264Context *h); int ff_h264_fill_default_ref_list(H264Context *h); int ff_h264_decode_ref_pic_list_reordering(H264Context *h); -void ff_h264_fill_mbaff_ref_list(H264Context *h); +void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl); void ff_h264_remove_all_refs(H264Context *h); /** @@ -847,7 +855,7 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h); */ int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma); -void ff_h264_hl_decode_mb(H264Context *h); +void ff_h264_hl_decode_mb(H264Context *h, H264SliceContext *sl); int ff_h264_decode_extradata(H264Context *h, const uint8_t *buf, int size); int ff_h264_decode_init(AVCodecContext *avctx); void ff_h264_decode_init_vlc(void); @@ -1137,10 +1145,10 @@ int ff_h264_set_parameter_from_sps(H264Context *h); void ff_h264_draw_horiz_band(H264Context *h, int y, int height); int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc); -int ff_pred_weight_table(H264Context *h); +int ff_pred_weight_table(H264Context *h, H264SliceContext *sl); int ff_set_ref_count(H264Context *h); -int ff_h264_decode_slice_header(H264Context *h, H264Context *h0); +int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Context *h0); #define SLICE_SINGLETHREAD 1 #define SLICE_SKIPED 2 diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c index a4653aadbe..5f4fd13122 100644 --- a/libavcodec/h264_mb.c +++ b/libavcodec/h264_mb.c @@ -362,7 +362,8 @@ static av_always_inline void mc_part_std(H264Context *h, int n, int square, } } -static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, +static av_always_inline void mc_part_weighted(H264Context *h, H264SliceContext *sl, + int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, @@ -415,8 +416,8 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma_idc); - if (h->use_weight == 2) { - int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1]; + if (sl->use_weight == 2) { + int weight0 = sl->implicit_weight[refn0][refn1][h->mb_y & 1]; int weight1 = 64 - weight0; luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, 5, weight0, weight1, 0); @@ -428,24 +429,24 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, } } else { luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, - h->luma_log2_weight_denom, - h->luma_weight[refn0][0][0], - h->luma_weight[refn1][1][0], - h->luma_weight[refn0][0][1] + - h->luma_weight[refn1][1][1]); + sl->luma_log2_weight_denom, + sl->luma_weight[refn0][0][0], + sl->luma_weight[refn1][1][0], + sl->luma_weight[refn0][0][1] + + sl->luma_weight[refn1][1][1]); if (!CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, - h->chroma_log2_weight_denom, - h->chroma_weight[refn0][0][0][0], - h->chroma_weight[refn1][1][0][0], - h->chroma_weight[refn0][0][0][1] + - h->chroma_weight[refn1][1][0][1]); + sl->chroma_log2_weight_denom, + sl->chroma_weight[refn0][0][0][0], + sl->chroma_weight[refn1][1][0][0], + sl->chroma_weight[refn0][0][0][1] + + sl->chroma_weight[refn1][1][0][1]); chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, - h->chroma_log2_weight_denom, - h->chroma_weight[refn0][0][1][0], - h->chroma_weight[refn1][1][1][0], - h->chroma_weight[refn0][0][1][1] + - h->chroma_weight[refn1][1][1][1]); + sl->chroma_log2_weight_denom, + sl->chroma_weight[refn0][0][1][0], + sl->chroma_weight[refn1][1][1][0], + sl->chroma_weight[refn0][0][1][1] + + sl->chroma_weight[refn1][1][1][1]); } } } else { @@ -457,19 +458,19 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, qpix_put, chroma_put, pixel_shift, chroma_idc); luma_weight_op(dest_y, h->mb_linesize, height, - h->luma_log2_weight_denom, - h->luma_weight[refn][list][0], - h->luma_weight[refn][list][1]); + sl->luma_log2_weight_denom, + sl->luma_weight[refn][list][0], + sl->luma_weight[refn][list][1]); if (!CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { - if (h->use_weight_chroma) { + if (sl->use_weight_chroma) { chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, - h->chroma_log2_weight_denom, - h->chroma_weight[refn][list][0][0], - h->chroma_weight[refn][list][0][1]); + sl->chroma_log2_weight_denom, + sl->chroma_weight[refn][list][0][0], + sl->chroma_weight[refn][list][0][1]); chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, - h->chroma_log2_weight_denom, - h->chroma_weight[refn][list][1][0], - h->chroma_weight[refn][list][1][1]); + sl->chroma_log2_weight_denom, + sl->chroma_weight[refn][list][1][0], + sl->chroma_weight[refn][list][1][1]); } } } @@ -806,7 +807,7 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, #define SIMPLE 0 #include "h264_mb_template.c" -void ff_h264_hl_decode_mb(H264Context *h) +void ff_h264_hl_decode_mb(H264Context *h, H264SliceContext *sl) { const int mb_xy = h->mb_xy; const int mb_type = h->cur_pic.mb_type[mb_xy]; @@ -815,13 +816,13 @@ void ff_h264_hl_decode_mb(H264Context *h) if (CHROMA444(h)) { if (is_complex || h->pixel_shift) - hl_decode_mb_444_complex(h); + hl_decode_mb_444_complex(h, sl); else - hl_decode_mb_444_simple_8(h); + hl_decode_mb_444_simple_8(h, sl); } else if (is_complex) { - hl_decode_mb_complex(h); + hl_decode_mb_complex(h, sl); } else if (h->pixel_shift) { - hl_decode_mb_simple_16(h); + hl_decode_mb_simple_16(h, sl); } else - hl_decode_mb_simple_8(h); + hl_decode_mb_simple_8(h, sl); } diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c index 7c9d72b35a..23bf53391d 100644 --- a/libavcodec/h264_mb_template.c +++ b/libavcodec/h264_mb_template.c @@ -40,7 +40,7 @@ #define CHROMA_IDC 2 #include "h264_mc_template.c" -static av_noinline void FUNC(hl_decode_mb)(H264Context *h) +static av_noinline void FUNC(hl_decode_mb)(H264Context *h, H264SliceContext *sl) { const int mb_x = h->mb_x; const int mb_y = h->mb_y; @@ -173,13 +173,13 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) uvlinesize, 0, 0, SIMPLE, PIXEL_SHIFT); } else if (is_h264) { if (chroma422) { - FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr, + FUNC(hl_motion_422)(h, sl, dest_y, dest_cb, dest_cr, h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); } else { - FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr, + FUNC(hl_motion_420)(h, sl, dest_y, dest_cb, dest_cr, h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, h->h264dsp.weight_h264_pixels_tab, @@ -269,7 +269,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) #define CHROMA_IDC 3 #include "h264_mc_template.c" -static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) +static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h, H264SliceContext *sl) { const int mb_x = h->mb_x; const int mb_y = h->mb_y; @@ -352,7 +352,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, SIMPLE, PIXEL_SHIFT); } else { - FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2], + FUNC(hl_motion_444)(h, sl, dest[0], dest[1], dest[2], h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, h->h264dsp.weight_h264_pixels_tab, diff --git a/libavcodec/h264_mc_template.c b/libavcodec/h264_mc_template.c index 0e4e47713d..1e6f1714c9 100644 --- a/libavcodec/h264_mc_template.c +++ b/libavcodec/h264_mc_template.c @@ -34,7 +34,8 @@ #undef mc_part #define mc_part MCFUNC(mc_part) -static void mc_part(H264Context *h, int n, int square, +static void mc_part(H264Context *h, H264SliceContext *sl, + int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, @@ -47,10 +48,10 @@ static void mc_part(H264Context *h, int n, int square, h264_biweight_func *weight_avg, int list0, int list1) { - if ((h->use_weight == 2 && list0 && list1 && - (h->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) || - h->use_weight == 1) - mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr, + if ((sl->use_weight == 2 && list0 && list1 && + (sl->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) || + sl->use_weight == 1) + mc_part_weighted(h, sl, n, square, height, delta, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put, weight_op[0], weight_op[1], weight_avg[0], weight_avg[1], list0, list1, PIXEL_SHIFT, CHROMA_IDC); @@ -60,7 +61,8 @@ static void mc_part(H264Context *h, int n, int square, chroma_avg, list0, list1, PIXEL_SHIFT, CHROMA_IDC); } -static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y, +static void MCFUNC(hl_motion)(H264Context *h, H264SliceContext *sl, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, qpel_mc_func(*qpix_put)[16], h264_chroma_mc_func(*chroma_put), @@ -79,25 +81,25 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y, prefetch_motion(h, 0, PIXEL_SHIFT, CHROMA_IDC); if (IS_16X16(mb_type)) { - mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0, + mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); } else if (IS_16X8(mb_type)) { - mc_part(h, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0, + mc_part(h, sl, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - mc_part(h, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4, + mc_part(h, sl, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); } else if (IS_8X16(mb_type)) { - mc_part(h, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, + mc_part(h, sl, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - mc_part(h, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, + mc_part(h, sl, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); @@ -113,29 +115,29 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y, int y_offset = (i & 2) << 1; if (IS_SUB_8X8(sub_mb_type)) { - mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, + mc_part(h, sl, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } else if (IS_SUB_8X4(sub_mb_type)) { - mc_part(h, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, + mc_part(h, sl, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - mc_part(h, n + 2, 0, 4, 4 << PIXEL_SHIFT, + mc_part(h, sl, n + 2, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, x_offset, y_offset + 2, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } else if (IS_SUB_4X8(sub_mb_type)) { - mc_part(h, n, 0, 8, 4 * h->mb_linesize, + mc_part(h, sl, n, 0, 8, 4 * h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - mc_part(h, n + 1, 0, 8, 4 * h->mb_linesize, + mc_part(h, sl, n + 1, 0, 8, 4 * h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset + 2, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], @@ -146,7 +148,7 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y, for (j = 0; j < 4; j++) { int sub_x_offset = x_offset + 2 * (j & 1); int sub_y_offset = y_offset + (j & 2); - mc_part(h, n + j, 1, 4, 0, + mc_part(h, sl, n + j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c index 8eda6b88a9..e906fd53cf 100644 --- a/libavcodec/h264_parser.c +++ b/libavcodec/h264_parser.c @@ -133,6 +133,7 @@ static int scan_mmco_reset(AVCodecParserContext *s) { H264ParseContext *p = s->priv_data; H264Context *h = &p->h; + H264SliceContext *sl = &h->slice_ctx[0]; h->slice_type_nos = s->pict_type & 3; @@ -172,7 +173,7 @@ static int scan_mmco_reset(AVCodecParserContext *s) if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) || (h->pps.weighted_bipred_idc == 1 && h->slice_type_nos == AV_PICTURE_TYPE_B)) - ff_pred_weight_table(h); + ff_pred_weight_table(h, sl); if (get_bits1(&h->gb)) { // adaptive_ref_pic_marking_mode_flag int i; @@ -603,6 +604,12 @@ static av_cold int init(AVCodecParserContext *s) { H264ParseContext *p = s->priv_data; H264Context *h = &p->h; + + h->slice_ctx = av_mallocz(sizeof(*h->slice_ctx)); + if (!h->slice_ctx) + return 0; + h->nb_slice_ctx = 1; + h->thread_context[0] = h; h->slice_context_count = 1; ff_h264dsp_init(&h->h264dsp, 8, 1); diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c index 75025dc3fe..ce5304998b 100644 --- a/libavcodec/h264_refs.c +++ b/libavcodec/h264_refs.c @@ -341,7 +341,7 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h) return 0; } -void ff_h264_fill_mbaff_ref_list(H264Context *h) +void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl) { int list, i, j; for (list = 0; list < h->list_count; list++) { @@ -359,11 +359,11 @@ void ff_h264_fill_mbaff_ref_list(H264Context *h) field[1].reference = PICT_BOTTOM_FIELD; field[1].poc = field[1].field_poc[1]; - h->luma_weight[16 + 2 * i][list][0] = h->luma_weight[16 + 2 * i + 1][list][0] = h->luma_weight[i][list][0]; - h->luma_weight[16 + 2 * i][list][1] = h->luma_weight[16 + 2 * i + 1][list][1] = h->luma_weight[i][list][1]; + sl->luma_weight[16 + 2 * i][list][0] = sl->luma_weight[16 + 2 * i + 1][list][0] = sl->luma_weight[i][list][0]; + sl->luma_weight[16 + 2 * i][list][1] = sl->luma_weight[16 + 2 * i + 1][list][1] = sl->luma_weight[i][list][1]; for (j = 0; j < 2; j++) { - h->chroma_weight[16 + 2 * i][list][j][0] = h->chroma_weight[16 + 2 * i + 1][list][j][0] = h->chroma_weight[i][list][j][0]; - h->chroma_weight[16 + 2 * i][list][j][1] = h->chroma_weight[16 + 2 * i + 1][list][j][1] = h->chroma_weight[i][list][j][1]; + sl->chroma_weight[16 + 2 * i][list][j][0] = sl->chroma_weight[16 + 2 * i + 1][list][j][0] = sl->chroma_weight[i][list][j][0]; + sl->chroma_weight[16 + 2 * i][list][j][1] = sl->chroma_weight[16 + 2 * i + 1][list][j][1] = sl->chroma_weight[i][list][j][1]; } } } diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index e9a89d11f7..ea2b97925c 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -521,6 +521,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst, memcpy(h->block_offset, h1->block_offset, sizeof(h->block_offset)); if (!inited) { + H264SliceContext *orig_slice_ctx = h->slice_ctx; + for (i = 0; i < MAX_SPS_COUNT; i++) av_freep(h->sps_buffers + i); @@ -545,6 +547,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst, memset(&h->cur_pic, 0, sizeof(h->cur_pic)); memset(&h->last_pic_for_ec, 0, sizeof(h->last_pic_for_ec)); + h->slice_ctx = orig_slice_ctx; + h->avctx = dst; h->DPB = NULL; h->qscale_table_pool = NULL; @@ -877,13 +881,13 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, * @param field 0/1 initialize the weight for interlaced MBAFF * -1 initializes the rest */ -static void implicit_weight_table(H264Context *h, int field) +static void implicit_weight_table(H264Context *h, H264SliceContext *sl, int field) { int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; for (i = 0; i < 2; i++) { - h->luma_weight_flag[i] = 0; - h->chroma_weight_flag[i] = 0; + sl->luma_weight_flag[i] = 0; + sl->chroma_weight_flag[i] = 0; } if (field < 0) { @@ -894,8 +898,8 @@ static void implicit_weight_table(H264Context *h, int field) } if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) { - h->use_weight = 0; - h->use_weight_chroma = 0; + sl->use_weight = 0; + sl->use_weight_chroma = 0; return; } ref_start = 0; @@ -908,10 +912,10 @@ static void implicit_weight_table(H264Context *h, int field) ref_count1 = 16 + 2 * h->ref_count[1]; } - h->use_weight = 2; - h->use_weight_chroma = 2; - h->luma_log2_weight_denom = 5; - h->chroma_log2_weight_denom = 5; + sl->use_weight = 2; + sl->use_weight_chroma = 2; + sl->luma_log2_weight_denom = 5; + sl->chroma_log2_weight_denom = 5; for (ref0 = ref_start; ref0 < ref_count0; ref0++) { int poc0 = h->ref_list[0][ref0].poc; @@ -929,10 +933,10 @@ static void implicit_weight_table(H264Context *h, int field) } } if (field < 0) { - h->implicit_weight[ref0][ref1][0] = - h->implicit_weight[ref0][ref1][1] = w; + sl->implicit_weight[ref0][ref1][0] = + sl->implicit_weight[ref0][ref1][1] = w; } else { - h->implicit_weight[ref0][ref1][field] = w; + sl->implicit_weight[ref0][ref1][field] = w; } } } @@ -1235,6 +1239,8 @@ static int h264_slice_header_init(H264Context *h, int reinit) c->workaround_bugs = h->workaround_bugs; c->pict_type = h->pict_type; + h->slice_ctx[i].h264 = c; + init_scan_tables(c); clone_tables(c, h, i); c->context_initialized = 1; @@ -1277,7 +1283,7 @@ static enum AVPixelFormat non_j_pixfmt(enum AVPixelFormat a) * * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded */ -int ff_h264_decode_slice_header(H264Context *h, H264Context *h0) +int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Context *h0) { unsigned int first_mb_in_slice; unsigned int pps_id; @@ -1811,15 +1817,15 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0) if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) || (h->pps.weighted_bipred_idc == 1 && h->slice_type_nos == AV_PICTURE_TYPE_B)) - ff_pred_weight_table(h); + ff_pred_weight_table(h, sl); else if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) { - implicit_weight_table(h, -1); + implicit_weight_table(h, sl, -1); } else { - h->use_weight = 0; + sl->use_weight = 0; for (i = 0; i < 2; i++) { - h->luma_weight_flag[i] = 0; - h->chroma_weight_flag[i] = 0; + sl->luma_weight_flag[i] = 0; + sl->chroma_weight_flag[i] = 0; } } @@ -1837,11 +1843,11 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0) } if (FRAME_MBAFF(h)) { - ff_h264_fill_mbaff_ref_list(h); + ff_h264_fill_mbaff_ref_list(h, sl); if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) { - implicit_weight_table(h, 0); - implicit_weight_table(h, 1); + implicit_weight_table(h, sl, 0); + implicit_weight_table(h, sl, 1); } } @@ -2010,8 +2016,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0) h->qscale, h->deblocking_filter, h->slice_alpha_c0_offset, h->slice_beta_offset, - h->use_weight, - h->use_weight == 1 && h->use_weight_chroma ? "c" : "", + sl->use_weight, + sl->use_weight == 1 && sl->use_weight_chroma ? "c" : "", h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""); } @@ -2390,7 +2396,8 @@ static void er_add_slice(H264Context *h, int startx, int starty, static int decode_slice(struct AVCodecContext *avctx, void *arg) { - H264Context *h = *(void **)arg; + H264SliceContext *sl = arg; + H264Context *h = sl->h264; int lf_x_start = h->mb_x; h->mb_skip_run = -1; @@ -2429,7 +2436,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) // STOP_TIMER("decode_mb_cabac") if (ret >= 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, sl); // FIXME optimal? or let mb_decode decode 16x32 ? if (ret >= 0 && FRAME_MBAFF(h)) { @@ -2438,7 +2445,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) ret = ff_h264_decode_mb_cabac(h); if (ret >= 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, sl); h->mb_y--; } eos = get_cabac_terminate(&h->cabac); @@ -2490,7 +2497,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) int ret = ff_h264_decode_mb_cavlc(h); if (ret >= 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, sl); // FIXME optimal? or let mb_decode decode 16x32 ? if (ret >= 0 && FRAME_MBAFF(h)) { @@ -2498,7 +2505,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) ret = ff_h264_decode_mb_cavlc(h); if (ret >= 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, sl); h->mb_y--; } @@ -2579,7 +2586,7 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count) h->avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU) return 0; if (context_count == 1) { - return decode_slice(avctx, &h); + return decode_slice(avctx, &h->slice_ctx[0]); } else { av_assert0(context_count > 0); for (i = 1; i < context_count; i++) { @@ -2590,8 +2597,8 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count) hx->x264_build = h->x264_build; } - avctx->execute(avctx, decode_slice, h->thread_context, - NULL, context_count, sizeof(void *)); + avctx->execute(avctx, decode_slice, h->slice_ctx, + NULL, context_count, sizeof(h->slice_ctx[0])); /* pull back stuff from slices to master context */ hx = h->thread_context[context_count - 1]; diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index 5205e13093..0ddcd6a7b4 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -1303,7 +1303,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data, } if (mb_type != 0 || h->cbp) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, &h->slice_ctx[0]); if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay) h->cur_pic.mb_type[h->mb_x + h->mb_y * h->mb_stride] = diff --git a/libavcodec/vaapi_h264.c b/libavcodec/vaapi_h264.c index 8eb8a66f15..2e38ba6dae 100644 --- a/libavcodec/vaapi_h264.c +++ b/libavcodec/vaapi_h264.c @@ -192,27 +192,28 @@ static void fill_vaapi_plain_pred_weight_table(H264Context *h, short chroma_weight[32][2], short chroma_offset[32][2]) { + H264SliceContext *sl = &h->slice_ctx[0]; unsigned int i, j; - *luma_weight_flag = h->luma_weight_flag[list]; - *chroma_weight_flag = h->chroma_weight_flag[list]; + *luma_weight_flag = sl->luma_weight_flag[list]; + *chroma_weight_flag = sl->chroma_weight_flag[list]; for (i = 0; i < h->ref_count[list]; i++) { /* VA API also wants the inferred (default) values, not only what is available in the bitstream (7.4.3.2). */ - if (h->luma_weight_flag[list]) { - luma_weight[i] = h->luma_weight[i][list][0]; - luma_offset[i] = h->luma_weight[i][list][1]; + if (sl->luma_weight_flag[list]) { + luma_weight[i] = sl->luma_weight[i][list][0]; + luma_offset[i] = sl->luma_weight[i][list][1]; } else { - luma_weight[i] = 1 << h->luma_log2_weight_denom; + luma_weight[i] = 1 << sl->luma_log2_weight_denom; luma_offset[i] = 0; } for (j = 0; j < 2; j++) { - if (h->chroma_weight_flag[list]) { - chroma_weight[i][j] = h->chroma_weight[i][list][j][0]; - chroma_offset[i][j] = h->chroma_weight[i][list][j][1]; + if (sl->chroma_weight_flag[list]) { + chroma_weight[i][j] = sl->chroma_weight[i][list][j][0]; + chroma_offset[i][j] = sl->chroma_weight[i][list][j][1]; } else { - chroma_weight[i][j] = 1 << h->chroma_log2_weight_denom; + chroma_weight[i][j] = 1 << sl->chroma_log2_weight_denom; chroma_offset[i][j] = 0; } } @@ -316,6 +317,7 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx, uint32_t size) { H264Context * const h = avctx->priv_data; + H264SliceContext *sl = &h->slice_ctx[0]; VASliceParameterBufferH264 *slice_param; av_dlog(avctx, "vaapi_h264_decode_slice(): buffer %p, size %d\n", @@ -336,8 +338,8 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx, slice_param->disable_deblocking_filter_idc = h->deblocking_filter < 2 ? !h->deblocking_filter : h->deblocking_filter; slice_param->slice_alpha_c0_offset_div2 = h->slice_alpha_c0_offset / 2; slice_param->slice_beta_offset_div2 = h->slice_beta_offset / 2; - slice_param->luma_log2_weight_denom = h->luma_log2_weight_denom; - slice_param->chroma_log2_weight_denom = h->chroma_log2_weight_denom; + slice_param->luma_log2_weight_denom = sl->luma_log2_weight_denom; + slice_param->chroma_log2_weight_denom = sl->chroma_log2_weight_denom; fill_vaapi_RefPicList(slice_param->RefPicList0, h->ref_list[0], h->list_count > 0 ? h->ref_count[0] : 0); fill_vaapi_RefPicList(slice_param->RefPicList1, h->ref_list[1], h->list_count > 1 ? h->ref_count[1] : 0); |