summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author He Junyan <junyan.he@intel.com> 2022-04-25 15:49:31 +0800
committer He Junyan <junyan.he@intel.com> 2022-12-01 09:45:07 +0800
commit 17c416ca92c71d5d59190e35dfb9faaa074c405e (patch)
tree 966fe6bf3dec0aec0964fd14ec3bf310f9917a1f
parent 2f8ce208e8806c20b7a8ac2c304c1ba0e63f6813 (diff)
download gstreamer-17c416ca92c71d5d59190e35dfb9faaa074c405e.tar.gz
va: enable multi tile support for H265 encoder.
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/2036>
-rw-r--r-- subprojects/gst-plugins-bad/sys/va/gstvaencoder.c 58
-rw-r--r-- subprojects/gst-plugins-bad/sys/va/gstvaencoder.h 6
-rw-r--r-- subprojects/gst-plugins-bad/sys/va/gstvah265enc.c 524
3 files changed, 533 insertions, 55 deletions
diff --git a/subprojects/gst-plugins-bad/sys/va/gstvaencoder.c b/subprojects/gst-plugins-bad/sys/va/gstvaencoder.c
index 12ecafaba9..9e08df713e 100644
--- a/subprojects/gst-plugins-bad/sys/va/gstvaencoder.c
+++ b/subprojects/gst-plugins-bad/sys/va/gstvaencoder.c
@@ -603,6 +603,35 @@ gst_va_encoder_get_max_slice_num (GstVaEncoder * self,
return attrib.value;
}
+/**
+ * gst_va_encoder_get_slice_structure:
+ * @self: a #GstVaEncoder
+ * @profile: the VA profile to query
+ * @entrypoint: the VA entrypoint to query
+ *
+ * Queries the driver for its VAConfigAttribEncSliceStructure attribute.
+ *
+ * Returns: the attribute value reported by the driver (callers test it
+ *     against the VA_ENC_SLICE_STRUCTURE_* bits), or 0 when @profile is
+ *     VAProfileNone, the query fails or the attribute is not supported.
+ */
+gint32
+gst_va_encoder_get_slice_structure (GstVaEncoder * self,
+    VAProfile profile, VAEntrypoint entrypoint)
+{
+  VAStatus status;
+  VADisplay dpy;
+  VAConfigAttrib attrib = {.type = VAConfigAttribEncSliceStructure };
+
+  g_return_val_if_fail (GST_IS_VA_ENCODER (self), 0);
+
+  /* The result is used as a bitmask: returning -1 here would make every
+   * slice structure mode look supported, so report "nothing" like the
+   * other failure paths do. */
+  if (profile == VAProfileNone)
+    return 0;
+
+  dpy = gst_va_display_get_va_dpy (self->display);
+  status = vaGetConfigAttributes (dpy, profile, entrypoint, &attrib, 1);
+  if (status != VA_STATUS_SUCCESS) {
+    GST_WARNING_OBJECT (self, "Failed to query encoding slice structure: %s",
+        vaErrorStr (status));
+    return 0;
+  }
+
+  if (attrib.value == VA_ATTRIB_NOT_SUPPORTED) {
+    GST_WARNING_OBJECT (self, "Driver does not support slice structure");
+    return 0;
+  }
+
+  return attrib.value;
+}
+
gboolean
gst_va_encoder_get_max_num_reference (GstVaEncoder * self,
VAProfile profile, VAEntrypoint entrypoint,
@@ -767,6 +796,35 @@ gst_va_encoder_has_trellis (GstVaEncoder * self,
return attrib.value & VA_ENC_QUANTIZATION_TRELLIS_SUPPORTED;
}
+/* Tells whether the driver advertises VAConfigAttribEncTileSupport for the
+ * given profile/entrypoint pair. Any failure (no profile, failed query,
+ * unsupported attribute) is reported as FALSE. */
+gboolean
+gst_va_encoder_has_tile (GstVaEncoder * self,
+    VAProfile profile, VAEntrypoint entrypoint)
+{
+  VAConfigAttrib tile_attrib = {.type = VAConfigAttribEncTileSupport };
+  VADisplay va_dpy;
+  VAStatus ret;
+
+  g_return_val_if_fail (GST_IS_VA_ENCODER (self), FALSE);
+
+  if (profile != VAProfileNone) {
+    va_dpy = gst_va_display_get_va_dpy (self->display);
+    ret = vaGetConfigAttributes (va_dpy, profile, entrypoint, &tile_attrib, 1);
+
+    if (ret != VA_STATUS_SUCCESS) {
+      GST_WARNING_OBJECT (self, "Failed to query the tile: %s",
+          vaErrorStr (ret));
+    } else if (tile_attrib.value == VA_ATTRIB_NOT_SUPPORTED) {
+      GST_WARNING_OBJECT (self, "Driver does not support tile");
+    } else {
+      /* Any non-zero attribute value counts as tile support. */
+      return tile_attrib.value > 0;
+    }
+  }
+
+  return FALSE;
+}
+
guint32
gst_va_encoder_get_rtformat (GstVaEncoder * self,
VAProfile profile, VAEntrypoint entrypoint)
diff --git a/subprojects/gst-plugins-bad/sys/va/gstvaencoder.h b/subprojects/gst-plugins-bad/sys/va/gstvaencoder.h
index 3f13291f47..290fb3531a 100644
--- a/subprojects/gst-plugins-bad/sys/va/gstvaencoder.h
+++ b/subprojects/gst-plugins-bad/sys/va/gstvaencoder.h
@@ -64,6 +64,9 @@ gboolean gst_va_encoder_has_profile (GstVaEncoder * self,
gint gst_va_encoder_get_max_slice_num (GstVaEncoder * self,
VAProfile profile,
VAEntrypoint entrypoint);
+gint32 gst_va_encoder_get_slice_structure (GstVaEncoder * self,
+ VAProfile profile,
+ VAEntrypoint entrypoint);
gboolean gst_va_encoder_get_max_num_reference (GstVaEncoder * self,
VAProfile profile,
VAEntrypoint entrypoint,
@@ -81,6 +84,9 @@ guint32 gst_va_encoder_get_quality_level (GstVaEncoder * self,
gboolean gst_va_encoder_has_trellis (GstVaEncoder * self,
VAProfile profile,
VAEntrypoint entrypoint);
+gboolean gst_va_encoder_has_tile (GstVaEncoder * self,
+ VAProfile profile,
+ VAEntrypoint entrypoint);
guint32 gst_va_encoder_get_rtformat (GstVaEncoder * self,
VAProfile profile,
VAEntrypoint entrypoint);
diff --git a/subprojects/gst-plugins-bad/sys/va/gstvah265enc.c b/subprojects/gst-plugins-bad/sys/va/gstvah265enc.c
index 09eda53662..bd29586b12 100644
--- a/subprojects/gst-plugins-bad/sys/va/gstvah265enc.c
+++ b/subprojects/gst-plugins-bad/sys/va/gstvah265enc.c
@@ -90,6 +90,8 @@ enum
PROP_RATE_CONTROL,
PROP_CPB_SIZE,
PROP_AUD,
+ PROP_NUM_TILE_COLS,
+ PROP_NUM_TILE_ROWS,
N_PROPERTIES
};
@@ -113,6 +115,11 @@ static GstObjectClass *parent_class = NULL;
#define MAX_GOP_SIZE 1024
+/* The max tiles in column according to spec A1 */
+#define MAX_COL_TILES 20
+/* The max tiles in row according to spec A1 */
+#define MAX_ROW_TILES 22
+
/* *INDENT-OFF* */
struct _GstVaH265EncClass
{
@@ -150,6 +157,8 @@ struct _GstVaH265Enc
gboolean aud;
guint32 mbbrc;
guint32 num_slices;
+ guint32 num_tile_cols;
+ guint32 num_tile_rows;
guint32 cpb_size;
guint32 target_percentage;
guint32 target_usage;
@@ -180,11 +189,27 @@ struct _GstVaH265Enc
guint min_cr;
gboolean aud;
- guint32 num_slices;
guint32 packed_headers;
struct
{
+ guint32 num_slices;
+ /* start address in CTUs */
+ guint32 *slice_segment_address;
+ /* CTUs in this slice */
+ guint32 *num_ctu_in_slice;
+
+ gboolean slice_span_tiles;
+ guint32 num_tile_cols;
+ guint32 num_tile_rows;
+ /* CTUs in each tile column */
+ guint32 *tile_ctu_cols;
+ /* CTUs in each tile row */
+ guint32 *tile_ctu_rows;
+ } partition;
+
+ struct
+ {
guint8 log2_min_luma_coding_block_size_minus3;
guint8 log2_diff_max_min_luma_coding_block_size;
guint8 log2_min_transform_block_size_minus2;
@@ -447,6 +472,12 @@ _enc_frame (GstVideoCodecFrame * frame)
return enc_frame;
}
+/* Tiles are in use as soon as the configured tile grid
+ * (columns x rows) holds more than one tile. */
+static inline gboolean
+_is_tile_enabled (GstVaH265Enc * self)
+{
+  guint32 tile_count =
+      self->partition.num_tile_cols * self->partition.num_tile_rows;
+
+  return tile_count > 1;
+}
+
static GstH265NalUnitType
_h265_nal_unit_type (GstVaH265EncFrame * frame)
{
@@ -1290,12 +1321,12 @@ _h265_fill_picture_parameter (GstVaH265Enc * self, GstVaH265EncFrame * frame,
VAEncPictureParameterBufferHEVC * pic_param, gint collocated_poc)
{
GstVaBaseEnc *base = GST_VA_BASE_ENC (self);
- gboolean tiles_enabled_flag;
guint8 num_ref_idx_l0_default_active_minus1 = 0;
guint8 num_ref_idx_l1_default_active_minus1 = 0;
guint hierarchical_level_plus1 = 0;
guint i;
+ /* *INDENT-OFF* */
if (self->gop.b_pyramid) {
/* I/P is the base hierarchical level 0, L0 level B is 1, and so on. */
hierarchical_level_plus1 = 1;
@@ -1315,10 +1346,6 @@ _h265_fill_picture_parameter (GstVaH265Enc * self, GstVaH265EncFrame * frame,
(self->gop.backward_ref_num > 0 ? self->gop.backward_ref_num - 1 : 0);
}
- /* TODO: multi tile support. */
- tiles_enabled_flag = 0;
-
- /* *INDENT-OFF* */
*pic_param = (VAEncPictureParameterBufferHEVC) {
.decoded_curr_pic.picture_id =
gst_va_encode_picture_get_reconstruct_surface (frame->picture),
@@ -1357,10 +1384,10 @@ _h265_fill_picture_parameter (GstVaH265Enc * self, GstVaH265EncFrame * frame,
.weighted_bipred_flag = self->features.weighted_bipred_flag,
.transquant_bypass_enabled_flag =
self->features.transquant_bypass_enabled_flag,
- .tiles_enabled_flag = tiles_enabled_flag,
+ .tiles_enabled_flag = _is_tile_enabled (self),
.entropy_coding_sync_enabled_flag = 0,
/* When we enable multi tiles, enable this. */
- .loop_filter_across_tiles_enabled_flag = tiles_enabled_flag,
+ .loop_filter_across_tiles_enabled_flag = _is_tile_enabled (self),
.pps_loop_filter_across_slices_enabled_flag = 1,
/* Should not change the scaling list, not used now */
.scaling_list_data_present_flag =
@@ -1426,6 +1453,24 @@ _h265_fill_picture_parameter (GstVaH265Enc * self, GstVaH265EncFrame * frame,
pic_param->collocated_ref_pic_index = 0xFF;
}
+ /* Setup tile info */
+ if (pic_param->pic_fields.bits.tiles_enabled_flag) {
+ /* Always set loop filter across tiles enabled now */
+ pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag = 1;
+
+ pic_param->num_tile_columns_minus1 = self->partition.num_tile_cols - 1;
+ pic_param->num_tile_rows_minus1 = self->partition.num_tile_rows - 1;
+
+ /* The VA row_height_minus1 and column_width_minus1 size is 1 smaller
+ than the MAX_COL_TILES and MAX_ROW_TILES, which means the driver
+ can deduce the last tile's size based on the picture info. We need
+ to take care of the array size here. */
+ for (i = 0; i < MIN (self->partition.num_tile_cols, 19); i++)
+ pic_param->column_width_minus1[i] = self->partition.tile_ctu_cols[i] - 1;
+ for (i = 0; i < MIN (self->partition.num_tile_rows, 21); i++)
+ pic_param->row_height_minus1[i] = self->partition.tile_ctu_rows[i] - 1;
+ }
+
return TRUE;
}
@@ -1615,33 +1660,16 @@ _h265_add_slices (GstVaH265Enc * self,
gint negative_pocs[16], guint num_negative_pics,
gint positive_pocs[16], guint num_positive_pics)
{
- guint ctu_size;
- guint ctus_per_slice, ctus_mod_slice, cur_slice_ctus;
- guint last_ctu_index;
guint i_slice;
VAEncSliceParameterBufferHEVC slice;
GstH265SliceHdr slice_hdr;
- ctu_size = self->ctu_width * self->ctu_height;
-
- g_assert (self->num_slices && self->num_slices < ctu_size);
-
- ctus_per_slice = ctu_size / self->num_slices;
- ctus_mod_slice = ctu_size % self->num_slices;
- last_ctu_index = 0;
-
- for (i_slice = 0; i_slice < self->num_slices; i_slice++) {
- cur_slice_ctus = ctus_per_slice;
- /* Scatter the remainder to each slice */
- if (ctus_mod_slice) {
- ++cur_slice_ctus;
- --ctus_mod_slice;
- }
-
- if (!_h265_fill_slice_parameter (self, frame, last_ctu_index,
- cur_slice_ctus, (i_slice == self->num_slices - 1),
- list_forward, list_forward_num,
- list_backward, list_backward_num, &slice))
+ for (i_slice = 0; i_slice < self->partition.num_slices; i_slice++) {
+ if (!_h265_fill_slice_parameter (self, frame,
+ self->partition.slice_segment_address[i_slice],
+ self->partition.num_ctu_in_slice[i_slice],
+ (i_slice == self->partition.num_slices - 1), list_forward,
+ list_forward_num, list_backward, list_backward_num, &slice))
return FALSE;
if (!_h265_add_slice_parameter (self, frame, &slice))
@@ -1656,10 +1684,6 @@ _h265_add_slices (GstVaH265Enc * self,
if (!_h265_add_slice_header (self, frame, &slice_hdr))
return FALSE;
}
-
- /* set calculation for next slice */
- last_ctu_index += cur_slice_ctus;
- g_assert (last_ctu_index <= ctu_size);
}
return TRUE;
@@ -2258,7 +2282,9 @@ gst_va_h265_enc_reset_state (GstVaBaseEnc * base)
GST_OBJECT_LOCK (self);
self->features.use_trellis = self->prop.use_trellis;
self->aud = self->prop.aud;
- self->num_slices = self->prop.num_slices;
+ self->partition.num_slices = self->prop.num_slices;
+ self->partition.num_tile_cols = self->prop.num_tile_cols;
+ self->partition.num_tile_rows = self->prop.num_tile_rows;
self->gop.idr_period = self->prop.key_int_max;
self->gop.num_bframes = self->prop.num_bframes;
self->gop.b_pyramid = self->prop.b_pyramid;
@@ -2297,6 +2323,12 @@ gst_va_h265_enc_reset_state (GstVaBaseEnc * base)
self->packed_headers = 0;
+ self->partition.slice_span_tiles = FALSE;
+ g_clear_pointer (&self->partition.slice_segment_address, g_free);
+ g_clear_pointer (&self->partition.num_ctu_in_slice, g_free);
+ g_clear_pointer (&self->partition.tile_ctu_cols, g_free);
+ g_clear_pointer (&self->partition.tile_ctu_rows, g_free);
+
self->features.log2_min_luma_coding_block_size_minus3 = 0;
self->features.log2_diff_max_min_luma_coding_block_size = 0;
self->features.log2_diff_max_min_luma_coding_block_size = 0;
@@ -2489,37 +2521,375 @@ out:
update_property (bool, obj, old_val, new_val, prop_id)
+/* Computes the tile and slice layout used when tile encoding is enabled.
+ *
+ * Allocates and fills, in self->partition:
+ *  - tile_ctu_cols / tile_ctu_rows: size in CTUs of each tile column / row;
+ *  - num_ctu_in_slice: number of CTUs of each slice;
+ *  - slice_segment_address: address, in picture raster scan, of the first
+ *    CTU of each slice.
+ * These arrays are released by the g_clear_pointer() calls in
+ * gst_va_h265_enc_reset_state().
+ *
+ * Reads num_slices, num_tile_cols, num_tile_rows and slice_span_tiles from
+ * self->partition, plus self->ctu_width and self->ctu_height. When
+ * slice_span_tiles is FALSE every tile is first handed one slice and
+ * left_slices is unsigned, so num_slices must not be smaller than the
+ * number of tiles on entry. */
static void
-_h265_validate_parameters (GstVaH265Enc * self)
+_h265_calculate_tile_partition (GstVaH265Enc * self)
+{
+ guint32 ctu_per_slice;
+ guint32 left_slices;
+ gint32 i, j, k;
+ /* Accumulated tile widths/heights in CTUs: entry i is the picture
+ coordinate where tile column/row i starts. */
+ guint32 ctu_tile_width_accu[MAX_COL_TILES + 1];
+ guint32 ctu_tile_height_accu[MAX_ROW_TILES + 1];
+ /* CTB address in tile scan.
+ Add one as sentinel, hold val to calculate ctu_num */
+ guint32 *tile_slice_address =
+ g_malloc ((self->partition.num_slices + 1) * sizeof (guint32));
+ /* map the CTB address in tile scan to CTB raster scan of a picture. */
+ guint32 *tile_slice_address_map =
+ g_malloc (self->ctu_width * self->ctu_height * sizeof (guint32));
+
+ self->partition.slice_segment_address =
+ g_malloc (self->partition.num_slices * sizeof (guint32));
+ self->partition.num_ctu_in_slice =
+ g_malloc (self->partition.num_slices * sizeof (guint32));
+ self->partition.tile_ctu_cols = g_malloc (MAX_COL_TILES * sizeof (guint32));
+ self->partition.tile_ctu_rows = g_malloc (MAX_ROW_TILES * sizeof (guint32));
+
+ /* First, uniformly separate CTUs into tiles, as spec 6.5.1 defines */
+ for (i = 0; i < self->partition.num_tile_cols; i++)
+ self->partition.tile_ctu_cols[i] =
+ ((i + 1) * self->ctu_width) / self->partition.num_tile_cols -
+ (i * self->ctu_width) / self->partition.num_tile_cols;
+ for (i = 0; i < self->partition.num_tile_rows; i++)
+ self->partition.tile_ctu_rows[i] =
+ ((i + 1) * self->ctu_height) / self->partition.num_tile_rows -
+ (i * self->ctu_height) / self->partition.num_tile_rows;
+
+ /* Handle the requirement that a slice must not span tiles. First we
+ scatter slices uniformly into each tile, bigger tile gets
+ more slices. Then we assign CTUs within one tile uniformly
+ to each slice in that tile. */
+ if (!self->partition.slice_span_tiles) {
+ guint32 *slices_per_tile = g_malloc (self->partition.num_tile_cols *
+ self->partition.num_tile_rows * sizeof (guint32));
+
+ /* Rounded-up average; in this branch it is only used by the assertion. */
+ ctu_per_slice = (self->ctu_width * self->ctu_height +
+ self->partition.num_slices - 1) / self->partition.num_slices;
+ g_assert (ctu_per_slice > 0);
+ left_slices = self->partition.num_slices;
+
+ /* Every tile gets one slice to start with. */
+ for (i = 0;
+ i < self->partition.num_tile_cols * self->partition.num_tile_rows;
+ i++) {
+ slices_per_tile[i] = 1;
+ left_slices--;
+ }
+ while (left_slices) {
+ /* Find the tile with the biggest CTUs-per-slice ratio, and give it
+ one more slice. */
+ gfloat largest = 0.0f;
+ k = -1;
+ for (i = 0;
+ i < self->partition.num_tile_cols * self->partition.num_tile_rows;
+ i++) {
+ gfloat f;
+ f = ((gfloat)
+ (self->partition.tile_ctu_cols[i % self->partition.num_tile_cols] *
+ self->partition.tile_ctu_rows
+ [i / self->partition.num_tile_cols])) /
+ (gfloat) slices_per_tile[i];
+ g_assert (f >= 1.0f);
+ if (f > largest) {
+ k = i;
+ largest = f;
+ }
+ }
+
+ g_assert (k >= 0);
+ slices_per_tile[k]++;
+ left_slices--;
+ }
+
+ /* Assign CTUs in one tile uniformly to each slice. Note: the slice start
+ address is CTB address in tile scan(see spec 6.5), that is, we accumulate
+ all CTUs in tile0, then tile1, and tile2..., not from the picture's
+ perspective. */
+ tile_slice_address[0] = 0;
+ k = 1;
+ for (i = 0; i < self->partition.num_tile_rows; i++) {
+ for (j = 0; j < self->partition.num_tile_cols; j++) {
+ guint32 s_num = slices_per_tile[i * self->partition.num_tile_cols + j];
+ guint32 one_tile_ctus =
+ self->partition.tile_ctu_cols[j] * self->partition.tile_ctu_rows[i];
+ guint32 s;
+
+ GST_LOG_OBJECT (self, "Tile(row %d col %d), has CTU in col %d,"
+ " CTU in row is %d, total CTU %d, assigned %d slices", i, j,
+ self->partition.tile_ctu_cols[j], self->partition.tile_ctu_rows[i],
+ one_tile_ctus, s_num);
+
+ g_assert (s_num > 0);
+ for (s = 0; s < s_num; s++) {
+ tile_slice_address[k] = tile_slice_address[k - 1] +
+ ((s + 1) * one_tile_ctus) / s_num - (s * one_tile_ctus) / s_num;
+ self->partition.num_ctu_in_slice[k - 1] =
+ tile_slice_address[k] - tile_slice_address[k - 1];
+ k++;
+ }
+ }
+ }
+
+ g_assert (k == self->partition.num_slices + 1);
+ /* Calculate the last one */
+ self->partition.num_ctu_in_slice[self->partition.num_slices - 1] =
+ self->ctu_width * self->ctu_height -
+ tile_slice_address[self->partition.num_slices - 1];
+
+ g_free (slices_per_tile);
+ }
+ /* The easy way, just assign CTUs to each slice uniformly */
+ else {
+ guint ctu_size, ctu_mod_slice, cur_slice_ctu, last_ctu_index;
+
+ ctu_size = self->ctu_width * self->ctu_height;
+
+ ctu_per_slice = ctu_size / self->partition.num_slices;
+ ctu_mod_slice = ctu_size % self->partition.num_slices;
+ last_ctu_index = 0;
+
+ for (i = 0; i < self->partition.num_slices; i++) {
+ cur_slice_ctu = ctu_per_slice;
+ /* Scatter the remainder to each slice */
+ if (ctu_mod_slice) {
+ ++cur_slice_ctu;
+ --ctu_mod_slice;
+ }
+
+ tile_slice_address[i] = last_ctu_index;
+ self->partition.num_ctu_in_slice[i] = cur_slice_ctu;
+
+ /* set calculation for next slice */
+ last_ctu_index += cur_slice_ctu;
+ g_assert (last_ctu_index <= ctu_size);
+ }
+ }
+
+ /* Build the map specifying the conversion between a CTB address in CTB
+ raster scan of a picture and a CTB address in tile scan(see spec 6.5.1
+ for details). */
+ ctu_tile_width_accu[0] = 0;
+ for (i = 1; i <= self->partition.num_tile_cols; i++)
+ ctu_tile_width_accu[i] =
+ ctu_tile_width_accu[i - 1] + self->partition.tile_ctu_cols[i - 1];
+
+ ctu_tile_height_accu[0] = 0;
+ for (i = 1; i <= self->partition.num_tile_rows; i++)
+ ctu_tile_height_accu[i] =
+ ctu_tile_height_accu[i - 1] + self->partition.tile_ctu_rows[i - 1];
+
+ for (k = 0; k < self->ctu_width * self->ctu_height; k++) {
+ /* The ctu coordinate in the picture. */
+ guint32 x = k % self->ctu_width;
+ guint32 y = k / self->ctu_width;
+ /* The ctu coordinate in the tile mode. */
+ guint32 tile_x = 0;
+ guint32 tile_y = 0;
+ /* The index of the CTU in the tile mode. */
+ guint32 tso = 0;
+
+ /* The last tile column/row whose start is not beyond (x, y) is the
+ tile holding this CTU. */
+ for (i = 0; i < self->partition.num_tile_cols; i++)
+ if (x >= ctu_tile_width_accu[i])
+ tile_x = i;
+ g_assert (tile_x <= self->partition.num_tile_cols - 1);
+
+ for (j = 0; j < self->partition.num_tile_rows; j++)
+ if (y >= ctu_tile_height_accu[j])
+ tile_y = j;
+ g_assert (tile_y <= self->partition.num_tile_rows - 1);
+
+ /* add all ctus in the tiles the same line before us */
+ for (i = 0; i < tile_x; i++)
+ tso += self->partition.tile_ctu_rows[tile_y] *
+ self->partition.tile_ctu_cols[i];
+
+ /* add all ctus in the tiles above us */
+ for (j = 0; j < tile_y; j++)
+ tso += self->ctu_width * self->partition.tile_ctu_rows[j];
+
+ /* add the ctus inside the same tile before us */
+ tso += (y - ctu_tile_height_accu[tile_y]) *
+ self->partition.tile_ctu_cols[tile_x]
+ + x - ctu_tile_width_accu[tile_x];
+
+ g_assert (tso < self->ctu_width * self->ctu_height);
+
+ tile_slice_address_map[tso] = k;
+ }
+
+ /* Convert each slice start from tile scan to picture raster scan. */
+ for (i = 0; i < self->partition.num_slices; i++)
+ self->partition.slice_segment_address[i] =
+ tile_slice_address_map[tile_slice_address[i]];
+
+ g_free (tile_slice_address);
+ g_free (tile_slice_address_map);
+}
+
+/* Computes the slice layout used when tile encoding is not enabled.
+ *
+ * @slice_structure is the driver's VAConfigAttribEncSliceStructure value.
+ * When it has neither VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS nor
+ * VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS set, the picture is encoded as a
+ * single slice. Otherwise the CTUs are spread as evenly as possible over
+ * self->partition.num_slices slices; with ARBITRARY_ROWS every slice is
+ * additionally rounded up to a whole number of CTU rows.
+ *
+ * Allocates and fills self->partition.slice_segment_address (first CTU of
+ * each slice) and self->partition.num_ctu_in_slice (CTU count of each
+ * slice). self->partition.num_slices may be lowered, either by the
+ * single-slice fallback or when row alignment consumes the whole picture
+ * with fewer slices than requested. */
+static void
+_h265_calculate_slice_partition (GstVaH265Enc * self, gint32 slice_structure)
+{
+  guint ctu_size;
+  guint ctus_per_slice, ctus_mod_slice, cur_slice_ctus;
+  guint last_ctu_index;
+  guint i_slice;
+
+  /* TODO: consider other slice structure modes */
+  if (!(slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) &&
+      !(slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS)) {
+    GST_INFO_OBJECT (self, "Driver slice structure is %x, does not support"
+        " ARBITRARY_MACROBLOCKS mode, fallback to no slice partition",
+        slice_structure);
+    self->partition.num_slices = 1;
+  }
+
+  self->partition.slice_segment_address =
+      g_malloc (self->partition.num_slices * sizeof (guint32));
+  self->partition.num_ctu_in_slice =
+      g_malloc (self->partition.num_slices * sizeof (guint32));
+
+  ctu_size = self->ctu_width * self->ctu_height;
+
+  /* A picture made of a single CTU gets exactly one slice, so the slice
+   * count is allowed to be equal to the CTU count. */
+  g_assert (self->partition.num_slices &&
+      self->partition.num_slices <= ctu_size);
+
+  ctus_per_slice = ctu_size / self->partition.num_slices;
+  ctus_mod_slice = ctu_size % self->partition.num_slices;
+  last_ctu_index = 0;
+
+  for (i_slice = 0; i_slice < self->partition.num_slices; i_slice++) {
+    /* Row alignment enlarges slices, so the picture can be fully covered
+     * before all requested slices are used. Never emit a slice with zero
+     * CTUs: shrink the slice count instead. This can not trigger for the
+     * first slice, so num_slices stays >= 1. */
+    if (last_ctu_index >= ctu_size) {
+      GST_INFO_OBJECT (self, "Row alignment leaves no CTU for the remaining"
+          " slices, reduce num_slices from %u to %u",
+          self->partition.num_slices, i_slice);
+      self->partition.num_slices = i_slice;
+      break;
+    }
+
+    cur_slice_ctus = ctus_per_slice;
+    /* Scatter the remainder to each slice */
+    if (ctus_mod_slice) {
+      ++cur_slice_ctus;
+      --ctus_mod_slice;
+    }
+
+    /* Align start address to the row begin */
+    if (slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS) {
+      guint ctu_row_remainder = cur_slice_ctus % self->ctu_width;
+
+      /* Only round up when the slice does not already end on a row
+       * boundary, otherwise a whole extra row would be added. */
+      if (ctu_row_remainder)
+        cur_slice_ctus += self->ctu_width - ctu_row_remainder;
+      if ((last_ctu_index + cur_slice_ctus) > ctu_size)
+        cur_slice_ctus = ctu_size - last_ctu_index;
+    }
+
+    self->partition.slice_segment_address[i_slice] = last_ctu_index;
+    self->partition.num_ctu_in_slice[i_slice] = cur_slice_ctus;
+
+    /* set calculation for next slice */
+    last_ctu_index += cur_slice_ctus;
+    g_assert (last_ctu_index <= ctu_size);
+  }
+}
+
+/* Clamps the requested slice and tile counts to what the driver, the
+ * level and the picture size allow, then computes the partition with
+ * _h265_calculate_tile_partition() (tiles enabled) or
+ * _h265_calculate_slice_partition() (no tiles). The final values are
+ * written back to the num-slices, num-tile-cols and num-tile-rows
+ * properties.
+ *
+ * Returns FALSE only on the Intel iHD path, when slices may not span tiles
+ * and the number of tiles is bigger than the driver's max slice number;
+ * TRUE otherwise. */
+static gboolean
+_h265_setup_slice_and_tile_partition (GstVaH265Enc * self)
{
GstVaBaseEnc *base = GST_VA_BASE_ENC (self);
gint32 max_slices;
+ gint32 slice_structure;
/* Ensure the num_slices provided by the user not exceed the limit
* of the number of slices permitted by the stream and by the
* hardware. */
- g_assert (self->num_slices >= 1);
+ g_assert (self->partition.num_slices >= 1);
max_slices = gst_va_encoder_get_max_slice_num (base->encoder,
base->profile, GST_VA_BASE_ENC_ENTRYPOINT (base));
- if (self->num_slices > max_slices)
- self->num_slices = max_slices;
+ if (self->partition.num_slices > max_slices)
+ self->partition.num_slices = max_slices;
/* The stream size limit. */
- if (self->num_slices > ((self->ctu_width * self->ctu_height + 1) / 2))
- self->num_slices = ((self->ctu_width * self->ctu_height + 1) / 2);
+ if (self->partition.num_slices >
+ ((self->ctu_width * self->ctu_height + 1) / 2))
+ self->partition.num_slices = ((self->ctu_width * self->ctu_height + 1) / 2);
- update_property_uint (base, &self->prop.num_slices, self->num_slices,
- PROP_NUM_SLICES);
+ /* Only consumed by the non-tile branch below. */
+ slice_structure = gst_va_encoder_get_slice_structure (base->encoder,
+ base->profile, GST_VA_BASE_ENC_ENTRYPOINT (base));
- /* Ensure trellis. */
- if (self->features.use_trellis &&
- !gst_va_encoder_has_trellis (base->encoder, base->profile,
- GST_VA_BASE_ENC_ENTRYPOINT (base))) {
- GST_INFO_OBJECT (self, "The trellis is not supported");
- self->features.use_trellis = FALSE;
+ if (_is_tile_enabled (self)) {
+ const GstVaH265LevelLimits *level_limits;
+ guint i;
+
+ /* NOTE(review): when the driver has no tile support the grid is reset
+ to 1x1 but execution stays in this branch, so the layout is still
+ computed by _h265_calculate_tile_partition() and slice_structure is
+ not consulted - confirm this is intended. */
+ if (!gst_va_encoder_has_tile (base->encoder,
+ base->profile, GST_VA_BASE_ENC_ENTRYPOINT (base))) {
+ self->partition.num_tile_cols = 1;
+ self->partition.num_tile_rows = 1;
+ }
+
+ /* Look up the limits of the level selected in self->level_idc. */
+ level_limits = NULL;
+ for (i = 0; i < G_N_ELEMENTS (_va_h265_level_limits); i++) {
+ if (_va_h265_level_limits[i].level_idc == self->level_idc) {
+ level_limits = &_va_h265_level_limits[i];
+ break;
+ }
+ }
+ g_assert (level_limits);
+
+ if (self->partition.num_tile_cols > level_limits->MaxTileColumns) {
+ GST_INFO_OBJECT (self, "num_tile_cols:%d exceeds MaxTileColumns:%d"
+ " of level %s", self->partition.num_tile_cols,
+ level_limits->MaxTileColumns, self->level_str);
+ self->partition.num_tile_cols = level_limits->MaxTileColumns;
+ }
+ if (self->partition.num_tile_rows > level_limits->MaxTileRows) {
+ GST_INFO_OBJECT (self, "num_tile_rows:%d exceeds MaxTileRows:%d"
+ " of level %s", self->partition.num_tile_rows,
+ level_limits->MaxTileRows, self->level_str);
+ self->partition.num_tile_rows = level_limits->MaxTileRows;
+ }
+
+ /* A tile column/row needs at least one CTU. */
+ if (self->partition.num_tile_cols > self->ctu_width) {
+ GST_INFO_OBJECT (self,
+ "Only %d CTUs in width, not enough to split into %d tile columns",
+ self->ctu_width, self->partition.num_tile_cols);
+ self->partition.num_tile_cols = self->ctu_width;
+ }
+ if (self->partition.num_tile_rows > self->ctu_height) {
+ GST_INFO_OBJECT (self,
+ "Only %d CTUs in height, not enough to split into %d tile rows",
+ self->ctu_height, self->partition.num_tile_rows);
+ self->partition.num_tile_rows = self->ctu_height;
+ }
+
+ /* Some drivers require that a slice does not span tiles,
+ we need to increase the slice number if needed. */
+ if (gst_va_display_is_implementation (base->display,
+ GST_VA_IMPLEMENTATION_INTEL_IHD)) {
+ if (self->partition.num_slices <
+ self->partition.num_tile_cols * self->partition.num_tile_rows) {
+ if (self->partition.num_tile_cols * self->partition.num_tile_rows >
+ max_slices) {
+ GST_ERROR_OBJECT (self, "The slice can not span tiles, but total"
+ " tile num %d is bigger than max_slices %d",
+ self->partition.num_tile_cols * self->partition.num_tile_rows,
+ max_slices);
+ return FALSE;
+ } else {
+ GST_INFO_OBJECT (self, "The num_slices %d is smaller than tile"
+ " num %d. The slice can not span tiles, so set the num-slices"
+ " to tile num.", self->partition.num_slices,
+ self->partition.num_tile_cols * self->partition.num_tile_rows);
+ self->partition.num_slices =
+ self->partition.num_tile_cols * self->partition.num_tile_rows;
+ }
+ }
+
+ self->partition.slice_span_tiles = FALSE;
+ } else {
+ self->partition.slice_span_tiles = TRUE;
+ }
+
+ _h265_calculate_tile_partition (self);
+ } else {
+ _h265_calculate_slice_partition (self, slice_structure);
}
- update_property_bool (base, &self->prop.use_trellis,
- self->features.use_trellis, PROP_TRELLIS);
+ /* Publish the values that were finally applied. */
+ update_property_uint (base, &self->prop.num_slices,
+ self->partition.num_slices, PROP_NUM_SLICES);
+ update_property_uint (base, &self->prop.num_tile_cols,
+ self->partition.num_tile_cols, PROP_NUM_TILE_COLS);
+ update_property_uint (base, &self->prop.num_tile_rows,
+ self->partition.num_tile_rows, PROP_NUM_TILE_ROWS);
+
+ return TRUE;
}
/* Normalizes bitrate (and CPB size) for HRD conformance */
@@ -3021,7 +3391,7 @@ _h265_calculate_coded_size (GstVaH265Enc * self)
codedbuf_size += 4 + GST_ROUND_UP_8 (MAX_PPS_HDR_SIZE) / 8;
/* Account for slice header */
- codedbuf_size += self->num_slices * (4 +
+ codedbuf_size += self->partition.num_slices * (4 +
GST_ROUND_UP_8 (MAX_SLICE_HDR_SIZE + MAX_SHORT_TERM_REFPICSET_SIZE) / 8);
/* TODO: Only YUV 4:2:0 formats are supported for now.
@@ -3575,6 +3945,19 @@ print_options:
self->features.weighted_pred_flag,
self->features.weighted_bipred_flag,
self->features.transquant_bypass_enabled_flag);
+
+ /* Ensure trellis. */
+ if (self->features.use_trellis &&
+ !gst_va_encoder_has_trellis (base->encoder, base->profile,
+ GST_VA_BASE_ENC_ENTRYPOINT (base))) {
+ GST_INFO_OBJECT (self, "The trellis is not supported");
+ self->features.use_trellis = FALSE;
+ }
+
+ if (self->prop.use_trellis != self->features.use_trellis) {
+ self->prop.use_trellis = self->features.use_trellis;
+ g_object_notify_by_pspec (G_OBJECT (self), properties[PROP_TRELLIS]);
+ }
}
/* We need to decide the profile and entrypoint before call this.
@@ -3822,8 +4205,6 @@ gst_va_h265_enc_reconfig (GstVaBaseEnc * base)
base->width, base->height, self->ctu_width, self->ctu_height,
GST_TIME_ARGS (base->frame_duration));
- _h265_validate_parameters (self);
-
if (!_h265_ensure_rate_control (self))
return FALSE;
@@ -3837,6 +4218,9 @@ gst_va_h265_enc_reconfig (GstVaBaseEnc * base)
_h265_calculate_coded_size (self);
+ if (!_h265_setup_slice_and_tile_partition (self))
+ return FALSE;
+
if (!_h265_init_packed_headers (self))
return FALSE;
@@ -4086,6 +4470,12 @@ gst_va_h265_enc_set_property (GObject * object, guint prop_id,
g_atomic_int_set (&GST_VA_BASE_ENC (self)->reconf, TRUE);
already_effect = TRUE;
break;
+ case PROP_NUM_TILE_COLS:
+ self->prop.num_tile_cols = g_value_get_uint (value);
+ break;
+ case PROP_NUM_TILE_ROWS:
+ self->prop.num_tile_rows = g_value_get_uint (value);
+ break;
case PROP_RATE_CONTROL:
self->prop.rc_ctrl = g_value_get_enum (value);
g_atomic_int_set (&GST_VA_BASE_ENC (self)->reconf, TRUE);
@@ -4169,6 +4559,12 @@ gst_va_h265_enc_get_property (GObject * object, guint prop_id,
case PROP_TARGET_USAGE:
g_value_set_uint (value, self->prop.target_usage);
break;
+ case PROP_NUM_TILE_COLS:
+ g_value_set_uint (value, self->prop.num_tile_cols);
+ break;
+ case PROP_NUM_TILE_ROWS:
+ g_value_set_uint (value, self->prop.num_tile_rows);
+ break;
case PROP_RATE_CONTROL:
g_value_set_enum (value, self->prop.rc_ctrl);
break;
@@ -4466,6 +4862,24 @@ gst_va_h265_enc_class_init (gpointer g_klass, gpointer class_data)
"The desired max CPB size in Kb (0: auto-calculate)", 0, 2000 * 1024, 0,
param_flags);
+ /**
+ * GstVaH265Enc:num-tile-cols:
+ *
+ * The number of tile columns when tile encoding is enabled.
+ */
+ properties[PROP_NUM_TILE_COLS] = g_param_spec_uint ("num-tile-cols",
+ "number of tile columns", "The number of columns for tile encoding",
+ 1, MAX_COL_TILES, 1, param_flags);
+
+ /**
+ * GstVaH265Enc:num-tile-rows:
+ *
+ * The number of tile rows when tile encoding is enabled.
+ */
+ properties[PROP_NUM_TILE_ROWS] = g_param_spec_uint ("num-tile-rows",
+ "number of tile rows", "The number of rows for tile encoding",
+ 1, MAX_ROW_TILES, 1, param_flags);
+
if (vah265enc_class->rate_control_type > 0) {
properties[PROP_RATE_CONTROL] = g_param_spec_enum ("rate-control",
"rate control mode", "The desired rate control mode for the encoder",