summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCole Nelson <colex.nelson@intel.com>2017-08-18 13:30:36 -0700
committerXiang, Haihao <haihao.xiang@intel.com>2018-11-21 13:42:45 +0800
commit08de43cf6c3dc0eb6081de35b2f3ab252e16a218 (patch)
treeca15e998f2e76add17575a0a7759ea15b8580874
parent4f8fe64e513ecc461e03e9115315a1459a4684dd (diff)
downloadlibva-intel-driver-08de43cf6c3dc0eb6081de35b2f3ab252e16a218.tar.gz
gen8_post_processing: optimization for gen8_pp_object_walker
gen8_pp_object_walker makes an unnecessary function call, checks a return value that is always zero and makes non-indexed variable assignments. All of which occurs inside nested for loops. Move non-indexed assignments outside the iterative block and inline the rest. This decreases frame drops for saturating workloads. Fixes: #252 Signed-off-by: Cole Nelson <colex.nelson@intel.com>
-rw-r--r--src/gen8_post_processing.c49
1 files changed, 20 insertions, 29 deletions
diff --git a/src/gen8_post_processing.c b/src/gen8_post_processing.c
index 860c618c..36610731 100644
--- a/src/gen8_post_processing.c
+++ b/src/gen8_post_processing.c
@@ -832,20 +832,6 @@ gen7_pp_avs_y_steps(void *private_context)
return pp_avs_context->dest_h / 16;
}
-static int
-gen7_pp_avs_set_block_parameter(struct i965_post_processing_context *pp_context, int x, int y)
-{
- struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
- struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
-
- pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
- pp_inline_parameter->grf9.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
- pp_inline_parameter->grf9.constant_0 = 0xffffffff;
- pp_inline_parameter->grf9.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
-
- return 0;
-}
-
static void gen7_update_src_surface_uv_offset(VADriverContextP ctx,
struct i965_post_processing_context *pp_context,
const struct i965_surface *surface)
@@ -1206,7 +1192,6 @@ gen8_pp_plx_avs_initialize(VADriverContextP ctx, struct i965_post_processing_con
pp_context->pp_x_steps = gen7_pp_avs_x_steps;
pp_context->pp_y_steps = gen7_pp_avs_y_steps;
pp_context->private_context = &pp_context->pp_avs_context;
- pp_context->pp_set_block_parameter = gen7_pp_avs_set_block_parameter;
int dst_left_edge_extend = dst_rect->x % GPU_ASM_X_OFFSET_ALIGNMENT;
pp_avs_context->dest_x = dst_rect->x - dst_left_edge_extend;
@@ -1516,6 +1501,12 @@ gen8_pp_object_walker(VADriverContextP ctx,
dri_bo *command_buffer;
unsigned int *command_ptr;
+ struct pp_avs_context *pp_avs_context = (struct pp_avs_context *)pp_context->private_context;
+ struct gen7_pp_inline_parameter *pp_inline_parameter = pp_context->pp_inline_parameter;
+
+ pp_inline_parameter->grf9.constant_0 = 0xffffffff;
+ pp_inline_parameter->grf9.sampler_load_main_video_x_scaling_step = pp_avs_context->horiz_range / pp_avs_context->src_w;
+
param_size = sizeof(struct gen7_pp_inline_parameter);
x_steps = pp_context->pp_x_steps(pp_context->private_context);
@@ -1532,20 +1523,20 @@ gen8_pp_object_walker(VADriverContextP ctx,
for (y = 0; y < y_steps; y++) {
for (x = 0; x < x_steps; x++) {
- if (!pp_context->pp_set_block_parameter(pp_context, x, y)) {
-
- *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
- *command_ptr++ = 0;
- *command_ptr++ = 0;
- *command_ptr++ = 0;
- *command_ptr++ = 0;
- *command_ptr++ = 0;
- memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
- command_ptr += (param_size >> 2);
-
- *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
- *command_ptr++ = 0;
- }
+ pp_inline_parameter->grf9.destination_block_horizontal_origin = x * 16 + pp_avs_context->dest_x;
+ pp_inline_parameter->grf9.destination_block_vertical_origin = y * 16 + pp_avs_context->dest_y;
+
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (command_length_in_dws - 2));
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ *command_ptr++ = 0;
+ memcpy(command_ptr, pp_context->pp_inline_parameter, param_size);
+ command_ptr += (param_size >> 2);
+
+ *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+ *command_ptr++ = 0;
}
}