From daaefada7fb313811c2c9a781f69a2b40dd3fcb8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 29 May 2015 16:01:29 -0700 Subject: drm/vc4: Optimize BRANCH_TO_SUBLIST validation. We typically get bin_tiles_x * bin_tiles_y of these, so it makes sense to do a tiny bit of up front work to make this easier (particularly, avoid a divide, since those are terrible). Saves about half a percent of the CPU. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_drv.h | 3 ++- drivers/gpu/drm/vc4/vc4_validate.c | 16 ++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 57b889e072ef..6b3e07cf8443 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -234,7 +234,8 @@ struct vc4_exec_info { bool found_wait_on_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; uint32_t fb_width, fb_height; - uint32_t tile_alloc_init_block_size; + uint32_t tile_alloc_init_block_mask; + uint32_t tile_alloc_init_block_last; struct drm_gem_cma_object *tile_alloc_bo; /** diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index 7c6925cd7929..b1f88bb8ecaa 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -286,9 +286,8 @@ validate_branch_to_sublist(VALIDATE_ARGS) } offset = *(uint32_t *)(untrusted + 0); - if (offset % exec->tile_alloc_init_block_size || - offset / exec->tile_alloc_init_block_size >= - exec->bin_tiles_x * exec->bin_tiles_y) { + if (offset & exec->tile_alloc_init_block_mask || + offset > exec->tile_alloc_init_block_last) { DRM_ERROR("VC4_PACKET_BRANCH_TO_SUB_LIST must jump to initial " "tile allocation space.\n"); return -EINVAL; @@ -496,6 +495,7 @@ validate_tile_binning_config(VALIDATE_ARGS) struct drm_gem_cma_object *tile_state_data_array; uint8_t flags; uint32_t tile_allocation_size; + uint32_t tile_alloc_init_block_size; if (!vc4_use_handle(exec, 0, VC4_MODE_TILE_ALLOC, &tile_allocation) || !vc4_use_handle(exec, 1, VC4_MODE_TSDA, &tile_state_data_array)) @@ -547,15 +547,19 @@ validate_tile_binning_config(VALIDATE_ARGS) *(uint32_t *)validated = tile_allocation->paddr; exec->tile_alloc_bo = tile_allocation; - exec->tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3)); + tile_alloc_init_block_size = 1 << (5 + ((flags >> 5) & 3)); if (exec->bin_tiles_x * exec->bin_tiles_y * - exec->tile_alloc_init_block_size > tile_allocation_size) { + tile_alloc_init_block_size > tile_allocation_size) { DRM_ERROR("tile init exceeds tile alloc size (%d vs %d)\n", exec->bin_tiles_x * exec->bin_tiles_y * - exec->tile_alloc_init_block_size, + tile_alloc_init_block_size, tile_allocation_size); return -EINVAL; } + exec->tile_alloc_init_block_mask = tile_alloc_init_block_size - 1; + exec->tile_alloc_init_block_last = tile_alloc_init_block_size * + (exec->bin_tiles_x * exec->bin_tiles_y - 1); + if (*(uint32_t *)(untrusted + 8) != 0) { DRM_ERROR("TSDA offset != 0 unsupported\n"); return -EINVAL; -- cgit v1.2.1