From 71a3914556983a1ee9184c1ebccf062ca9c0c416 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 10 Jun 2015 12:50:14 -0700 Subject: drm/vc4: Sync validation code from userspace. We get validation that direct texturing doesn't address off the start of the array, some more symbolic #defines, and a fix for the dithering bits. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_packet.h | 167 ++++++++++++++++++++++++----- drivers/gpu/drm/vc4/vc4_validate.c | 115 ++++++++++---------- drivers/gpu/drm/vc4/vc4_validate_shaders.c | 116 ++++++++++++++------ 3 files changed, 287 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_packet.h b/drivers/gpu/drm/vc4/vc4_packet.h index eef5be99a046..ec8586aca71f 100644 --- a/drivers/gpu/drm/vc4/vc4_packet.h +++ b/drivers/gpu/drm/vc4/vc4_packet.h @@ -24,6 +24,8 @@ #ifndef VC4_PACKET_H #define VC4_PACKET_H +#include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */ + enum vc4_packet { VC4_PACKET_HALT = 0, VC4_PACKET_NOP = 1, @@ -81,6 +83,39 @@ enum vc4_packet { VC4_PACKET_GEM_HANDLES = 254, } __attribute__ ((__packed__)); +#define VC4_PACKET_HALT_SIZE 1 +#define VC4_PACKET_NOP_SIZE 1 +#define VC4_PACKET_FLUSH_SIZE 1 +#define VC4_PACKET_FLUSH_ALL_SIZE 1 +#define VC4_PACKET_START_TILE_BINNING_SIZE 1 +#define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1 +#define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1 +#define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5 +#define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1 +#define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1 +#define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7 +#define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7 +#define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14 +#define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10 +#define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2 +#define VC4_PACKET_GL_SHADER_STATE_SIZE 5 +#define VC4_PACKET_NV_SHADER_STATE_SIZE 5 +#define VC4_PACKET_CONFIGURATION_BITS_SIZE 4 +#define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5 +#define VC4_PACKET_POINT_SIZE_SIZE 5 +#define VC4_PACKET_LINE_WIDTH_SIZE 5 +#define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3 +#define VC4_PACKET_DEPTH_OFFSET_SIZE 5 +#define VC4_PACKET_CLIP_WINDOW_SIZE 9 +#define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5 +#define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9 +#define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9 +#define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16 +#define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11 +#define VC4_PACKET_CLEAR_COLORS_SIZE 14 +#define VC4_PACKET_TILE_COORDINATES_SIZE 3 +#define VC4_PACKET_GEM_HANDLES_SIZE 9 + /** @{ * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and * VC4_PACKET_TILE_RENDERING_MODE_CONFIG. @@ -96,6 +131,7 @@ enum vc4_packet { * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address) */ +#define VC4_LOADSTORE_TILE_BUFFER_EOF (1 << 3) #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2) #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS (1 << 1) #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR (1 << 0) @@ -104,18 +140,19 @@ enum vc4_packet { /** @{ * - * byte 1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and + * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL */ -#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 7) -#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 6) -#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 5) -#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 4) - -#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 (0 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER (1 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_BGR565 (2 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_MASK (3 << 0) +#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15) +#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14) +#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13) +#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12) + +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8) +#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8 +#define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0 +#define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1 +#define VC4_LOADSTORE_TILE_BUFFER_BGR565 2 /** @} */ /** @{ @@ -123,21 +160,24 @@ enum vc4_packet { * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL */ +#define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6) +#define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6 #define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6) #define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6) #define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6) /** The values of the field are VC4_TILING_FORMAT_* */ -#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK (3 << 4) -#define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 4 - - -#define VC4_LOADSTORE_TILE_BUFFER_NONE (0 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_COLOR (1 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_ZS (2 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_Z (3 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK (4 << 0) -#define VC4_LOADSTORE_TILE_BUFFER_FULL (5 << 0) +#define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4) +#define VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4 + +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0) +#define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0 +#define VC4_LOADSTORE_TILE_BUFFER_NONE 0 +#define VC4_LOADSTORE_TILE_BUFFER_COLOR 1 +#define VC4_LOADSTORE_TILE_BUFFER_ZS 2 +#define VC4_LOADSTORE_TILE_BUFFER_Z 3 +#define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4 +#define VC4_LOADSTORE_TILE_BUFFER_FULL 5 /** @} */ #define VC4_INDEX_BUFFER_U8 (0 << 4) @@ -206,21 +246,28 @@ enum vc4_packet { #define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8) /** The values of the field are VC4_TILING_FORMAT_* */ -#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK (3 << 6) +#define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6) #define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6 #define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4) #define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4) #define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4) -#define VC4_RENDER_CONFIG_FORMAT_BGR565 (0 << 2) -#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 (1 << 2) -#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED (2 << 2) -#define VC4_RENDER_CONFIG_FORMAT_MASK (3 << 2) +#define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2) +#define VC4_RENDER_CONFIG_FORMAT_SHIFT 2 +#define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0 +#define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1 +#define VC4_RENDER_CONFIG_FORMAT_BGR565 2 #define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1) #define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4) +#define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0) enum vc4_texture_data_type { VC4_TEXTURE_TYPE_RGBA8888 = 0, @@ -243,4 +290,74 @@ enum vc4_texture_data_type { VC4_TEXTURE_TYPE_YUV422R = 17, }; +#define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12) +#define VC4_TEX_P0_OFFSET_SHIFT 12 +#define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10) +#define VC4_TEX_P0_CSWIZ_SHIFT 10 +#define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9) +#define VC4_TEX_P0_CMMODE_SHIFT 9 +#define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8) +#define VC4_TEX_P0_FLIPY_SHIFT 8 +#define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4) +#define VC4_TEX_P0_TYPE_SHIFT 4 +#define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0) +#define VC4_TEX_P0_MIPLVLS_SHIFT 0 + +#define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31) +#define VC4_TEX_P1_TYPE4_SHIFT 31 +#define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20) +#define VC4_TEX_P1_HEIGHT_SHIFT 20 +#define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19) +#define VC4_TEX_P1_ETCFLIP_SHIFT 19 +#define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8) +#define VC4_TEX_P1_WIDTH_SHIFT 8 + +#define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7) +#define VC4_TEX_P1_MAGFILT_SHIFT 7 +# define VC4_TEX_P1_MAGFILT_LINEAR 0 +# define VC4_TEX_P1_MAGFILT_NEAREST 1 + +#define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4) +#define VC4_TEX_P1_MINFILT_SHIFT 4 +# define VC4_TEX_P1_MINFILT_LINEAR 0 +# define VC4_TEX_P1_MINFILT_NEAREST 1 +# define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2 +# define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3 +# define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4 +# define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5 + +#define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2) +#define VC4_TEX_P1_WRAP_T_SHIFT 2 +#define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0) +#define VC4_TEX_P1_WRAP_S_SHIFT 0 +# define VC4_TEX_P1_WRAP_REPEAT 0 +# define VC4_TEX_P1_WRAP_CLAMP 1 +# define VC4_TEX_P1_WRAP_MIRROR 2 +# define VC4_TEX_P1_WRAP_BORDER 3 + +#define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30) +#define VC4_TEX_P2_PTYPE_SHIFT 30 +# define VC4_TEX_P2_PTYPE_IGNORED 0 +# define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1 +# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2 +# define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3 + +/* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */ +#define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12) +#define VC4_TEX_P2_CMST_SHIFT 12 +#define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0) +#define VC4_TEX_P2_BSLOD_SHIFT 0 + +/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */ +#define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12) +#define VC4_TEX_P2_CHEIGHT_SHIFT 12 +#define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0) +#define VC4_TEX_P2_CWIDTH_SHIFT 0 + +/* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */ +#define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12) +#define VC4_TEX_P2_CYOFF_SHIFT 12 +#define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0) +#define VC4_TEX_P2_CXOFF_SHIFT 0 + #endif /* VC4_PACKET_H */ diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c index 04e7d67f8f27..abd37867e4d2 100644 --- a/drivers/gpu/drm/vc4/vc4_validate.c +++ b/drivers/gpu/drm/vc4/vc4_validate.c @@ -311,17 +311,18 @@ validate_branch_to_sublist(VALIDATE_ARGS) static int validate_loadstore_tile_buffer_general(VALIDATE_ARGS) { - uint32_t packet_b0 = *(uint8_t *)(untrusted + 0); - uint32_t packet_b1 = *(uint8_t *)(untrusted + 1); + uint16_t packet_b01 = *(uint16_t *)(untrusted + 0); struct drm_gem_cma_object *fbo; - uint32_t buffer_type = packet_b0 & 0xf; + uint32_t buffer_type = VC4_GET_FIELD(packet_b01, + VC4_LOADSTORE_TILE_BUFFER_BUFFER); uint32_t untrusted_address, offset, cpp; switch (buffer_type) { case VC4_LOADSTORE_TILE_BUFFER_NONE: return 0; case VC4_LOADSTORE_TILE_BUFFER_COLOR: - if ((packet_b1 & VC4_LOADSTORE_TILE_BUFFER_MASK) == + if (VC4_GET_FIELD(packet_b01, + VC4_LOADSTORE_TILE_BUFFER_FORMAT) == VC4_LOADSTORE_TILE_BUFFER_RGBA8888) { cpp = 4; } else { @@ -346,9 +347,8 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS) offset = untrusted_address & ~0xf; if (!check_tex_size(exec, fbo, offset, - ((packet_b0 & - VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK) >> - VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT), + VC4_GET_FIELD(packet_b01, + VC4_LOADSTORE_TILE_BUFFER_TILING), exec->fb_width, exec->fb_height, cpp)) { return -EINVAL; } @@ -590,7 +590,7 @@ validate_tile_rendering_mode_config(VALIDATE_ARGS) exec->fb_height = *(uint16_t *)(untrusted + 6); flags = *(uint16_t *)(untrusted + 8); - if ((flags & VC4_RENDER_CONFIG_FORMAT_MASK) == + if (VC4_GET_FIELD(flags, VC4_RENDER_CONFIG_FORMAT) == VC4_RENDER_CONFIG_FORMAT_RGBA8888) { cpp = 4; } else { @@ -599,9 +599,8 @@ validate_tile_rendering_mode_config(VALIDATE_ARGS) offset = *(uint32_t *)untrusted; if (!check_tex_size(exec, fbo, offset, - ((flags & - VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK) >> - VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT), + VC4_GET_FIELD(flags, + VC4_RENDER_CONFIG_MEMORY_FORMAT), exec->fb_width, exec->fb_height, cpp)) { return -EINVAL; } @@ -633,6 +632,9 @@ validate_gem_handles(VALIDATE_ARGS) return 0; } +#define VC4_DEFINE_PACKET(packet, bin, render, name, func) \ + [packet] = { bin, render, packet ## _SIZE, name, func } + static const struct cmd_info { bool bin; bool render; @@ -641,59 +643,59 @@ static const struct cmd_info { int (*func)(struct vc4_exec_info *exec, void *validated, void *untrusted); } cmd_info[] = { - [VC4_PACKET_HALT] = { 1, 1, 1, "halt", NULL }, - [VC4_PACKET_NOP] = { 1, 1, 1, "nop", NULL }, - [VC4_PACKET_FLUSH] = { 1, 1, 1, "flush", NULL }, - [VC4_PACKET_FLUSH_ALL] = { 1, 0, 1, "flush all state", validate_flush_all }, - [VC4_PACKET_START_TILE_BINNING] = { 1, 0, 1, "start tile binning", validate_start_tile_binning }, - [VC4_PACKET_INCREMENT_SEMAPHORE] = { 1, 0, 1, "increment semaphore", validate_increment_semaphore }, - [VC4_PACKET_WAIT_ON_SEMAPHORE] = { 0, 1, 1, "wait on semaphore", validate_wait_on_semaphore }, + VC4_DEFINE_PACKET(VC4_PACKET_HALT, 1, 1, "halt", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_NOP, 1, 1, "nop", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, 1, 1, "flush", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, 1, 0, "flush all state", validate_flush_all), + VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, 1, 0, "start tile binning", validate_start_tile_binning), + VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 1, 0, "increment semaphore", validate_increment_semaphore), + VC4_DEFINE_PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 0, 1, "wait on semaphore", validate_wait_on_semaphore), /* BRANCH_TO_SUB_LIST is actually supported in the binner as well, but * we only use it from the render CL in order to jump into the tile * allocation BO. */ - [VC4_PACKET_BRANCH_TO_SUB_LIST] = { 0, 1, 5, "branch to sublist", validate_branch_to_sublist }, - [VC4_PACKET_STORE_MS_TILE_BUFFER] = { 0, 1, 1, "store MS resolved tile color buffer", NULL }, - [VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF] = { 0, 1, 1, "store MS resolved tile color buffer and EOF", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_BRANCH_TO_SUB_LIST, 0, 1, "branch to sublist", validate_branch_to_sublist), + VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 0, 1, "store MS resolved tile color buffer", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 0, 1, "store MS resolved tile color buffer and EOF", NULL), - [VC4_PACKET_STORE_TILE_BUFFER_GENERAL] = { 0, 1, 7, "Store Tile Buffer General", validate_loadstore_tile_buffer_general }, - [VC4_PACKET_LOAD_TILE_BUFFER_GENERAL] = { 0, 1, 7, "Load Tile Buffer General", validate_loadstore_tile_buffer_general }, + VC4_DEFINE_PACKET(VC4_PACKET_STORE_TILE_BUFFER_GENERAL, 0, 1, "Store Tile Buffer General", validate_loadstore_tile_buffer_general), + VC4_DEFINE_PACKET(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL, 0, 1, "Load Tile Buffer General", validate_loadstore_tile_buffer_general), - [VC4_PACKET_GL_INDEXED_PRIMITIVE] = { 1, 1, 14, "Indexed Primitive List", validate_indexed_prim_list }, + VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 1, 1, "Indexed Primitive List", validate_indexed_prim_list), - [VC4_PACKET_GL_ARRAY_PRIMITIVE] = { 1, 1, 10, "Vertex Array Primitives", validate_gl_array_primitive }, + VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 1, 1, "Vertex Array Primitives", validate_gl_array_primitive), /* This is only used by clipped primitives (packets 48 and 49), which * we don't support parsing yet. */ - [VC4_PACKET_PRIMITIVE_LIST_FORMAT] = { 1, 1, 2, "primitive list format", NULL }, - - [VC4_PACKET_GL_SHADER_STATE] = { 1, 1, 5, "GL Shader State", validate_gl_shader_state }, - [VC4_PACKET_NV_SHADER_STATE] = { 1, 1, 5, "NV Shader State", validate_nv_shader_state }, - - [VC4_PACKET_CONFIGURATION_BITS] = { 1, 1, 4, "configuration bits", NULL }, - [VC4_PACKET_FLAT_SHADE_FLAGS] = { 1, 1, 5, "flat shade flags", NULL }, - [VC4_PACKET_POINT_SIZE] = { 1, 1, 5, "point size", NULL }, - [VC4_PACKET_LINE_WIDTH] = { 1, 1, 5, "line width", NULL }, - [VC4_PACKET_RHT_X_BOUNDARY] = { 1, 1, 3, "RHT X boundary", NULL }, - [VC4_PACKET_DEPTH_OFFSET] = { 1, 1, 5, "Depth Offset", NULL }, - [VC4_PACKET_CLIP_WINDOW] = { 1, 1, 9, "Clip Window", NULL }, - [VC4_PACKET_VIEWPORT_OFFSET] = { 1, 1, 5, "Viewport Offset", NULL }, - [VC4_PACKET_CLIPPER_XY_SCALING] = { 1, 1, 9, "Clipper XY Scaling", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, 1, 1, "primitive list format", NULL), + + VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, 1, 1, "GL Shader State", validate_gl_shader_state), + VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, 1, 1, "NV Shader State", validate_nv_shader_state), + + VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, 1, 1, "configuration bits", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, 1, 1, "flat shade flags", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, 1, 1, "point size", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, 1, 1, "line width", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, 1, 1, "RHT X boundary", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, 1, 1, "Depth Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, 1, 1, "Clip Window", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, 1, 1, "Viewport Offset", NULL), + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, 1, 1, "Clipper XY Scaling", NULL), /* Note: The docs say this was also 105, but it was 106 in the * initial userland code drop. */ - [VC4_PACKET_CLIPPER_Z_SCALING] = { 1, 1, 9, "Clipper Z Scale and Offset", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, 1, 1, "Clipper Z Scale and Offset", NULL), - [VC4_PACKET_TILE_BINNING_MODE_CONFIG] = { 1, 0, 16, "tile binning configuration", validate_tile_binning_config }, + VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 1, 0, "tile binning configuration", validate_tile_binning_config), - [VC4_PACKET_TILE_RENDERING_MODE_CONFIG] = { 0, 1, 11, "tile rendering mode configuration", validate_tile_rendering_mode_config}, + VC4_DEFINE_PACKET(VC4_PACKET_TILE_RENDERING_MODE_CONFIG, 0, 1, "tile rendering mode configuration", validate_tile_rendering_mode_config), - [VC4_PACKET_CLEAR_COLORS] = { 0, 1, 14, "Clear Colors", NULL }, + VC4_DEFINE_PACKET(VC4_PACKET_CLEAR_COLORS, 0, 1, "Clear Colors", NULL), - [VC4_PACKET_TILE_COORDINATES] = { 0, 1, 3, "Tile Coordinates", validate_tile_coordinates }, + VC4_DEFINE_PACKET(VC4_PACKET_TILE_COORDINATES, 0, 1, "Tile Coordinates", validate_tile_coordinates), - [VC4_PACKET_GEM_HANDLES] = { 1, 1, 9, "GEM handles", validate_gem_handles }, + VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, 1, 1, "GEM handles", validate_gem_handles), }; int @@ -814,10 +816,10 @@ reloc_tex(struct vc4_exec_info *exec, uint32_t p3 = (sample->p_offset[3] != ~0 ? *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0); uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0]; - uint32_t offset = p0 & ~0xfff; - uint32_t miplevels = (p0 & 15); - uint32_t width = (p1 >> 8) & 2047; - uint32_t height = (p1 >> 20) & 2047; + uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK; + uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS); + uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH); + uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT); uint32_t cpp, tiling_format, utile_w, utile_h; uint32_t i; uint32_t cube_map_stride = 0; @@ -845,16 +847,18 @@ reloc_tex(struct vc4_exec_info *exec, if (height == 0) height = 2048; - if (p0 & (1 << 9)) { - if ((p2 & (3 << 30)) == (1 << 30)) - cube_map_stride = p2 & 0x3ffff000; - if ((p3 & (3 << 30)) == (1 << 30)) { + if (p0 & VC4_TEX_P0_CMMODE_MASK) { + if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) == + VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) + cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK; + if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) == + VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) { if (cube_map_stride) { DRM_ERROR("Cube map stride set twice\n"); return false; } - cube_map_stride = p3 & 0x3ffff000; + cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK; } if (!cube_map_stride) { DRM_ERROR("Cube map stride not set\n"); @@ -862,7 +866,8 @@ reloc_tex(struct vc4_exec_info *exec, } } - type = ((p0 >> 4) & 15) | ((p1 >> 31) << 4); + type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) | + (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4)); switch (type) { case VC4_TEXTURE_TYPE_RGBA8888: diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index 839d5521cfe5..01351874ff8c 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -57,7 +57,8 @@ struct vc4_shader_validation_state { * * This is used for the validation of direct address memory reads. */ - uint32_t live_clamp_offsets[32 + 32 + 4]; + uint32_t live_min_clamp_offsets[32 + 32 + 4]; + bool live_max_clamp_regs[32 + 32 + 4]; }; static uint32_t @@ -76,6 +77,25 @@ waddr_to_live_reg_index(uint32_t waddr, bool is_b) } } +static uint32_t +raddr_add_a_to_live_reg_index(uint64_t inst) +{ + uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); + uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); + uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + + if (add_a == QPU_MUX_A) { + return raddr_a; + } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) { + return 32 + raddr_b; + } else if (add_a <= QPU_MUX_R3) { + return 64 + add_a; + } else { + return ~0; + } +} + static bool is_tmu_submit(uint32_t waddr) { @@ -135,9 +155,8 @@ check_tmu_write(uint64_t inst, uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); if (is_direct) { - uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); - uint32_t clamp_offset = ~0; + uint32_t clamp_reg, clamp_offset; if (sig == QPU_SIG_SMALL_IMM) { DRM_ERROR("direct TMU read used small immediate\n"); @@ -158,14 +177,13 @@ check_tmu_write(uint64_t inst, * This is arbitrary, but simpler than supporting flipping the * two either way. */ - if (add_a == QPU_MUX_A) { - clamp_offset = validation_state->live_clamp_offsets[raddr_a]; - } else if (add_a == QPU_MUX_B) { - clamp_offset = validation_state->live_clamp_offsets[32 + raddr_b]; - } else if (add_a <= QPU_MUX_R4) { - clamp_offset = validation_state->live_clamp_offsets[64 + add_a]; + clamp_reg = raddr_add_a_to_live_reg_index(inst); + if (clamp_reg == ~0) { + DRM_ERROR("direct TMU load wasn't clamped\n"); + return false; } + clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg]; if (clamp_offset == ~0) { DRM_ERROR("direct TMU load wasn't clamped\n"); return false; @@ -228,8 +246,6 @@ check_register_write(uint64_t inst, uint32_t waddr = (is_mul ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) : QPU_GET_FIELD(inst, QPU_WADDR_ADD)); - bool is_b = is_mul != ((inst & QPU_WS) != 0); - uint32_t live_reg_index; switch (waddr) { case QPU_W_UNIFORMS_ADDRESS: @@ -284,14 +300,6 @@ check_register_write(uint64_t inst, return true; } - /* Clear out the live offset clamp tracking for the written register. - * If this particular instruction is setting up an offset clamp, it'll - * get tracked immediately after we return. - */ - live_reg_index = waddr_to_live_reg_index(waddr, is_b); - if (live_reg_index != ~0) - validation_state->live_clamp_offsets[live_reg_index] = ~0; - return true; } @@ -300,26 +308,72 @@ track_live_clamps(uint64_t inst, struct vc4_validated_shader_info *validated_shader, struct vc4_shader_validation_state *validation_state) { + uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); + uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); + uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); + uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); - bool is_b = inst & QPU_WS; - uint32_t live_reg_index; + bool ws = inst & QPU_WS; + uint32_t lri_add_a, lri_add, lri_mul; + bool add_a_is_min_0; - if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN) + /* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0), + * before we clear previous live state. + */ + lri_add_a = raddr_add_a_to_live_reg_index(inst); + add_a_is_min_0 = (lri_add_a != ~0 && + validation_state->live_max_clamp_regs[lri_add_a]); + + /* Clear live state for registers written by our instruction. */ + lri_add = waddr_to_live_reg_index(waddr_add, ws); + lri_mul = waddr_to_live_reg_index(waddr_mul, !ws); + if (lri_mul != ~0) { + validation_state->live_max_clamp_regs[lri_mul] = false; + validation_state->live_min_clamp_offsets[lri_mul] = ~0; + } + if (lri_add != ~0) { + validation_state->live_max_clamp_regs[lri_add] = false; + validation_state->live_min_clamp_offsets[lri_add] = ~0; + } else { + /* Nothing further to do for live tracking, since only ADDs + * generate new live clamp registers. + */ return; + } + + /* Now, handle remaining live clamp tracking for the ADD operation. */ - if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && - !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && - sig != QPU_SIG_SMALL_IMM)) { + if (cond_add != QPU_COND_ALWAYS) return; - } - live_reg_index = waddr_to_live_reg_index(waddr_add, is_b); - if (live_reg_index != ~0) { - validation_state->live_clamp_offsets[live_reg_index] = + if (op_add == QPU_A_MAX) { + /* Track live clamps of a value to a minimum of 0 (in either + * arg). + */ + if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 || + (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) { + return; + } + + validation_state->live_max_clamp_regs[lri_add] = true; + } if (op_add == QPU_A_MIN) { + /* Track live clamps of a value clamped to a minimum of 0 and + * a maximum of some uniform's offset. + */ + if (!add_a_is_min_0) + return; + + if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && + !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && + sig != QPU_SIG_SMALL_IMM)) { + return; + } + + validation_state->live_min_clamp_offsets[lri_add] = validated_shader->uniforms_size; } } @@ -399,8 +453,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) for (i = 0; i < 8; i++) validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0; - for (i = 0; i < ARRAY_SIZE(validation_state.live_clamp_offsets); i++) - validation_state.live_clamp_offsets[i] = ~0; + for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) + validation_state.live_min_clamp_offsets[i] = ~0; shader = shader_obj->vaddr; max_ip = shader_obj->base.size / sizeof(uint64_t); -- cgit v1.2.1