summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJason Ekstrand <jason.ekstrand@intel.com>2015-11-24 09:01:11 -0800
committerJason Ekstrand <jason.ekstrand@intel.com>2016-04-14 15:59:33 -0700
commit27bd8ac6f309b9f052a7fa9380ac5e12fb686e31 (patch)
tree384b317b2a61f29275088e6f50414dd9ea585d52
parent889e6054b7795baa789cc771e76e009d1605efae (diff)
downloadmesa-27bd8ac6f309b9f052a7fa9380ac5e12fb686e31.tar.gz
i965/fs: Add support for MOV_INDIRECT on pre-Broadwell hardware
While we're at it, we also add support for the possibility that the indirect is, in fact, a constant. This shouldn't happen in the common case (if it does, that means NIR failed to constant-fold something), but it's possible so we should handle it. Reviewed-by: Kristian Høgsberg <krh@bitplanet.net> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp4
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_generator.cpp75
2 files changed, 66 insertions, 13 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 24482096f17..0a2d769653f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4483,6 +4483,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
return 8;
+ case SHADER_OPCODE_MOV_INDIRECT:
+ /* Prior to Broadwell, we only have 8 address subregisters */
+ return devinfo->gen < 8 ? 8 : MIN2(inst->exec_size, 16);
+
default:
return inst->exec_size;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 35400cbe8c3..851cccf0f7c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -351,22 +351,71 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
- /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
- struct brw_reg addr = vec8(brw_address_reg(0));
+ if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) {
+ imm_byte_offset += indirect_byte_offset.ud;
- /* The destination stride of an instruction (in bytes) must be greater
- * than or equal to the size of the rest of the instruction. Since the
- * address register is of type UW, we can't use a D-type instruction.
- * In order to get around this, re re-type to UW and use a stride.
- */
- indirect_byte_offset =
- retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+ reg.nr = imm_byte_offset / REG_SIZE;
+ reg.subnr = imm_byte_offset % REG_SIZE;
+ brw_MOV(p, dst, reg);
+ } else {
+ /* Prior to Broadwell, there are only 8 address registers. */
+ assert(inst->exec_size == 8 || devinfo->gen >= 8);
+
+ /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+ struct brw_reg addr = vec8(brw_address_reg(0));
+
+ /* The destination stride of an instruction (in bytes) must be greater
+ * than or equal to the size of the rest of the instruction. Since the
+ * address register is of type UW, we can't use a D-type instruction.
+ * In order to get around this, re retype to UW and use a stride.
+ */
+ indirect_byte_offset =
+ retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+ struct brw_reg ind_src;
+ if (devinfo->gen < 8) {
+ /* From the Haswell PRM section "Register Region Restrictions":
+ *
+ * "The lower bits of the AddressImmediate must not overflow to
+ * change the register address. The lower 5 bits of Address
+ * Immediate when added to lower 5 bits of address register gives
+ * the sub-register offset. The upper bits of Address Immediate
+ * when added to upper bits of address register gives the register
+ * address. Any overflow from sub-register offset is dropped."
+ *
+ * This restriction is only listed in the Haswell PRM but emperical
+ * testing indicates that it applies on all older generations and is
+ * lifted on Broadwell.
+ *
+ * Since the indirect may cause us to cross a register boundary, this
+ * makes the base offset almost useless. We could try and do
+ * something clever where we use a actual base offset if
+ * base_offset % 32 == 0 but that would mean we were generating
+ * different code depending on the base offset. Instead, for the
+ * sake of consistency, we'll just do the add ourselves.
+ */
+ brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
+ ind_src = brw_VxH_indirect(0, 0);
+ } else {
+ brw_MOV(p, addr, indirect_byte_offset);
+ ind_src = brw_VxH_indirect(0, imm_byte_offset);
+ }
- /* Prior to Broadwell, there are only 8 address registers. */
- assert(inst->exec_size == 8 || devinfo->gen >= 8);
+ brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
- brw_MOV(p, addr, indirect_byte_offset);
- brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+ if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE &&
+ !inst->get_next()->is_tail_sentinel() &&
+ ((fs_inst *)inst->get_next())->mlen > 0) {
+ /* From the Sandybridge PRM:
+ *
+ * "[Errata: DevSNB(SNB)] If MRF register is updated by any
+ * instruction that “indexed/indirect” source AND is followed by a
+ * send, the instruction requires a “Switch”. This is to avoid
+ * race condition where send may dispatch before MRF is updated."
+ */
+ brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH);
+ }
+ }
}
void