diff options
author | Ben Gamari <ben@smart-cactus.org> | 2019-12-18 14:35:57 -0500 |
---|---|---|
committer | Ben Gamari <ben@well-typed.com> | 2020-01-17 19:28:12 -0500 |
commit | 9fe3f88d296256f4a9e96c53d341a725d5e2c419 (patch) | |
tree | a1f62e15faa1dc07891b67907ecb3f7ee1ba9a88 | |
parent | a71323ffebf7663c50025d2731bf9de2d04f82c3 (diff) | |
download | haskell-wip/T17588.tar.gz |
nativeGen/X86: Add UD2 instructions after indirect branches [wip/T17588]
As noted in the Intel Software Optimization Manual (section 3.4.1.6,
Rule 14), the default prediction for indirect branches is to
fall through. This means that the instruction decoder will attempt to
decode anything that follows indirect branches. This will inject
operations into the pipeline that are doomed to fail (since our
indirect branches will never fall through), resulting in resource
conflicts.
Fixes #17588.
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 28 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 7 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/memcpy.asm | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm | 1 |
7 files changed, 32 insertions, 8 deletions
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 8cea28d920..616c23a657 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -180,11 +180,16 @@ verifyBasicBlock instrs CALL {} | atEnd -> faultyBlockWith i | not atEnd -> go atEnd instr -- All instructions ok, check if we reached the end and continue. - _ | not atEnd -> go (isJumpishInstr i) instr - -- Only jumps allowed at the end of basic blocks. - | otherwise -> if isJumpishInstr i - then go True instr - else faultyBlockWith i + _ | not atEnd -> go (isJumpishInstr i) instr + | isTerminalInstr i -> go True instr + | otherwise -> faultyBlockWith i + + -- Only jumps (or UD2, which always follows indirect jumps) + -- allowed at the end of basic blocks. + isTerminalInstr UD2 = True + isTerminalInstr i | isJumpishInstr i = True + isTerminalInstr _ = False + faultyBlockWith i = pprPanic "Non control flow instructions after end of basic block." (ppr i <+> text "in:" $$ vcat (map ppr instrs)) @@ -1766,19 +1771,28 @@ assignReg_FltCode _ reg src = do let platform = targetPlatform dflags return (src_code (getRegisterReg platform reg)) +-- Note [UD2 after indirect jumps] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- We follow all indirect jumps with a UD2 instruction to ensure that the +-- instruction decoder doesn't attempt to decode the fallthrough path, which +-- can result in resource conflicts. See Intel Software Optimisation Manual +-- Section 3.4.1.6 (Branch Type Selection), Rule 14. 
genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock genJump (CmmLoad mem _) regs = do Amode target code <- getAmode mem - return (code `snocOL` JMP (OpAddr target) regs) + -- See Note [UD2 after indirect jumps] + return (code `snocOL` JMP (OpAddr target) regs `snocOL` UD2) genJump (CmmLit lit) regs = do return (unitOL (JMP (OpImm (litToImm lit)) regs)) genJump expr regs = do (reg,code) <- getSomeReg expr - return (code `snocOL` JMP (OpReg reg) regs) + -- See Note [UD2 after indirect jumps] + return (code `snocOL` JMP (OpReg reg) regs `snocOL` UD2) -- ----------------------------------------------------------------------------- diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 80a2c8b28e..ae91352316 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -329,6 +329,8 @@ data Instr | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit | MFENCE + | UD2 + data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2 @@ -430,6 +432,7 @@ x86_regUsageOfInstr platform instr XADD _ src dst -> usageMM src dst CMPXCHG _ src dst -> usageRMM src dst (OpReg eax) MFENCE -> noUsage + UD2 -> noUsage _other -> panic "regUsage: unrecognised instr" where @@ -588,8 +591,10 @@ x86_patchRegsOfInstr instr env XADD fmt src dst -> patch2 (XADD fmt) src dst CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst MFENCE -> instr + UD2 -> instr - _other -> panic "patchRegs: unrecognised instr" + LDATA _ _ -> panic "patchRegs(LDATA)" + NEWBLOCK _ -> panic "patchRegs(NEWBLOCK)" where patch1 :: (Operand -> a) -> Operand -> a diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 76a806982e..a01466a3d0 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -832,6 +832,7 @@ pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst pprInstr (CMPXCHG format src dst) = pprFormatOpOp (sLit "cmpxchg") format src dst +pprInstr UD2 = text "\t ud2" 
-------------------------- diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm index 1bafb34ce9..074fa59dcb 100644 --- a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm +++ b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm @@ -16,6 +16,7 @@ callMemcpy: movl %eax,12(%rbx) .Lcb: jmp *(%rbp) + ud2 .size callMemcpy, .-callMemcpy .section .note.GNU-stack,"",@progbits .ident "GHC 7.7.20121009" diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm index ffb27e70b7..6a236c69eb 100644 --- a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm +++ b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm @@ -13,6 +13,7 @@ callMemcpy: movl 12(%r14),%eax movl %eax,12(%rbx) jmp *(%rbp) + ud2 .size callMemcpy, .-callMemcpy .section .note.GNU-stack,"",@progbits .ident "GHC 7.7.20121009" diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy.asm index eedd5ad5dc..701775f717 100644 --- a/testsuite/tests/codeGen/should_gen_asm/memcpy.asm +++ b/testsuite/tests/codeGen/should_gen_asm/memcpy.asm @@ -7,3 +7,4 @@ callMemcpy: call memcpy addq jmp + ud2 diff --git a/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm b/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm index 4c5c20bfdf..414f8b1b40 100644 --- a/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm +++ b/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm @@ -9,6 +9,7 @@ callMemset: movl $16843009,8(%rbx) movl $16843009,12(%rbx) jmp *(%rbp) + ud2 .size callMemset, .-callMemset .section .note.GNU-stack,"",@progbits .ident "GHC 7.9.20140311" |