summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Gamari <ben@smart-cactus.org>2019-12-18 14:35:57 -0500
committerBen Gamari <ben@well-typed.com>2020-01-17 19:28:12 -0500
commit9fe3f88d296256f4a9e96c53d341a725d5e2c419 (patch)
treea1f62e15faa1dc07891b67907ecb3f7ee1ba9a88
parenta71323ffebf7663c50025d2731bf9de2d04f82c3 (diff)
downloadhaskell-wip/T17588.tar.gz
nativeGen/X86: Add UD2 instructions after indirect branches (wip/T17588)
As noted in the Intel Software Optimization Manual (section 3.4.1.6, Rule 14), the default prediction for indirect branches is to fall through. This means that the instruction decoder will attempt to decode anything that follows an indirect branch. This will inject operations into the pipeline that are doomed to fail (since our indirect branches will never fall through), resulting in resource conflicts. Fixes #17588.
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs28
-rw-r--r--compiler/nativeGen/X86/Instr.hs7
-rw-r--r--compiler/nativeGen/X86/Ppr.hs1
-rw-r--r--testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm1
-rw-r--r--testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm1
-rw-r--r--testsuite/tests/codeGen/should_gen_asm/memcpy.asm1
-rw-r--r--testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm1
7 files changed, 32 insertions, 8 deletions
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 8cea28d920..616c23a657 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -180,11 +180,16 @@ verifyBasicBlock instrs
CALL {} | atEnd -> faultyBlockWith i
| not atEnd -> go atEnd instr
-- All instructions ok, check if we reached the end and continue.
- _ | not atEnd -> go (isJumpishInstr i) instr
- -- Only jumps allowed at the end of basic blocks.
- | otherwise -> if isJumpishInstr i
- then go True instr
- else faultyBlockWith i
+ _ | not atEnd -> go (isJumpishInstr i) instr
+ | isTerminalInstr i -> go True instr
+ | otherwise -> faultyBlockWith i
+
+ -- Only jumps (or UD2, which always follows indirect jumps)
+ -- allowed at the end of basic blocks.
+ isTerminalInstr UD2 = True
+ isTerminalInstr i | isJumpishInstr i = True
+ isTerminalInstr _ = False
+
faultyBlockWith i
= pprPanic "Non control flow instructions after end of basic block."
(ppr i <+> text "in:" $$ vcat (map ppr instrs))
@@ -1766,19 +1771,28 @@ assignReg_FltCode _ reg src = do
let platform = targetPlatform dflags
return (src_code (getRegisterReg platform reg))
+-- Note [UD2 after indirect jumps]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- We follow all indirect jumps with a UD2 instruction to ensure that the
+-- instruction decoder doesn't attempt to decode the fallthrough path, which
+-- can result in resource conflicts. See Intel Software Optimisation Manual
+-- Section 3.4.1.6 (Branch Type Selection), Rule 14.
genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock
genJump (CmmLoad mem _) regs = do
Amode target code <- getAmode mem
- return (code `snocOL` JMP (OpAddr target) regs)
+ -- See Note [UD2 after indirect jumps]
+ return (code `snocOL` JMP (OpAddr target) regs `snocOL` UD2)
genJump (CmmLit lit) regs = do
return (unitOL (JMP (OpImm (litToImm lit)) regs))
genJump expr regs = do
(reg,code) <- getSomeReg expr
- return (code `snocOL` JMP (OpReg reg) regs)
+ -- See Note [UD2 after indirect jumps]
+ return (code `snocOL` JMP (OpReg reg) regs `snocOL` UD2)
-- -----------------------------------------------------------------------------
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 80a2c8b28e..ae91352316 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -329,6 +329,8 @@ data Instr
| CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit
| MFENCE
+ | UD2
+
data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2
@@ -430,6 +432,7 @@ x86_regUsageOfInstr platform instr
XADD _ src dst -> usageMM src dst
CMPXCHG _ src dst -> usageRMM src dst (OpReg eax)
MFENCE -> noUsage
+ UD2 -> noUsage
_other -> panic "regUsage: unrecognised instr"
where
@@ -588,8 +591,10 @@ x86_patchRegsOfInstr instr env
XADD fmt src dst -> patch2 (XADD fmt) src dst
CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst
MFENCE -> instr
+ UD2 -> instr
- _other -> panic "patchRegs: unrecognised instr"
+ LDATA _ _ -> panic "patchRegs(LDATA)"
+ NEWBLOCK _ -> panic "patchRegs(NEWBLOCK)"
where
patch1 :: (Operand -> a) -> Operand -> a
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 76a806982e..a01466a3d0 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -832,6 +832,7 @@ pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst
pprInstr (CMPXCHG format src dst)
= pprFormatOpOp (sLit "cmpxchg") format src dst
+pprInstr UD2 = text "\t ud2"
--------------------------
diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm
index 1bafb34ce9..074fa59dcb 100644
--- a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm
+++ b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm
@@ -16,6 +16,7 @@ callMemcpy:
movl %eax,12(%rbx)
.Lcb:
jmp *(%rbp)
+ ud2
.size callMemcpy, .-callMemcpy
.section .note.GNU-stack,"",@progbits
.ident "GHC 7.7.20121009"
diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm
index ffb27e70b7..6a236c69eb 100644
--- a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm
+++ b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm
@@ -13,6 +13,7 @@ callMemcpy:
movl 12(%r14),%eax
movl %eax,12(%rbx)
jmp *(%rbp)
+ ud2
.size callMemcpy, .-callMemcpy
.section .note.GNU-stack,"",@progbits
.ident "GHC 7.7.20121009"
diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy.asm
index eedd5ad5dc..701775f717 100644
--- a/testsuite/tests/codeGen/should_gen_asm/memcpy.asm
+++ b/testsuite/tests/codeGen/should_gen_asm/memcpy.asm
@@ -7,3 +7,4 @@ callMemcpy:
call memcpy
addq
jmp
+ ud2
diff --git a/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm b/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm
index 4c5c20bfdf..414f8b1b40 100644
--- a/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm
+++ b/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm
@@ -9,6 +9,7 @@ callMemset:
movl $16843009,8(%rbx)
movl $16843009,12(%rbx)
jmp *(%rbp)
+ ud2
.size callMemset, .-callMemset
.section .note.GNU-stack,"",@progbits
.ident "GHC 7.9.20140311"