diff options
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 28 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 7 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/memcpy.asm | 1 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm | 1 |
7 files changed, 32 insertions, 8 deletions
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 8cea28d920..616c23a657 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -180,11 +180,16 @@ verifyBasicBlock instrs CALL {} | atEnd -> faultyBlockWith i | not atEnd -> go atEnd instr -- All instructions ok, check if we reached the end and continue. - _ | not atEnd -> go (isJumpishInstr i) instr - -- Only jumps allowed at the end of basic blocks. - | otherwise -> if isJumpishInstr i - then go True instr - else faultyBlockWith i + _ | not atEnd -> go (isJumpishInstr i) instr + | isTerminalInstr i -> go True instr + | otherwise -> faultyBlockWith i + + -- Only jumps (or UD2, which always follows indirect jumps) + -- allowed at the end of basic blocks. + isTerminalInstr UD2 = True + isTerminalInstr i | isJumpishInstr i = True + isTerminalInstr _ = False + faultyBlockWith i = pprPanic "Non control flow instructions after end of basic block." (ppr i <+> text "in:" $$ vcat (map ppr instrs)) @@ -1766,19 +1771,28 @@ assignReg_FltCode _ reg src = do let platform = targetPlatform dflags return (src_code (getRegisterReg platform reg)) +-- Note [UD2 after indirect jumps] +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +-- +-- We follow all indirect jumps with a UD2 instruction to ensure that the +-- instruction decoder doesn't attempt to decode the fallthrough path, which +-- can result in resource conflicts. See Intel Software Optimisation Manual +-- Section 3.4.1.6 (Branch Type Selection), Rule 14. genJump :: CmmExpr{-the branch target-} -> [Reg] -> NatM InstrBlock genJump (CmmLoad mem _) regs = do Amode target code <- getAmode mem - return (code `snocOL` JMP (OpAddr target) regs) + -- See Note [UD2 after indirect jumps] + return (code `snocOL` JMP (OpAddr target) regs `snocOL` UD2) genJump (CmmLit lit) regs = do return (unitOL (JMP (OpImm (litToImm lit)) regs)) genJump expr regs = do (reg,code) <- getSomeReg expr - return (code `snocOL` JMP (OpReg reg) regs) + -- See Note [UD2 after indirect jumps] + return (code `snocOL` JMP (OpReg reg) regs `snocOL` UD2) -- ----------------------------------------------------------------------------- diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 80a2c8b28e..ae91352316 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -329,6 +329,8 @@ data Instr | CMPXCHG Format Operand Operand -- src (r), dst (r/m), eax implicit | MFENCE + | UD2 + data PrefetchVariant = NTA | Lvl0 | Lvl1 | Lvl2 @@ -430,6 +432,7 @@ x86_regUsageOfInstr platform instr XADD _ src dst -> usageMM src dst CMPXCHG _ src dst -> usageRMM src dst (OpReg eax) MFENCE -> noUsage + UD2 -> noUsage _other -> panic "regUsage: unrecognised instr" where @@ -588,8 +591,10 @@ x86_patchRegsOfInstr instr env XADD fmt src dst -> patch2 (XADD fmt) src dst CMPXCHG fmt src dst -> patch2 (CMPXCHG fmt) src dst MFENCE -> instr + UD2 -> instr - _other -> panic "patchRegs: unrecognised instr" + LDATA _ _ -> panic "patchRegs(LDATA)" + NEWBLOCK _ -> panic "patchRegs(NEWBLOCK)" where patch1 :: (Operand -> a) -> Operand -> a diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 76a806982e..a01466a3d0 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -832,6 +832,7 @@ pprInstr (XADD format src dst) = pprFormatOpOp (sLit "xadd") format src dst pprInstr (CMPXCHG format src dst) = pprFormatOpOp (sLit "cmpxchg") format src dst +pprInstr UD2 = text "\t ud2" -------------------------- diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm index 1bafb34ce9..074fa59dcb 100644 --- a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm +++ b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll-conprop.asm @@ -16,6 +16,7 @@ callMemcpy: movl %eax,12(%rbx) .Lcb: jmp *(%rbp) + ud2 .size callMemcpy, .-callMemcpy .section .note.GNU-stack,"",@progbits .ident "GHC 7.7.20121009" diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm index ffb27e70b7..6a236c69eb 100644 --- a/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm +++ b/testsuite/tests/codeGen/should_gen_asm/memcpy-unroll.asm @@ -13,6 +13,7 @@ callMemcpy: movl 12(%r14),%eax movl %eax,12(%rbx) jmp *(%rbp) + ud2 .size callMemcpy, .-callMemcpy .section .note.GNU-stack,"",@progbits .ident "GHC 7.7.20121009" diff --git a/testsuite/tests/codeGen/should_gen_asm/memcpy.asm b/testsuite/tests/codeGen/should_gen_asm/memcpy.asm index eedd5ad5dc..701775f717 100644 --- a/testsuite/tests/codeGen/should_gen_asm/memcpy.asm +++ b/testsuite/tests/codeGen/should_gen_asm/memcpy.asm @@ -7,3 +7,4 @@ callMemcpy: call memcpy addq jmp + ud2 diff --git a/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm b/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm index 4c5c20bfdf..414f8b1b40 100644 --- a/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm +++ b/testsuite/tests/codeGen/should_gen_asm/memset-unroll.asm @@ -9,6 +9,7 @@ callMemset: movl $16843009,8(%rbx) movl $16843009,12(%rbx) jmp *(%rbp) + ud2 .size callMemset, .-callMemset .section .note.GNU-stack,"",@progbits .ident "GHC 7.9.20140311" |