diff options
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 83 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 6 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 2 |
3 files changed, 28 insertions, 63 deletions
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index 9591c42ede..37080b990e 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -2045,37 +2045,25 @@ genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] b | otherwise = do code_src <- getAnyReg src + src_r <- getNewRegNat format + tmp_r <- getNewRegNat format let dst_r = getRegisterReg platform False (CmmLocal dst) - if isBmi2Enabled dflags - then do - src_r <- getNewRegNat (intFormat width) - return $ appOL (code_src src_r) $ case width of - W8 -> toOL - [ MOVZxL II8 (OpReg src_r) (OpReg src_r) -- zero-extend to 32 bit - , LZCNT II32 (OpReg src_r) dst_r -- lzcnt with extra 24 zeros - , SUB II32 (OpImm (ImmInt 24)) (OpReg dst_r) -- compensate for extra zeros - ] - W16 -> toOL - [ LZCNT II16 (OpReg src_r) dst_r - , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) -- zero-extend from 16 bit - ] - _ -> unitOL (LZCNT (intFormat width) (OpReg src_r) dst_r) - else do - let format = if width == W8 then II16 else intFormat width - src_r <- getNewRegNat format - tmp_r <- getNewRegNat format - return $ code_src src_r `appOL` toOL - ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++ - [ BSR format (OpReg src_r) tmp_r - , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r) - , CMOV NE format (OpReg tmp_r) dst_r - , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r) - ]) -- NB: We don't need to zero-extend the result for the - -- W8/W16 cases because the 'MOV' insn already - -- took care of implicitly clearing the upper bits + + -- The following insn sequence makes sure 'clz 0' has a defined value. + -- starting with Haswell, one could use the LZCNT insn instead. + return $ code_src src_r `appOL` toOL + ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++ + [ BSR format (OpReg src_r) tmp_r + , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r) + , CMOV NE format (OpReg tmp_r) dst_r + , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r) + ]) -- NB: We don't need to zero-extend the result for the + -- W8/W16 cases because the 'MOV' insn already + -- took care of implicitly clearing the upper bits where bw = widthInBits width platform = targetPlatform dflags + format = if width == W8 then II16 else intFormat width lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width)) genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid @@ -2085,7 +2073,6 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid dst_r = getRegisterReg platform False (CmmLocal dst) lbl1 <- getBlockIdNat lbl2 <- getBlockIdNat - let format = if width == W8 then II16 else intFormat width tmp_r <- getNewRegNat format -- New CFG Edges: @@ -2122,38 +2109,24 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid | otherwise = do code_src <- getAnyReg src + src_r <- getNewRegNat format + tmp_r <- getNewRegNat format let dst_r = getRegisterReg platform False (CmmLocal dst) - if isBmi2Enabled dflags - then do - src_r <- getNewRegNat (intFormat width) - return $ appOL (code_src src_r) $ case width of - W8 -> toOL - [ OR II32 (OpImm (ImmInt 0xFFFFFF00)) (OpReg src_r) - , TZCNT II32 (OpReg src_r) dst_r - ] - W16 -> toOL - [ TZCNT II16 (OpReg src_r) dst_r - , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) - ] - _ -> unitOL $ TZCNT (intFormat width) (OpReg src_r) dst_r - else do - -- The following insn sequence makes sure 'ctz 0' has a defined value. - -- starting with Haswell, one could use the TZCNT insn instead. - let format = if width == W8 then II16 else intFormat width - src_r <- getNewRegNat format - tmp_r <- getNewRegNat format - return $ code_src src_r `appOL` toOL - ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++ - [ BSF format (OpReg src_r) tmp_r - , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r) - , CMOV NE format (OpReg tmp_r) dst_r - ]) -- NB: We don't need to zero-extend the result for the - -- W8/W16 cases because the 'MOV' insn already - -- took care of implicitly clearing the upper bits + -- The following insn sequence makes sure 'ctz 0' has a defined value. + -- starting with Haswell, one could use the TZCNT insn instead. + return $ code_src src_r `appOL` toOL + ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++ + [ BSF format (OpReg src_r) tmp_r + , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r) + , CMOV NE format (OpReg tmp_r) dst_r + ]) -- NB: We don't need to zero-extend the result for the + -- W8/W16 cases because the 'MOV' insn already + -- took care of implicitly clearing the upper bits where bw = widthInBits width platform = targetPlatform dflags + format = if width == W8 then II16 else intFormat width genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args bid = do targetExpr <- cmmMakeDynamicReference dflags diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 5e790e481e..c47e1fae83 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -342,8 +342,6 @@ data Instr -- bit counting instructions | POPCNT Format Operand Reg -- [SSE4.2] count number of bits set to 1 - | LZCNT Format Operand Reg -- [BMI2] count number of leading zeros - | TZCNT Format Operand Reg -- [BMI2] count number of trailing zeros | BSF Format Operand Reg -- bit scan forward | BSR Format Operand Reg -- bit scan reverse @@ -473,8 +471,6 @@ x86_regUsageOfInstr platform instr DELTA _ -> noUsage POPCNT _ src dst -> mkRU (use_R src []) [dst] - LZCNT _ src dst -> mkRU (use_R src []) [dst] - TZCNT _ src dst -> mkRU (use_R src []) [dst] BSF _ src dst -> mkRU (use_R src []) [dst] BSR _ src dst -> mkRU (use_R src []) [dst] @@ -657,8 +653,6 @@ x86_patchRegsOfInstr instr env CLTD _ -> instr POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst) - LZCNT fmt src dst -> LZCNT fmt (patchOp src) (env dst) - TZCNT fmt src dst -> TZCNT fmt (patchOp src) (env dst) PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst) PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst) BSF fmt src dst -> BSF fmt (patchOp src) (env dst) diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index 075bb26337..bf449d044e 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -693,8 +693,6 @@ pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst pprInstr (XOR format src dst) = pprFormatOpOp (sLit "xor") format src dst pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst) -pprInstr (LZCNT format src dst) = pprOpOp (sLit "lzcnt") format src (OpReg dst) -pprInstr (TZCNT format src dst) = pprOpOp (sLit "tzcnt") format src (OpReg dst) pprInstr (BSF format src dst) = pprOpOp (sLit "bsf") format src (OpReg dst) pprInstr (BSR format src dst) = pprOpOp (sLit "bsr") format src (OpReg dst) |