summaryrefslogtreecommitdiff
path: root/compiler/nativeGen
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs83
-rw-r--r--compiler/nativeGen/X86/Instr.hs6
-rw-r--r--compiler/nativeGen/X86/Ppr.hs2
3 files changed, 28 insertions, 63 deletions
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 9591c42ede..37080b990e 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -2045,37 +2045,25 @@ genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src] b
| otherwise = do
code_src <- getAnyReg src
+ src_r <- getNewRegNat format
+ tmp_r <- getNewRegNat format
let dst_r = getRegisterReg platform False (CmmLocal dst)
- if isBmi2Enabled dflags
- then do
- src_r <- getNewRegNat (intFormat width)
- return $ appOL (code_src src_r) $ case width of
- W8 -> toOL
- [ MOVZxL II8 (OpReg src_r) (OpReg src_r) -- zero-extend to 32 bit
- , LZCNT II32 (OpReg src_r) dst_r -- lzcnt with extra 24 zeros
- , SUB II32 (OpImm (ImmInt 24)) (OpReg dst_r) -- compensate for extra zeros
- ]
- W16 -> toOL
- [ LZCNT II16 (OpReg src_r) dst_r
- , MOVZxL II16 (OpReg dst_r) (OpReg dst_r) -- zero-extend from 16 bit
- ]
- _ -> unitOL (LZCNT (intFormat width) (OpReg src_r) dst_r)
- else do
- let format = if width == W8 then II16 else intFormat width
- src_r <- getNewRegNat format
- tmp_r <- getNewRegNat format
- return $ code_src src_r `appOL` toOL
- ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
- [ BSR format (OpReg src_r) tmp_r
- , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
- , CMOV NE format (OpReg tmp_r) dst_r
- , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
- ]) -- NB: We don't need to zero-extend the result for the
- -- W8/W16 cases because the 'MOV' insn already
- -- took care of implicitly clearing the upper bits
+
+ -- The following insn sequence makes sure 'clz 0' has a defined value.
+ -- starting with Haswell, one could use the LZCNT insn instead.
+ return $ code_src src_r `appOL` toOL
+ ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
+ [ BSR format (OpReg src_r) tmp_r
+ , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
+ , CMOV NE format (OpReg tmp_r) dst_r
+ , XOR format (OpImm (ImmInt (bw-1))) (OpReg dst_r)
+ ]) -- NB: We don't need to zero-extend the result for the
+ -- W8/W16 cases because the 'MOV' insn already
+ -- took care of implicitly clearing the upper bits
where
bw = widthInBits width
platform = targetPlatform dflags
+ format = if width == W8 then II16 else intFormat width
lbl = mkCmmCodeLabel primUnitId (fsLit (clzLabel width))
genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
@@ -2085,7 +2073,6 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
dst_r = getRegisterReg platform False (CmmLocal dst)
lbl1 <- getBlockIdNat
lbl2 <- getBlockIdNat
- let format = if width == W8 then II16 else intFormat width
tmp_r <- getNewRegNat format
-- New CFG Edges:
@@ -2122,38 +2109,24 @@ genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) [dst] [src] bid
| otherwise = do
code_src <- getAnyReg src
+ src_r <- getNewRegNat format
+ tmp_r <- getNewRegNat format
let dst_r = getRegisterReg platform False (CmmLocal dst)
- if isBmi2Enabled dflags
- then do
- src_r <- getNewRegNat (intFormat width)
- return $ appOL (code_src src_r) $ case width of
- W8 -> toOL
- [ OR II32 (OpImm (ImmInt 0xFFFFFF00)) (OpReg src_r)
- , TZCNT II32 (OpReg src_r) dst_r
- ]
- W16 -> toOL
- [ TZCNT II16 (OpReg src_r) dst_r
- , MOVZxL II16 (OpReg dst_r) (OpReg dst_r)
- ]
- _ -> unitOL $ TZCNT (intFormat width) (OpReg src_r) dst_r
- else do
- -- The following insn sequence makes sure 'ctz 0' has a defined value.
- -- starting with Haswell, one could use the TZCNT insn instead.
- let format = if width == W8 then II16 else intFormat width
- src_r <- getNewRegNat format
- tmp_r <- getNewRegNat format
- return $ code_src src_r `appOL` toOL
- ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
- [ BSF format (OpReg src_r) tmp_r
- , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
- , CMOV NE format (OpReg tmp_r) dst_r
- ]) -- NB: We don't need to zero-extend the result for the
- -- W8/W16 cases because the 'MOV' insn already
- -- took care of implicitly clearing the upper bits
+ -- The following insn sequence makes sure 'ctz 0' has a defined value.
+ -- starting with Haswell, one could use the TZCNT insn instead.
+ return $ code_src src_r `appOL` toOL
+ ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
+ [ BSF format (OpReg src_r) tmp_r
+ , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
+ , CMOV NE format (OpReg tmp_r) dst_r
+ ]) -- NB: We don't need to zero-extend the result for the
+ -- W8/W16 cases because the 'MOV' insn already
+ -- took care of implicitly clearing the upper bits
where
bw = widthInBits width
platform = targetPlatform dflags
+ format = if width == W8 then II16 else intFormat width
genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args bid = do
targetExpr <- cmmMakeDynamicReference dflags
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 5e790e481e..c47e1fae83 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -342,8 +342,6 @@ data Instr
-- bit counting instructions
| POPCNT Format Operand Reg -- [SSE4.2] count number of bits set to 1
- | LZCNT Format Operand Reg -- [BMI2] count number of leading zeros
- | TZCNT Format Operand Reg -- [BMI2] count number of trailing zeros
| BSF Format Operand Reg -- bit scan forward
| BSR Format Operand Reg -- bit scan reverse
@@ -473,8 +471,6 @@ x86_regUsageOfInstr platform instr
DELTA _ -> noUsage
POPCNT _ src dst -> mkRU (use_R src []) [dst]
- LZCNT _ src dst -> mkRU (use_R src []) [dst]
- TZCNT _ src dst -> mkRU (use_R src []) [dst]
BSF _ src dst -> mkRU (use_R src []) [dst]
BSR _ src dst -> mkRU (use_R src []) [dst]
@@ -657,8 +653,6 @@ x86_patchRegsOfInstr instr env
CLTD _ -> instr
POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
- LZCNT fmt src dst -> LZCNT fmt (patchOp src) (env dst)
- TZCNT fmt src dst -> TZCNT fmt (patchOp src) (env dst)
PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst)
PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst)
BSF fmt src dst -> BSF fmt (patchOp src) (env dst)
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 075bb26337..bf449d044e 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -693,8 +693,6 @@ pprInstr (XOR FF64 src dst) = pprOpOp (sLit "xorpd") FF64 src dst
pprInstr (XOR format src dst) = pprFormatOpOp (sLit "xor") format src dst
pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst)
-pprInstr (LZCNT format src dst) = pprOpOp (sLit "lzcnt") format src (OpReg dst)
-pprInstr (TZCNT format src dst) = pprOpOp (sLit "tzcnt") format src (OpReg dst)
pprInstr (BSF format src dst) = pprOpOp (sLit "bsf") format src (OpReg dst)
pprInstr (BSR format src dst) = pprOpOp (sLit "bsr") format src (OpReg dst)