summaryrefslogtreecommitdiff
path: root/compiler/nativeGen
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r--compiler/nativeGen/CPrim.hs20
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs2
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs2
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs69
-rw-r--r--compiler/nativeGen/X86/Instr.hs9
-rw-r--r--compiler/nativeGen/X86/Ppr.hs13
6 files changed, 115 insertions, 0 deletions
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs
index ad61a002d3..399d646000 100644
--- a/compiler/nativeGen/CPrim.hs
+++ b/compiler/nativeGen/CPrim.hs
@@ -5,6 +5,8 @@ module CPrim
, atomicRMWLabel
, cmpxchgLabel
, popCntLabel
+ , pdepLabel
+ , pextLabel
, bSwapLabel
, clzLabel
, ctzLabel
@@ -26,6 +28,24 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w
pprWidth W64 = "64"
pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
+pdepLabel :: Width -> String
+pdepLabel w = "hs_pdep" ++ pprWidth w
+ where
+ pprWidth W8 = "8"
+ pprWidth W16 = "16"
+ pprWidth W32 = "32"
+ pprWidth W64 = "64"
+ pprWidth w = pprPanic "pdepLabel: Unsupported word width " (ppr w)
+
+pextLabel :: Width -> String
+pextLabel w = "hs_pext" ++ pprWidth w
+ where
+ pprWidth W8 = "8"
+ pprWidth W16 = "16"
+ pprWidth W32 = "32"
+ pprWidth W64 = "64"
+ pprWidth w = pprPanic "pextLabel: Unsupported word width " (ppr w)
+
bSwapLabel :: Width -> String
bSwapLabel w = "hs_bswap" ++ pprWidth w
where
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 898a31a657..e2c568c836 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -2004,6 +2004,8 @@ genCCall' dflags gcp target dest_regs args
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
+ MO_Pdep w -> (fsLit $ pdepLabel w, False)
+ MO_Pext w -> (fsLit $ pextLabel w, False)
MO_Clz _ -> unsupported
MO_Ctz _ -> unsupported
MO_AtomicRMW {} -> unsupported
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 55c1d1531d..6dfd58950e 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -654,6 +654,8 @@ outOfLineMachOp_table mop
MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
+ MO_Pdep w -> fsLit $ pdepLabel w
+ MO_Pext w -> fsLit $ pextLabel w
MO_Clz w -> fsLit $ clzLabel w
MO_Ctz w -> fsLit $ ctzLabel w
MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 6c0e0ac783..62ed72163d 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1872,6 +1872,72 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
format = intFormat width
lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
+genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
+ args@[src, mask] = do
+ let platform = targetPlatform dflags
+ if isBmi2Enabled dflags
+ then do code_src <- getAnyReg src
+ code_mask <- getAnyReg mask
+ src_r <- getNewRegNat format
+ mask_r <- getNewRegNat format
+ let dst_r = getRegisterReg platform False (CmmLocal dst)
+ return $ code_src src_r `appOL` code_mask mask_r `appOL`
+ (if width == W8 then
+ -- The PDEP instruction doesn't take a r/m8
+ unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
+ unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
+ unitOL (PDEP II16 (OpReg mask_r) (OpReg src_r ) dst_r)
+ else
+ unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
+ (if width == W8 || width == W16 then
+ -- We used a 16-bit destination register above,
+ -- so zero-extend
+ unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
+ else nilOL)
+ else do
+ targetExpr <- cmmMakeDynamicReference dflags
+ CallReference lbl
+ let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+ [NoHint] [NoHint]
+ CmmMayReturn)
+ genCCall dflags is32Bit target dest_regs args
+ where
+ format = intFormat width
+ lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
+
+genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
+ args@[src, mask] = do
+ let platform = targetPlatform dflags
+ if isBmi2Enabled dflags
+ then do code_src <- getAnyReg src
+ code_mask <- getAnyReg mask
+ src_r <- getNewRegNat format
+ mask_r <- getNewRegNat format
+ let dst_r = getRegisterReg platform False (CmmLocal dst)
+ return $ code_src src_r `appOL` code_mask mask_r `appOL`
+ (if width == W8 then
+ -- The PEXT instruction doesn't take a r/m8
+ unitOL (MOVZxL II8 (OpReg src_r ) (OpReg src_r )) `appOL`
+ unitOL (MOVZxL II8 (OpReg mask_r) (OpReg mask_r)) `appOL`
+ unitOL (PEXT II16 (OpReg mask_r) (OpReg src_r) dst_r)
+ else
+ unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r)) `appOL`
+ (if width == W8 || width == W16 then
+ -- We used a 16-bit destination register above,
+ -- so zero-extend
+ unitOL (MOVZxL II16 (OpReg dst_r) (OpReg dst_r))
+ else nilOL)
+ else do
+ targetExpr <- cmmMakeDynamicReference dflags
+ CallReference lbl
+ let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+ [NoHint] [NoHint]
+ CmmMayReturn)
+ genCCall dflags is32Bit target dest_regs args
+ where
+ format = intFormat width
+ lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
+
genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
| is32Bit && width == W64 = do
-- Fallback to `hs_clz64` on i386
@@ -2689,6 +2755,9 @@ outOfLineCmmOp mop res args
MO_Clz w -> fsLit $ clzLabel w
MO_Ctz _ -> unsupported
+ MO_Pdep _ -> fsLit "hs_pdep"
+ MO_Pext _ -> fsLit "hs_pext"
+
MO_AtomicRMW _ _ -> fsLit "atomicrmw"
MO_AtomicRead _ -> fsLit "atomicread"
MO_AtomicWrite _ -> fsLit "atomicwrite"
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 1bb682ad87..fbe7383187 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -345,6 +345,10 @@ data Instr
| BSF Format Operand Reg -- bit scan forward
| BSR Format Operand Reg -- bit scan reverse
+ -- bit manipulation instructions
+ | PDEP Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask
+ | PEXT Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
+
-- prefetch
| PREFETCH PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
-- variant can be NTA, Lvl0, Lvl1, or Lvl2
@@ -464,6 +468,9 @@ x86_regUsageOfInstr platform instr
BSF _ src dst -> mkRU (use_R src []) [dst]
BSR _ src dst -> mkRU (use_R src []) [dst]
+ PDEP _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+ PEXT _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+
-- note: might be a better way to do this
PREFETCH _ _ src -> mkRU (use_R src []) []
LOCK i -> x86_regUsageOfInstr platform i
@@ -640,6 +647,8 @@ x86_patchRegsOfInstr instr env
CLTD _ -> instr
POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
+ PDEP fmt src mask dst -> PDEP fmt (patchOp src) (patchOp mask) (env dst)
+ PEXT fmt src mask dst -> PEXT fmt (patchOp src) (patchOp mask) (env dst)
BSF fmt src dst -> BSF fmt (patchOp src) (env dst)
BSR fmt src dst -> BSR fmt (patchOp src) (env dst)
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 84ce7516b5..f5011b2a95 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -648,6 +648,9 @@ pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst
pprInstr (BSF format src dst) = pprOpOp (sLit "bsf") format src (OpReg dst)
pprInstr (BSR format src dst) = pprOpOp (sLit "bsr") format src (OpReg dst)
+pprInstr (PDEP format src mask dst) = pprFormatOpOpReg (sLit "pdep") format src mask dst
+pprInstr (PEXT format src mask dst) = pprFormatOpOpReg (sLit "pext") format src mask dst
+
pprInstr (PREFETCH NTA format src ) = pprFormatOp_ (sLit "prefetchnta") format src
pprInstr (PREFETCH Lvl0 format src) = pprFormatOp_ (sLit "prefetcht0") format src
pprInstr (PREFETCH Lvl1 format src) = pprFormatOp_ (sLit "prefetcht1") format src
@@ -1262,6 +1265,16 @@ pprFormatRegRegReg name format reg1 reg2 reg3
pprReg format reg3
]
+pprFormatOpOpReg :: LitString -> Format -> Operand -> Operand -> Reg -> SDoc
+pprFormatOpOpReg name format op1 op2 reg3
+ = hcat [
+ pprMnemonic name format,
+ pprOperand format op1,
+ comma,
+ pprOperand format op2,
+ comma,
+ pprReg format reg3
+ ]
pprFormatAddrReg :: LitString -> Format -> AddrMode -> Reg -> SDoc
pprFormatAddrReg name format op dst