diff options
Diffstat (limited to 'compiler/nativeGen')
-rw-r--r-- | compiler/nativeGen/CPrim.hs | 20 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 69 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 9 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 13 |
6 files changed, 115 insertions, 0 deletions
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs
index ad61a002d3..399d646000 100644
--- a/compiler/nativeGen/CPrim.hs
+++ b/compiler/nativeGen/CPrim.hs
@@ -5,6 +5,8 @@ module CPrim
     , atomicRMWLabel
     , cmpxchgLabel
     , popCntLabel
+    , pdepLabel
+    , pextLabel
     , bSwapLabel
     , clzLabel
     , ctzLabel
@@ -26,6 +28,24 @@ popCntLabel w = "hs_popcnt" ++ pprWidth w
     pprWidth W64 = "64"
     pprWidth w   = pprPanic "popCntLabel: Unsupported word width " (ppr w)
 
+pdepLabel :: Width -> String
+pdepLabel w = "hs_pdep" ++ pprWidth w
+  where
+    pprWidth W8  = "8"
+    pprWidth W16 = "16"
+    pprWidth W32 = "32"
+    pprWidth W64 = "64"
+    pprWidth w   = pprPanic "pdepLabel: Unsupported word width " (ppr w)
+
+pextLabel :: Width -> String
+pextLabel w = "hs_pext" ++ pprWidth w
+  where
+    pprWidth W8  = "8"
+    pprWidth W16 = "16"
+    pprWidth W32 = "32"
+    pprWidth W64 = "64"
+    pprWidth w   = pprPanic "pextLabel: Unsupported word width " (ppr w)
+
 bSwapLabel :: Width -> String
 bSwapLabel w = "hs_bswap" ++ pprWidth w
   where
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 898a31a657..e2c568c836 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -2004,6 +2004,8 @@ genCCall' dflags gcp target dest_regs args
 
                     MO_BSwap w   -> (fsLit $ bSwapLabel w, False)
                     MO_PopCnt w  -> (fsLit $ popCntLabel w, False)
+                    MO_Pdep w    -> (fsLit $ pdepLabel w, False)
+                    MO_Pext w    -> (fsLit $ pextLabel w, False)
                     MO_Clz _     -> unsupported
                     MO_Ctz _     -> unsupported
                     MO_AtomicRMW {} -> unsupported
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 55c1d1531d..6dfd58950e 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -654,6 +654,8 @@ outOfLineMachOp_table mop
 
         MO_BSwap w   -> fsLit $ bSwapLabel w
         MO_PopCnt w  -> fsLit $ popCntLabel w
+        MO_Pdep w    -> fsLit $ pdepLabel w
+        MO_Pext w    -> fsLit $ pextLabel w
         MO_Clz w     -> fsLit $ clzLabel w
         MO_Ctz w     -> fsLit $ ctzLabel w
         MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index 6c0e0ac783..62ed72163d 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1872,6 +1872,72 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
     format = intFormat width
     lbl = mkCmmCodeLabel primUnitId (fsLit (popCntLabel width))
 
+genCCall dflags is32Bit (PrimTarget (MO_Pdep width)) dest_regs@[dst]
+         args@[src, mask] = do
+    let platform = targetPlatform dflags
+    if isBmi2Enabled dflags
+        then do code_src  <- getAnyReg src
+                code_mask <- getAnyReg mask
+                src_r     <- getNewRegNat format
+                mask_r    <- getNewRegNat format
+                let dst_r = getRegisterReg platform False (CmmLocal dst)
+                return $ code_src src_r `appOL` code_mask mask_r `appOL`
+                    (if width == W8 || width == W16 then
+                         -- BMI2 only encodes PDEP for 32- and 64-bit
+                         -- operands: zero-extend the inputs, deposit at
+                         -- 32 bits, then truncate to the requested width.
+                         unitOL (MOVZxL format (OpReg src_r ) (OpReg src_r )) `appOL`
+                         unitOL (MOVZxL format (OpReg mask_r) (OpReg mask_r)) `appOL`
+                         unitOL (PDEP   II32   (OpReg mask_r) (OpReg src_r ) dst_r) `appOL`
+                         unitOL (MOVZxL format (OpReg dst_r ) (OpReg dst_r ))
+                     else
+                         -- W32 and W64 are encodable directly, so no
+                         -- widening or truncation is needed.
+                         unitOL (PDEP format (OpReg mask_r) (OpReg src_r) dst_r))
+        else do
+            targetExpr <- cmmMakeDynamicReference dflags
+                          CallReference lbl
+            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+                                                           [NoHint] [NoHint]
+                                                           CmmMayReturn)
+            genCCall dflags is32Bit target dest_regs args
+  where
+    format = intFormat width
+    lbl = mkCmmCodeLabel primUnitId (fsLit (pdepLabel width))
+
+genCCall dflags is32Bit (PrimTarget (MO_Pext width)) dest_regs@[dst]
+         args@[src, mask] = do
+    let platform = targetPlatform dflags
+    if isBmi2Enabled dflags
+        then do code_src  <- getAnyReg src
+                code_mask <- getAnyReg mask
+                src_r     <- getNewRegNat format
+                mask_r    <- getNewRegNat format
+                let dst_r = getRegisterReg platform False (CmmLocal dst)
+                return $ code_src src_r `appOL` code_mask mask_r `appOL`
+                    (if width == W8 || width == W16 then
+                         -- BMI2 only encodes PEXT for 32- and 64-bit
+                         -- operands. The mask must be zero-extended, or
+                         -- stray high bits would select extra source bits.
+                         unitOL (MOVZxL format (OpReg src_r ) (OpReg src_r )) `appOL`
+                         unitOL (MOVZxL format (OpReg mask_r) (OpReg mask_r)) `appOL`
+                         unitOL (PEXT   II32   (OpReg mask_r) (OpReg src_r ) dst_r) `appOL`
+                         unitOL (MOVZxL format (OpReg dst_r ) (OpReg dst_r ))
+                     else
+                         -- W32 and W64 are encodable directly, so no
+                         -- widening or truncation is needed.
+                         unitOL (PEXT format (OpReg mask_r) (OpReg src_r) dst_r))
+        else do
+            targetExpr <- cmmMakeDynamicReference dflags
+                          CallReference lbl
+            let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+                                                           [NoHint] [NoHint]
+                                                           CmmMayReturn)
+            genCCall dflags is32Bit target dest_regs args
+  where
+    format = intFormat width
+    lbl = mkCmmCodeLabel primUnitId (fsLit (pextLabel width))
+
 genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
   | is32Bit && width == W64 = do
     -- Fallback to `hs_clz64` on i386
@@ -2689,6 +2755,9 @@ outOfLineCmmOp mop res args
               MO_Clz w     -> fsLit $ clzLabel w
               MO_Ctz _     -> unsupported
 
+              MO_Pdep w    -> fsLit $ pdepLabel w
+              MO_Pext w    -> fsLit $ pextLabel w
+
               MO_AtomicRMW _ _ -> fsLit "atomicrmw"
               MO_AtomicRead _  -> fsLit "atomicread"
               MO_AtomicWrite _ -> fsLit "atomicwrite"
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 1bb682ad87..fbe7383187 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -345,6 +345,10 @@ data Instr
         | BSF         Format Operand Reg -- bit scan forward
         | BSR         Format Operand Reg -- bit scan reverse
 
+    -- bit manipulation instructions
+        | PDEP        Format Operand Operand Reg -- [BMI2] deposit bits to the specified mask
+        | PEXT        Format Operand Operand Reg -- [BMI2] extract bits from the specified mask
+
     -- prefetch
         | PREFETCH  PrefetchVariant Format Operand -- prefetch Variant, addr size, address to prefetch
                                         -- variant can be NTA, Lvl0, Lvl1, or Lvl2
@@ -464,6 +468,9 @@ x86_regUsageOfInstr platform instr
     BSF    _ src dst     -> mkRU (use_R src []) [dst]
     BSR    _ src dst     -> mkRU (use_R src []) [dst]
 
+    PDEP   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+    PEXT   _ src mask dst -> mkRU (use_R src $ use_R mask []) [dst]
+
     -- note: might be a better way to do this
     PREFETCH _  _ src    -> mkRU (use_R src []) []
     LOCK i               -> x86_regUsageOfInstr platform i
@@ -640,6 +647,8 @@ x86_patchRegsOfInstr instr env
     CLTD _              -> instr
 
     POPCNT fmt src dst -> POPCNT fmt (patchOp src) (env dst)
+    PDEP   fmt src mask dst -> PDEP   fmt (patchOp src) (patchOp mask) (env dst)
+    PEXT   fmt src mask dst -> PEXT   fmt (patchOp src) (patchOp mask) (env dst)
     BSF    fmt src dst -> BSF    fmt (patchOp src) (env dst)
     BSR    fmt src dst -> BSR    fmt (patchOp src) (env dst)
 
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index 84ce7516b5..f5011b2a95 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -648,6 +648,9 @@ pprInstr (POPCNT format src dst) = pprOpOp (sLit "popcnt") format src (OpReg dst
 pprInstr (BSF format src dst)    = pprOpOp (sLit "bsf")    format src (OpReg dst)
 pprInstr (BSR format src dst)    = pprOpOp (sLit "bsr")    format src (OpReg dst)
 
+pprInstr (PDEP format src mask dst)   = pprFormatOpOpReg (sLit "pdep") format src mask dst
+pprInstr (PEXT format src mask dst)   = pprFormatOpOpReg (sLit "pext") format src mask dst
+
 pprInstr (PREFETCH NTA format src ) = pprFormatOp_ (sLit "prefetchnta") format src
 pprInstr (PREFETCH Lvl0 format src) = pprFormatOp_ (sLit "prefetcht0") format src
 pprInstr (PREFETCH Lvl1 format src) = pprFormatOp_ (sLit "prefetcht1") format src
@@ -1262,6 +1265,16 @@ pprFormatRegRegReg name format reg1 reg2 reg3
         pprReg format reg3
     ]
 
+pprFormatOpOpReg :: LitString -> Format -> Operand -> Operand -> Reg -> SDoc
+pprFormatOpOpReg name format op1 op2 reg3
+  = hcat [
+        pprMnemonic name format,
+        pprOperand format op1,
+        comma,
+        pprOperand format op2,
+        comma,
+        pprReg format reg3
+    ]
 
 pprFormatAddrReg :: LitString -> Format -> AddrMode -> Reg -> SDoc
 pprFormatAddrReg name format op dst