diff options
author | Johan Tibell <johan.tibell@gmail.com> | 2011-07-20 18:29:22 +0200 |
---|---|---|
committer | Simon Marlow <marlowsd@gmail.com> | 2011-08-16 16:48:04 +0100 |
commit | 2d0438f329ac153f9e59155f405d27fac0c43d65 (patch) | |
tree | bb3ed4a2fb183ca163a78b553b579079fae75c11 | |
parent | 49dbe60558deee5ea6cd2c7730b7c591d15559c8 (diff) | |
download | haskell-2d0438f329ac153f9e59155f405d27fac0c43d65.tar.gz |
Add popCnt# primop
-rw-r--r-- | compiler/cmm/CmmMachOp.hs | 2 | ||||
-rw-r--r-- | compiler/codeGen/CgPrimOp.hs | 17 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmPrim.hs | 14 | ||||
-rw-r--r-- | compiler/ghc.cabal.in | 1 | ||||
-rw-r--r-- | compiler/main/DynFlags.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/CPrim.hs | 14 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/CodeGen.hs | 3 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/CodeGen/CCall.hs | 3 | ||||
-rw-r--r-- | compiler/nativeGen/X86/CodeGen.hs | 30 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Instr.hs | 6 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 2 | ||||
-rw-r--r-- | compiler/prelude/primops.txt.pp | 19 |
12 files changed, 109 insertions, 4 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs index aa166847eb..2effa3a45f 100644 --- a/compiler/cmm/CmmMachOp.hs +++ b/compiler/cmm/CmmMachOp.hs @@ -448,6 +448,8 @@ data CallishMachOp | MO_Memcpy | MO_Memset | MO_Memmove + + | MO_PopCnt Width deriving (Eq, Show) pprCallishMachOp :: CallishMachOp -> SDoc diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs index c2a57a40d2..25d63d8002 100644 --- a/compiler/codeGen/CgPrimOp.hs +++ b/compiler/codeGen/CgPrimOp.hs @@ -374,6 +374,12 @@ emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] live = emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] live = doCopyMutableByteArrayOp src src_off dst dst_off n live +-- Population count +emitPrimOp [res] PopCnt8Op [w] live = emitPopCntCall res w W8 live +emitPrimOp [res] PopCnt16Op [w] live = emitPopCntCall res w W16 live +emitPrimOp [res] PopCnt32Op [w] live = emitPopCntCall res w W32 live +emitPrimOp [res] PopCnt64Op [w] live = emitPopCntCall res w W64 live +emitPrimOp [res] PopCntOp [w] live = emitPopCntCall res w wordWidth live -- The rest just translate straightforwardly emitPrimOp [res] op [arg] _ @@ -908,3 +914,14 @@ emitAllocateCall res cap n live = do where allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing ForeignLabelInExternalPackage IsFunction)) + +emitPopCntCall :: LocalReg -> CmmExpr -> Width -> StgLiveVars -> Code +emitPopCntCall res x width live = do + vols <- getVolatileRegs live + emitForeignCall' PlayRisky + [CmmHinted res NoHint] + (CmmPrim (MO_PopCnt width)) + [(CmmHinted x NoHint)] + (Just vols) + NoC_SRT -- No SRT b/c we do PlayRisky + CmmMayReturn diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs index c71d285735..b68bb601eb 100644 --- a/compiler/codeGen/StgCmmPrim.hs +++ b/compiler/codeGen/StgCmmPrim.hs @@ -443,6 +443,13 @@ emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] = emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] = doCopyMutableByteArrayOp src src_off dst dst_off n +-- Population count +emitPrimOp [res] PopCnt8Op [w] = emitPopCntCall res w W8 +emitPrimOp [res] PopCnt16Op [w] = emitPopCntCall res w W16 +emitPrimOp [res] PopCnt32Op [w] = emitPopCntCall res w W32 +emitPrimOp [res] PopCnt64Op [w] = emitPopCntCall res w W64 +emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth + -- The rest just translate straightforwardly emitPrimOp [res] op [arg] | nopOp op @@ -940,3 +947,10 @@ emitAllocateCall res cap n = do where allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing ForeignLabelInExternalPackage IsFunction)) + +emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode () +emitPopCntCall res x width = do + emitPrimCall + [ res ] + (MO_PopCnt width) + [ x ] diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in index 665e3831f2..e393bb7e7f 100644 --- a/compiler/ghc.cabal.in +++ b/compiler/ghc.cabal.in @@ -497,6 +497,7 @@ Library RegClass PIC Platform + CPrim X86.Regs X86.RegInfo X86.Instr diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs index d850ac7657..5b23876b36 100644 --- a/compiler/main/DynFlags.hs +++ b/compiler/main/DynFlags.hs @@ -276,6 +276,7 @@ data DynFlag | Opt_SharedImplib | Opt_BuildingCabalPackage | Opt_SSE2 + | Opt_SSE4_2 | Opt_GhciSandbox | Opt_HelpfulErrors @@ -1518,6 +1519,7 @@ dynamic_flags = [ , flagA "monly-3-regs" (NoArg (addWarn "The -monly-3-regs flag does nothing; it will be removed in a future GHC release")) , flagA "monly-4-regs" (NoArg (addWarn "The -monly-4-regs flag does nothing; it will be removed in a future GHC release")) , flagA "msse2" (NoArg (setDynFlag Opt_SSE2)) + , flagA "msse4.2" (NoArg (setDynFlag Opt_SSE4_2)) ------ Warning opts ------------------------------------------------- , flagA "W" (NoArg (mapM_ setWarningFlag minusWOpts)) diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs new file mode 100644 index 0000000000..09707ac5ae --- /dev/null +++ b/compiler/nativeGen/CPrim.hs @@ -0,0 +1,14 @@ +-- | Generating C symbol names emitted by the compiler. +module CPrim (popCntLabel) where + +import CmmType +import Outputable + +popCntLabel :: Width -> String +popCntLabel w = "hs_popcnt" ++ pprWidth w + where + pprWidth W8 = "8" + pprWidth W16 = "16" + pprWidth W32 = "32" + pprWidth W64 = "64" + pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w) diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs index a0e3ae92b5..b1936fe124 100644 --- a/compiler/nativeGen/PPC/CodeGen.hs +++ b/compiler/nativeGen/PPC/CodeGen.hs @@ -28,6 +28,7 @@ where import PPC.Instr import PPC.Cond import PPC.Regs +import CPrim import NCGMonad import Instruction import PIC @@ -1142,6 +1143,8 @@ genCCall' gcp target dest_regs argsAndHints MO_Memset -> (fsLit "memset", False) MO_Memmove -> (fsLit "memmove", False) + MO_PopCnt w -> (fsLit $ popCntLabel w, False) + other -> pprPanic "genCCall(ppc): unknown callish op" (pprCallishMachOp other) diff --git a/compiler/nativeGen/SPARC/CodeGen/CCall.hs b/compiler/nativeGen/SPARC/CodeGen/CCall.hs index 3e629c47f5..99ef441eab 100644 --- a/compiler/nativeGen/SPARC/CodeGen/CCall.hs +++ b/compiler/nativeGen/SPARC/CodeGen/CCall.hs @@ -13,6 +13,7 @@ import SPARC.Instr import SPARC.Imm import SPARC.Regs import SPARC.Base +import CPrim import NCGMonad import PIC import Instruction @@ -332,5 +333,7 @@ outOfLineMachOp_table mop MO_Memset -> fsLit "memset" MO_Memmove -> fsLit "memmove" + MO_PopCnt w -> fsLit $ popCntLabel w + _ -> pprPanic "outOfLineMachOp(sparc): Unknown callish mach op " (pprCallishMachOp mop) diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs index b47f11fa69..b929c5eb2e 100644 --- a/compiler/nativeGen/X86/CodeGen.hs +++ b/compiler/nativeGen/X86/CodeGen.hs @@ -28,6 +28,7 @@ import X86.Instr import X86.Cond import X86.Regs import X86.RegInfo +import CPrim import Instruction import PIC import NCGMonad @@ -70,9 +71,14 @@ sse2Enabled = do -- calling convention specifies the use of xmm regs, -- and possibly other places. return True - ArchX86 -> return (dopt Opt_SSE2 dflags) + ArchX86 -> return (dopt Opt_SSE2 dflags || dopt Opt_SSE4_2 dflags) _ -> panic "sse2Enabled: Not an X86* arch" +sse4_2Enabled :: NatM Bool +sse4_2Enabled = do + dflags <- getDynFlagsNat + return (dopt Opt_SSE4_2 dflags) + if_sse2 :: NatM a -> NatM a -> NatM a if_sse2 sse2 x87 = do b <- sse2Enabled @@ -1574,6 +1580,26 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL -- write barrier compiles to no code on x86/x86-64; -- we keep it this long in order to prevent earlier optimisations. +genCCall (CmmPrim (MO_PopCnt width)) dest_regs@[CmmHinted dst _] + args@[CmmHinted src _] = do + sse4_2 <- sse4_2Enabled + if sse4_2 + then do code_src <- getAnyReg src + src_r <- getNewRegNat size + return $ code_src src_r `appOL` + (if width == W8 then + -- The POPCNT instruction doesn't take a r/m8 + unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL` + unitOL (POPCNT II16 (OpReg src_r) + (getRegisterReg False (CmmLocal dst))) + else + unitOL (POPCNT size (OpReg src_r) + (getRegisterReg False (CmmLocal dst)))) + else genCCall (CmmCallee (fn width) CCallConv) dest_regs args + where size = intSize width + fn w = CmmLit (CmmLabel (mkForeignLabel (fsLit (popCntLabel w)) Nothing + ForeignLabelInExternalPackage IsFunction)) + genCCall target dest_regs args = do dflags <- getDynFlagsNat if target32Bit (targetPlatform dflags) @@ -1990,6 +2016,8 @@ outOfLineCmmOp mop res args MO_Memset -> fsLit "memset" MO_Memmove -> fsLit "memmove" + MO_PopCnt _ -> fsLit "popcnt" + other -> panic $ "outOfLineCmmOp: unmatched op! (" ++ show other ++ ")" diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs index 0e292ac21f..fd0fa7867a 100644 --- a/compiler/nativeGen/X86/Instr.hs +++ b/compiler/nativeGen/X86/Instr.hs @@ -310,6 +310,8 @@ data Instr -- call 1f -- 1: popl %reg + -- SSE4.2 + | POPCNT Size Operand Reg -- src, dst data Operand = OpReg Reg -- register @@ -403,6 +405,8 @@ x86_regUsageOfInstr instr COMMENT _ -> noUsage DELTA _ -> noUsage + POPCNT _ src dst -> mkRU (use_R src) [dst] + _other -> panic "regUsage: unrecognised instr" where @@ -539,6 +543,8 @@ x86_patchRegsOfInstr instr env JXX_GBL _ _ -> instr CLTD _ -> instr + POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst) + _other -> panic "patchRegs: unrecognised instr" where diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index a755d839fb..9ac33f2598 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -574,6 +574,8 @@ pprInstr platform (XOR FF32 src dst) = pprOpOp platform (sLit "xorps") FF32 src pprInstr platform (XOR FF64 src dst) = pprOpOp platform (sLit "xorpd") FF64 src dst pprInstr platform (XOR size src dst) = pprSizeOpOp platform (sLit "xor") size src dst +pprInstr platform (POPCNT size src dst) = pprOpOp platform (sLit "popcnt") size src (OpReg dst) + pprInstr platform (NOT size op) = pprSizeOp platform (sLit "not") size op pprInstr platform (NEGI size op) = pprSizeOp platform (sLit "neg") size op diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp index 49498466e3..ee0ec22e6e 100644 --- a/compiler/prelude/primops.txt.pp +++ b/compiler/prelude/primops.txt.pp @@ -302,6 +302,22 @@ primop WordNeOp "neWord#" Compare Word# -> Word# -> Bool primop WordLtOp "ltWord#" Compare Word# -> Word# -> Bool primop WordLeOp "leWord#" Compare Word# -> Word# -> Bool +primop PopCnt8Op "popCnt8#" Monadic Word# -> Word# + {Count the number of set bits in the lower 8 bits of a word.} +primop PopCnt16Op "popCnt16#" Monadic Word# -> Word# + {Count the number of set bits in the lower 16 bits of a word.} +primop PopCnt32Op "popCnt32#" Monadic Word# -> Word# + {Count the number of set bits in the lower 32 bits of a word.} +#if WORD_SIZE_IN_BITS < 64 +primop PopCnt64Op "popCnt64#" Monadic Word64# -> Word# + {Count the number of set bits in a 64-bit word.} +#else +primop PopCnt64Op "popCnt64#" Monadic Word# -> Word# + {Count the number of set bits in a 64-bit word.} +#endif +primop PopCntOp "popCnt#" Monadic Word# -> Word# + {Count the number of set bits in a word.} + ------------------------------------------------------------------------ section "Narrowings" {Explicit narrowing of native-sized ints or words.} @@ -1926,6 +1942,3 @@ primop TraceEventOp "traceEvent#" GenPrimOp ------------------------------------------------------------------------ thats_all_folks - - - |