summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohan Tibell <johan.tibell@gmail.com>2011-07-20 18:29:22 +0200
committerSimon Marlow <marlowsd@gmail.com>2011-08-16 16:48:04 +0100
commit2d0438f329ac153f9e59155f405d27fac0c43d65 (patch)
treebb3ed4a2fb183ca163a78b553b579079fae75c11
parent49dbe60558deee5ea6cd2c7730b7c591d15559c8 (diff)
downloadhaskell-2d0438f329ac153f9e59155f405d27fac0c43d65.tar.gz
Add popCnt# primop
-rw-r--r--compiler/cmm/CmmMachOp.hs2
-rw-r--r--compiler/codeGen/CgPrimOp.hs17
-rw-r--r--compiler/codeGen/StgCmmPrim.hs14
-rw-r--r--compiler/ghc.cabal.in1
-rw-r--r--compiler/main/DynFlags.hs2
-rw-r--r--compiler/nativeGen/CPrim.hs14
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs3
-rw-r--r--compiler/nativeGen/SPARC/CodeGen/CCall.hs3
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs30
-rw-r--r--compiler/nativeGen/X86/Instr.hs6
-rw-r--r--compiler/nativeGen/X86/Ppr.hs2
-rw-r--r--compiler/prelude/primops.txt.pp19
12 files changed, 109 insertions, 4 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index aa166847eb..2effa3a45f 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -448,6 +448,8 @@ data CallishMachOp
| MO_Memcpy
| MO_Memset
| MO_Memmove
+
+ | MO_PopCnt Width
deriving (Eq, Show)
pprCallishMachOp :: CallishMachOp -> SDoc
diff --git a/compiler/codeGen/CgPrimOp.hs b/compiler/codeGen/CgPrimOp.hs
index c2a57a40d2..25d63d8002 100644
--- a/compiler/codeGen/CgPrimOp.hs
+++ b/compiler/codeGen/CgPrimOp.hs
@@ -374,6 +374,12 @@ emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] live =
emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] live =
doCopyMutableByteArrayOp src src_off dst dst_off n live
+-- Population count
+emitPrimOp [res] PopCnt8Op [w] live = emitPopCntCall res w W8 live
+emitPrimOp [res] PopCnt16Op [w] live = emitPopCntCall res w W16 live
+emitPrimOp [res] PopCnt32Op [w] live = emitPopCntCall res w W32 live
+emitPrimOp [res] PopCnt64Op [w] live = emitPopCntCall res w W64 live
+emitPrimOp [res] PopCntOp [w] live = emitPopCntCall res w wordWidth live
-- The rest just translate straightforwardly
emitPrimOp [res] op [arg] _
@@ -908,3 +914,14 @@ emitAllocateCall res cap n live = do
where
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
+
+emitPopCntCall :: LocalReg -> CmmExpr -> Width -> StgLiveVars -> Code
+emitPopCntCall res x width live = do
+ vols <- getVolatileRegs live
+ emitForeignCall' PlayRisky
+ [CmmHinted res NoHint]
+ (CmmPrim (MO_PopCnt width))
+ [(CmmHinted x NoHint)]
+ (Just vols)
+ NoC_SRT -- No SRT b/c we do PlayRisky
+ CmmMayReturn
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index c71d285735..b68bb601eb 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -443,6 +443,13 @@ emitPrimOp [] CopyByteArrayOp [src,src_off,dst,dst_off,n] =
emitPrimOp [] CopyMutableByteArrayOp [src,src_off,dst,dst_off,n] =
doCopyMutableByteArrayOp src src_off dst dst_off n
+-- Population count
+emitPrimOp [res] PopCnt8Op [w] = emitPopCntCall res w W8
+emitPrimOp [res] PopCnt16Op [w] = emitPopCntCall res w W16
+emitPrimOp [res] PopCnt32Op [w] = emitPopCntCall res w W32
+emitPrimOp [res] PopCnt64Op [w] = emitPopCntCall res w W64
+emitPrimOp [res] PopCntOp [w] = emitPopCntCall res w wordWidth
+
-- The rest just translate straightforwardly
emitPrimOp [res] op [arg]
| nopOp op
@@ -940,3 +947,10 @@ emitAllocateCall res cap n = do
where
allocate = CmmLit (CmmLabel (mkForeignLabel (fsLit "allocate") Nothing
ForeignLabelInExternalPackage IsFunction))
+
+emitPopCntCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitPopCntCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_PopCnt width)
+ [ x ]
diff --git a/compiler/ghc.cabal.in b/compiler/ghc.cabal.in
index 665e3831f2..e393bb7e7f 100644
--- a/compiler/ghc.cabal.in
+++ b/compiler/ghc.cabal.in
@@ -497,6 +497,7 @@ Library
RegClass
PIC
Platform
+ CPrim
X86.Regs
X86.RegInfo
X86.Instr
diff --git a/compiler/main/DynFlags.hs b/compiler/main/DynFlags.hs
index d850ac7657..5b23876b36 100644
--- a/compiler/main/DynFlags.hs
+++ b/compiler/main/DynFlags.hs
@@ -276,6 +276,7 @@ data DynFlag
| Opt_SharedImplib
| Opt_BuildingCabalPackage
| Opt_SSE2
+ | Opt_SSE4_2
| Opt_GhciSandbox
| Opt_HelpfulErrors
@@ -1518,6 +1519,7 @@ dynamic_flags = [
, flagA "monly-3-regs" (NoArg (addWarn "The -monly-3-regs flag does nothing; it will be removed in a future GHC release"))
, flagA "monly-4-regs" (NoArg (addWarn "The -monly-4-regs flag does nothing; it will be removed in a future GHC release"))
, flagA "msse2" (NoArg (setDynFlag Opt_SSE2))
+ , flagA "msse4.2" (NoArg (setDynFlag Opt_SSE4_2))
------ Warning opts -------------------------------------------------
, flagA "W" (NoArg (mapM_ setWarningFlag minusWOpts))
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs
new file mode 100644
index 0000000000..09707ac5ae
--- /dev/null
+++ b/compiler/nativeGen/CPrim.hs
@@ -0,0 +1,14 @@
+-- | Generating C symbol names emitted by the compiler.
+module CPrim (popCntLabel) where
+
+import CmmType
+import Outputable
+
+popCntLabel :: Width -> String
+popCntLabel w = "hs_popcnt" ++ pprWidth w
+ where
+ pprWidth W8 = "8"
+ pprWidth W16 = "16"
+ pprWidth W32 = "32"
+ pprWidth W64 = "64"
+ pprWidth w = pprPanic "popCntLabel: Unsupported word width " (ppr w)
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index a0e3ae92b5..b1936fe124 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -28,6 +28,7 @@ where
import PPC.Instr
import PPC.Cond
import PPC.Regs
+import CPrim
import NCGMonad
import Instruction
import PIC
@@ -1142,6 +1143,8 @@ genCCall' gcp target dest_regs argsAndHints
MO_Memset -> (fsLit "memset", False)
MO_Memmove -> (fsLit "memmove", False)
+ MO_PopCnt w -> (fsLit $ popCntLabel w, False)
+
other -> pprPanic "genCCall(ppc): unknown callish op"
(pprCallishMachOp other)
diff --git a/compiler/nativeGen/SPARC/CodeGen/CCall.hs b/compiler/nativeGen/SPARC/CodeGen/CCall.hs
index 3e629c47f5..99ef441eab 100644
--- a/compiler/nativeGen/SPARC/CodeGen/CCall.hs
+++ b/compiler/nativeGen/SPARC/CodeGen/CCall.hs
@@ -13,6 +13,7 @@ import SPARC.Instr
import SPARC.Imm
import SPARC.Regs
import SPARC.Base
+import CPrim
import NCGMonad
import PIC
import Instruction
@@ -332,5 +333,7 @@ outOfLineMachOp_table mop
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
+ MO_PopCnt w -> fsLit $ popCntLabel w
+
_ -> pprPanic "outOfLineMachOp(sparc): Unknown callish mach op "
(pprCallishMachOp mop)
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index b47f11fa69..b929c5eb2e 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -28,6 +28,7 @@ import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
+import CPrim
import Instruction
import PIC
import NCGMonad
@@ -70,9 +71,14 @@ sse2Enabled = do
-- calling convention specifies the use of xmm regs,
-- and possibly other places.
return True
- ArchX86 -> return (dopt Opt_SSE2 dflags)
+ ArchX86 -> return (dopt Opt_SSE2 dflags || dopt Opt_SSE4_2 dflags)
_ -> panic "sse2Enabled: Not an X86* arch"
+sse4_2Enabled :: NatM Bool
+sse4_2Enabled = do
+ dflags <- getDynFlagsNat
+ return (dopt Opt_SSE4_2 dflags)
+
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
b <- sse2Enabled
@@ -1574,6 +1580,26 @@ genCCall (CmmPrim MO_WriteBarrier) _ _ = return nilOL
-- write barrier compiles to no code on x86/x86-64;
-- we keep it this long in order to prevent earlier optimisations.
+genCCall (CmmPrim (MO_PopCnt width)) dest_regs@[CmmHinted dst _]
+ args@[CmmHinted src _] = do
+ sse4_2 <- sse4_2Enabled
+ if sse4_2
+ then do code_src <- getAnyReg src
+ src_r <- getNewRegNat size
+ return $ code_src src_r `appOL`
+ (if width == W8 then
+ -- The POPCNT instruction doesn't take a r/m8
+ unitOL (MOVZxL II8 (OpReg src_r) (OpReg src_r)) `appOL`
+ unitOL (POPCNT II16 (OpReg src_r)
+ (getRegisterReg False (CmmLocal dst)))
+ else
+ unitOL (POPCNT size (OpReg src_r)
+ (getRegisterReg False (CmmLocal dst))))
+ else genCCall (CmmCallee (fn width) CCallConv) dest_regs args
+ where size = intSize width
+ fn w = CmmLit (CmmLabel (mkForeignLabel (fsLit (popCntLabel w)) Nothing
+ ForeignLabelInExternalPackage IsFunction))
+
genCCall target dest_regs args =
do dflags <- getDynFlagsNat
if target32Bit (targetPlatform dflags)
@@ -1990,6 +2016,8 @@ outOfLineCmmOp mop res args
MO_Memset -> fsLit "memset"
MO_Memmove -> fsLit "memmove"
+ MO_PopCnt _ -> fsLit "popcnt"
+
other -> panic $ "outOfLineCmmOp: unmatched op! (" ++ show other ++ ")"
diff --git a/compiler/nativeGen/X86/Instr.hs b/compiler/nativeGen/X86/Instr.hs
index 0e292ac21f..fd0fa7867a 100644
--- a/compiler/nativeGen/X86/Instr.hs
+++ b/compiler/nativeGen/X86/Instr.hs
@@ -310,6 +310,8 @@ data Instr
-- call 1f
-- 1: popl %reg
+ -- SSE4.2
+ | POPCNT Size Operand Reg -- src, dst
data Operand
= OpReg Reg -- register
@@ -403,6 +405,8 @@ x86_regUsageOfInstr instr
COMMENT _ -> noUsage
DELTA _ -> noUsage
+ POPCNT _ src dst -> mkRU (use_R src) [dst]
+
_other -> panic "regUsage: unrecognised instr"
where
@@ -539,6 +543,8 @@ x86_patchRegsOfInstr instr env
JXX_GBL _ _ -> instr
CLTD _ -> instr
+ POPCNT sz src dst -> POPCNT sz (patchOp src) (env dst)
+
_other -> panic "patchRegs: unrecognised instr"
where
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index a755d839fb..9ac33f2598 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -574,6 +574,8 @@ pprInstr platform (XOR FF32 src dst) = pprOpOp platform (sLit "xorps") FF32 src
pprInstr platform (XOR FF64 src dst) = pprOpOp platform (sLit "xorpd") FF64 src dst
pprInstr platform (XOR size src dst) = pprSizeOpOp platform (sLit "xor") size src dst
+pprInstr platform (POPCNT size src dst) = pprOpOp platform (sLit "popcnt") size src (OpReg dst)
+
pprInstr platform (NOT size op) = pprSizeOp platform (sLit "not") size op
pprInstr platform (NEGI size op) = pprSizeOp platform (sLit "neg") size op
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 49498466e3..ee0ec22e6e 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -302,6 +302,22 @@ primop WordNeOp "neWord#" Compare Word# -> Word# -> Bool
primop WordLtOp "ltWord#" Compare Word# -> Word# -> Bool
primop WordLeOp "leWord#" Compare Word# -> Word# -> Bool
+primop PopCnt8Op "popCnt8#" Monadic Word# -> Word#
+ {Count the number of set bits in the lower 8 bits of a word.}
+primop PopCnt16Op "popCnt16#" Monadic Word# -> Word#
+ {Count the number of set bits in the lower 16 bits of a word.}
+primop PopCnt32Op "popCnt32#" Monadic Word# -> Word#
+ {Count the number of set bits in the lower 32 bits of a word.}
+#if WORD_SIZE_IN_BITS < 64
+primop PopCnt64Op "popCnt64#" Monadic Word64# -> Word#
+ {Count the number of set bits in a 64-bit word.}
+#else
+primop PopCnt64Op "popCnt64#" Monadic Word# -> Word#
+ {Count the number of set bits in a 64-bit word.}
+#endif
+primop PopCntOp "popCnt#" Monadic Word# -> Word#
+ {Count the number of set bits in a word.}
+
------------------------------------------------------------------------
section "Narrowings"
{Explicit narrowing of native-sized ints or words.}
@@ -1926,6 +1942,3 @@ primop TraceEventOp "traceEvent#" GenPrimOp
------------------------------------------------------------------------
thats_all_folks
-
-
-