summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/cmm/CmmMachOp.hs3
-rw-r--r--compiler/cmm/PprC.hs2
-rw-r--r--compiler/codeGen/StgCmmPrim.hs28
-rw-r--r--compiler/llvmGen/LlvmCodeGen/CodeGen.hs9
-rw-r--r--compiler/nativeGen/CPrim.hs20
-rw-r--r--compiler/nativeGen/PPC/CodeGen.hs2
-rw-r--r--compiler/nativeGen/SPARC/CodeGen.hs2
-rw-r--r--compiler/nativeGen/X86/CodeGen.hs65
-rw-r--r--compiler/prelude/primops.txt.pp22
-rw-r--r--includes/stg/Prim.h12
-rw-r--r--libraries/ghc-prim/cbits/clz.c41
-rw-r--r--libraries/ghc-prim/cbits/ctz.c41
-rw-r--r--libraries/ghc-prim/ghc-prim.cabal2
-rw-r--r--testsuite/.gitignore1
-rw-r--r--testsuite/tests/codeGen/should_run/T9340.hs96
-rw-r--r--testsuite/tests/codeGen/should_run/T9340.stdout1
-rw-r--r--testsuite/tests/codeGen/should_run/all.T1
17 files changed, 347 insertions, 1 deletions
diff --git a/compiler/cmm/CmmMachOp.hs b/compiler/cmm/CmmMachOp.hs
index d8ce492de1..a7b2c85175 100644
--- a/compiler/cmm/CmmMachOp.hs
+++ b/compiler/cmm/CmmMachOp.hs
@@ -549,6 +549,9 @@ data CallishMachOp
| MO_Memmove
| MO_PopCnt Width
+ | MO_Clz Width
+ | MO_Ctz Width
+
| MO_BSwap Width
-- Atomic read-modify-write.
diff --git a/compiler/cmm/PprC.hs b/compiler/cmm/PprC.hs
index 455c79ba02..93a5d06259 100644
--- a/compiler/cmm/PprC.hs
+++ b/compiler/cmm/PprC.hs
@@ -753,6 +753,8 @@ pprCallishMachOp_for_C mop
MO_Memmove -> ptext (sLit "memmove")
(MO_BSwap w) -> ptext (sLit $ bSwapLabel w)
(MO_PopCnt w) -> ptext (sLit $ popCntLabel w)
+ (MO_Clz w) -> ptext (sLit $ clzLabel w)
+ (MO_Ctz w) -> ptext (sLit $ ctzLabel w)
(MO_AtomicRMW w amop) -> ptext (sLit $ atomicRMWLabel w amop)
(MO_Cmpxchg w) -> ptext (sLit $ cmpxchgLabel w)
(MO_AtomicRead w) -> ptext (sLit $ atomicReadLabel w)
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 77739fe0fb..9e12427355 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -563,6 +563,20 @@ emitPrimOp _ [res] PopCnt32Op [w] = emitPopCntCall res w W32
emitPrimOp _ [res] PopCnt64Op [w] = emitPopCntCall res w W64
emitPrimOp dflags [res] PopCntOp [w] = emitPopCntCall res w (wordWidth dflags)
+-- count leading zeros
+emitPrimOp _ [res] Clz8Op [w] = emitClzCall res w W8
+emitPrimOp _ [res] Clz16Op [w] = emitClzCall res w W16
+emitPrimOp _ [res] Clz32Op [w] = emitClzCall res w W32
+emitPrimOp _ [res] Clz64Op [w] = emitClzCall res w W64
+emitPrimOp dflags [res] ClzOp [w] = emitClzCall res w (wordWidth dflags)
+
+-- count trailing zeros
+emitPrimOp _ [res] Ctz8Op [w] = emitCtzCall res w W8
+emitPrimOp _ [res] Ctz16Op [w] = emitCtzCall res w W16
+emitPrimOp _ [res] Ctz32Op [w] = emitCtzCall res w W32
+emitPrimOp _ [res] Ctz64Op [w] = emitCtzCall res w W64
+emitPrimOp dflags [res] CtzOp [w] = emitCtzCall res w (wordWidth dflags)
+
-- Unsigned int to floating point conversions
emitPrimOp _ [res] Word2FloatOp [w] = emitPrimCall [res]
(MO_UF_Conv W32) [w]
@@ -2096,3 +2110,17 @@ emitPopCntCall res x width = do
[ res ]
(MO_PopCnt width)
[ x ]
+
+emitClzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitClzCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_Clz width)
+ [ x ]
+
+emitCtzCall :: LocalReg -> CmmExpr -> Width -> FCode ()
+emitCtzCall res x width = do
+ emitPrimCall
+ [ res ]
+ (MO_Ctz width)
+ [ x ]
diff --git a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
index 4a56600937..2673eed6b8 100644
--- a/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
+++ b/compiler/llvmGen/LlvmCodeGen/CodeGen.hs
@@ -224,9 +224,14 @@ genCall t@(PrimTarget (MO_Prefetch_Data localityInt)) [] args
return (stmts, top1 ++ top2)
| otherwise = panic $ "prefetch locality level integer must be between 0 and 3, given: " ++ (show localityInt)
--- Handle PopCnt and BSwap that need to only convert arg and return types
+-- Handle PopCnt, Clz, Ctz, and BSwap that need to only convert arg
+-- and return types
genCall t@(PrimTarget (MO_PopCnt w)) dsts args =
genCallSimpleCast w t dsts args
+genCall t@(PrimTarget (MO_Clz w)) dsts args =
+ genCallSimpleCast w t dsts args
+genCall t@(PrimTarget (MO_Ctz w)) dsts args =
+ genCallSimpleCast w t dsts args
genCall t@(PrimTarget (MO_BSwap w)) dsts args =
genCallSimpleCast w t dsts args
@@ -558,6 +563,8 @@ cmmPrimOpFunctions mop = do
(MO_PopCnt w) -> fsLit $ "llvm.ctpop." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_BSwap w) -> fsLit $ "llvm.bswap." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
+ (MO_Clz w) -> fsLit $ "llvm.ctlz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
+ (MO_Ctz w) -> fsLit $ "llvm.cttz." ++ showSDoc dflags (ppr $ widthToLlvmInt w)
(MO_Prefetch_Data _ )-> fsLit "llvm.prefetch"
diff --git a/compiler/nativeGen/CPrim.hs b/compiler/nativeGen/CPrim.hs
index 34782dfc1c..c52fe10b13 100644
--- a/compiler/nativeGen/CPrim.hs
+++ b/compiler/nativeGen/CPrim.hs
@@ -6,6 +6,8 @@ module CPrim
, cmpxchgLabel
, popCntLabel
, bSwapLabel
+ , clzLabel
+ , ctzLabel
, word2FloatLabel
) where
@@ -30,6 +32,24 @@ bSwapLabel w = "hs_bswap" ++ pprWidth w
pprWidth W64 = "64"
pprWidth w = pprPanic "bSwapLabel: Unsupported word width " (ppr w)
+clzLabel :: Width -> String
+clzLabel w = "hs_clz" ++ pprWidth w
+ where
+ pprWidth W8 = "8"
+ pprWidth W16 = "16"
+ pprWidth W32 = "32"
+ pprWidth W64 = "64"
+ pprWidth w = pprPanic "clzLabel: Unsupported word width " (ppr w)
+
+ctzLabel :: Width -> String
+ctzLabel w = "hs_ctz" ++ pprWidth w
+ where
+ pprWidth W8 = "8"
+ pprWidth W16 = "16"
+ pprWidth W32 = "32"
+ pprWidth W64 = "64"
+ pprWidth w = pprPanic "ctzLabel: Unsupported word width " (ppr w)
+
word2FloatLabel :: Width -> String
word2FloatLabel w = "hs_word2float" ++ pprWidth w
where
diff --git a/compiler/nativeGen/PPC/CodeGen.hs b/compiler/nativeGen/PPC/CodeGen.hs
index 014117dd4c..3d3dff2e73 100644
--- a/compiler/nativeGen/PPC/CodeGen.hs
+++ b/compiler/nativeGen/PPC/CodeGen.hs
@@ -1151,6 +1151,8 @@ genCCall' dflags gcp target dest_regs args0
MO_BSwap w -> (fsLit $ bSwapLabel w, False)
MO_PopCnt w -> (fsLit $ popCntLabel w, False)
+ MO_Clz w -> (fsLit $ clzLabel w, False)
+ MO_Ctz w -> (fsLit $ ctzLabel w, False)
MO_AtomicRMW w amop -> (fsLit $ atomicRMWLabel w amop, False)
MO_Cmpxchg w -> (fsLit $ cmpxchgLabel w, False)
MO_AtomicRead w -> (fsLit $ atomicReadLabel w, False)
diff --git a/compiler/nativeGen/SPARC/CodeGen.hs b/compiler/nativeGen/SPARC/CodeGen.hs
index 51f89d629f..c192b8bda6 100644
--- a/compiler/nativeGen/SPARC/CodeGen.hs
+++ b/compiler/nativeGen/SPARC/CodeGen.hs
@@ -654,6 +654,8 @@ outOfLineMachOp_table mop
MO_BSwap w -> fsLit $ bSwapLabel w
MO_PopCnt w -> fsLit $ popCntLabel w
+ MO_Clz w -> fsLit $ clzLabel w
+ MO_Ctz w -> fsLit $ ctzLabel w
MO_AtomicRMW w amop -> fsLit $ atomicRMWLabel w amop
MO_Cmpxchg w -> fsLit $ cmpxchgLabel w
MO_AtomicRead w -> fsLit $ atomicReadLabel w
diff --git a/compiler/nativeGen/X86/CodeGen.hs b/compiler/nativeGen/X86/CodeGen.hs
index ce7120e24b..bc79e5e264 100644
--- a/compiler/nativeGen/X86/CodeGen.hs
+++ b/compiler/nativeGen/X86/CodeGen.hs
@@ -1767,6 +1767,69 @@ genCCall dflags is32Bit (PrimTarget (MO_PopCnt width)) dest_regs@[dst]
size = intSize width
lbl = mkCmmCodeLabel primPackageKey (fsLit (popCntLabel width))
+genCCall dflags is32Bit (PrimTarget (MO_Clz width)) dest_regs@[dst] args@[src]
+ | is32Bit && width == W64 = do
+ -- Fallback to `hs_clz64` on i386
+ targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
+ let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+ [NoHint] [NoHint]
+ CmmMayReturn)
+ genCCall dflags is32Bit target dest_regs args
+
+ | otherwise = do
+ code_src <- getAnyReg src
+ src_r <- getNewRegNat size
+ tmp_r <- getNewRegNat size
+ let dst_r = getRegisterReg platform False (CmmLocal dst)
+
+ -- The following insn sequence makes sure 'clz 0' has a defined value.
+ -- starting with Haswell, one could use the LZCNT insn instead.
+ return $ code_src src_r `appOL` toOL
+ ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
+ [ BSR size (OpReg src_r) tmp_r
+ , MOV II32 (OpImm (ImmInt (2*bw-1))) (OpReg dst_r)
+ , CMOV NE size (OpReg tmp_r) dst_r
+ , XOR size (OpImm (ImmInt (bw-1))) (OpReg dst_r)
+ ]) -- NB: We don't need to zero-extend the result for the
+ -- W8/W16 cases because the 'MOV' insn already
+ -- took care of implicitly clearing the upper bits
+ where
+ bw = widthInBits width
+ platform = targetPlatform dflags
+ size = if width == W8 then II16 else intSize width
+ lbl = mkCmmCodeLabel primPackageKey (fsLit (clzLabel width))
+
+genCCall dflags is32Bit (PrimTarget (MO_Ctz width)) dest_regs@[dst] args@[src]
+ | is32Bit, width == W64 = do
+ -- Fallback to `hs_ctz64` on i386
+ targetExpr <- cmmMakeDynamicReference dflags CallReference lbl
+ let target = ForeignTarget targetExpr (ForeignConvention CCallConv
+ [NoHint] [NoHint]
+ CmmMayReturn)
+ genCCall dflags is32Bit target dest_regs args
+
+ | otherwise = do
+ code_src <- getAnyReg src
+ src_r <- getNewRegNat size
+ tmp_r <- getNewRegNat size
+ let dst_r = getRegisterReg platform False (CmmLocal dst)
+
+ -- The following insn sequence makes sure 'ctz 0' has a defined value.
+ -- starting with Haswell, one could use the TZCNT insn instead.
+ return $ code_src src_r `appOL` toOL
+ ([ MOVZxL II8 (OpReg src_r) (OpReg src_r) | width == W8 ] ++
+ [ BSF size (OpReg src_r) tmp_r
+ , MOV II32 (OpImm (ImmInt bw)) (OpReg dst_r)
+ , CMOV NE size (OpReg tmp_r) dst_r
+ ]) -- NB: We don't need to zero-extend the result for the
+ -- W8/W16 cases because the 'MOV' insn already
+ -- took care of implicitly clearing the upper bits
+ where
+ bw = widthInBits width
+ platform = targetPlatform dflags
+ size = if width == W8 then II16 else intSize width
+ lbl = mkCmmCodeLabel primPackageKey (fsLit (ctzLabel width))
+
genCCall dflags is32Bit (PrimTarget (MO_UF_Conv width)) dest_regs args = do
targetExpr <- cmmMakeDynamicReference dflags
CallReference lbl
@@ -2403,6 +2466,8 @@ outOfLineCmmOp mop res args
MO_PopCnt _ -> fsLit "popcnt"
MO_BSwap _ -> fsLit "bswap"
+ MO_Clz w -> fsLit $ clzLabel w
+ MO_Ctz w -> fsLit $ ctzLabel w
MO_AtomicRMW _ _ -> fsLit "atomicrmw"
MO_AtomicRead _ -> fsLit "atomicread"
diff --git a/compiler/prelude/primops.txt.pp b/compiler/prelude/primops.txt.pp
index 0c33233e88..6844f42a5c 100644
--- a/compiler/prelude/primops.txt.pp
+++ b/compiler/prelude/primops.txt.pp
@@ -386,6 +386,28 @@ primop PopCnt64Op "popCnt64#" GenPrimOp WORD64 -> Word#
primop PopCntOp "popCnt#" Monadic Word# -> Word#
{Count the number of set bits in a word.}
+primop Clz8Op "clz8#" Monadic Word# -> Word#
+ {Count leading zeros in the lower 8 bits of a word.}
+primop Clz16Op "clz16#" Monadic Word# -> Word#
+ {Count leading zeros in the lower 16 bits of a word.}
+primop Clz32Op "clz32#" Monadic Word# -> Word#
+ {Count leading zeros in the lower 32 bits of a word.}
+primop Clz64Op "clz64#" GenPrimOp WORD64 -> Word#
+ {Count leading zeros in a 64-bit word.}
+primop ClzOp "clz#" Monadic Word# -> Word#
+ {Count leading zeros in a word.}
+
+primop Ctz8Op "ctz8#" Monadic Word# -> Word#
+ {Count trailing zeros in the lower 8 bits of a word.}
+primop Ctz16Op "ctz16#" Monadic Word# -> Word#
+ {Count trailing zeros in the lower 16 bits of a word.}
+primop Ctz32Op "ctz32#" Monadic Word# -> Word#
+ {Count trailing zeros in the lower 32 bits of a word.}
+primop Ctz64Op "ctz64#" GenPrimOp WORD64 -> Word#
+ {Count trailing zeros in a 64-bit word.}
+primop CtzOp "ctz#" Monadic Word# -> Word#
+ {Count trailing zeros in a word.}
+
primop BSwap16Op "byteSwap16#" Monadic Word# -> Word#
{Swap bytes in the lower 16 bits of a word. The higher bytes are undefined. }
primop BSwap32Op "byteSwap32#" Monadic Word# -> Word#
diff --git a/includes/stg/Prim.h b/includes/stg/Prim.h
index 9fdfd3ceaa..48bbddb09c 100644
--- a/includes/stg/Prim.h
+++ b/includes/stg/Prim.h
@@ -32,4 +32,16 @@ StgWord hs_popcnt(StgWord x);
StgFloat hs_word2float32(StgWord x);
StgDouble hs_word2float64(StgWord x);
+/* libraries/ghc-prim/cbits/clz.c */
+StgWord hs_clz8(StgWord x);
+StgWord hs_clz16(StgWord x);
+StgWord hs_clz32(StgWord x);
+StgWord hs_clz64(StgWord64 x);
+
+/* libraries/ghc-prim/cbits/ctz.c */
+StgWord hs_ctz8(StgWord x);
+StgWord hs_ctz16(StgWord x);
+StgWord hs_ctz32(StgWord x);
+StgWord hs_ctz64(StgWord64 x);
+
#endif /* PRIM_H */
diff --git a/libraries/ghc-prim/cbits/clz.c b/libraries/ghc-prim/cbits/clz.c
new file mode 100644
index 0000000000..b0637b5dfe
--- /dev/null
+++ b/libraries/ghc-prim/cbits/clz.c
@@ -0,0 +1,41 @@
+#include "MachDeps.h"
+#include "Rts.h"
+#include <stdint.h>
+
+// Fall-back implementations for count-leading-zeros primop
+//
+// __builtin_clz*() is supported by GCC and Clang
+
+#if SIZEOF_UNSIGNED_INT == 4
+StgWord
+hs_clz8(StgWord x)
+{
+ return (uint8_t)x ? __builtin_clz((uint8_t)x)-24 : 8;
+}
+
+StgWord
+hs_clz16(StgWord x)
+{
+ return (uint16_t)x ? __builtin_clz((uint16_t)x)-16 : 16;
+}
+
+StgWord
+hs_clz32(StgWord x)
+{
+ return (uint32_t)x ? __builtin_clz((uint32_t)x) : 32;
+}
+#else
+# error no suitable __builtin_clz() found
+#endif
+
+StgWord
+hs_clz64(StgWord64 x)
+{
+#if SIZEOF_UNSIGNED_LONG == 8
+ return x ? __builtin_clzl(x) : 64;
+#elif SIZEOF_UNSIGNED_LONG_LONG == 8
+ return x ? __builtin_clzll(x) : 64;
+#else
+# error no suitable __builtin_clz() found
+#endif
+}
diff --git a/libraries/ghc-prim/cbits/ctz.c b/libraries/ghc-prim/cbits/ctz.c
new file mode 100644
index 0000000000..cc420b9acd
--- /dev/null
+++ b/libraries/ghc-prim/cbits/ctz.c
@@ -0,0 +1,41 @@
+#include "MachDeps.h"
+#include "Rts.h"
+#include <stdint.h>
+
+// Fall-back implementations for count-trailing-zeros primop
+//
+// __builtin_ctz*() is supported by GCC and Clang
+
+#if SIZEOF_UNSIGNED_INT == 4
+StgWord
+hs_ctz8(StgWord x)
+{
+ return (uint8_t)x ? __builtin_ctz(x) : 8;
+}
+
+StgWord
+hs_ctz16(StgWord x)
+{
+ return (uint16_t)x ? __builtin_ctz(x) : 16;
+}
+
+StgWord
+hs_ctz32(StgWord x)
+{
+ return (uint32_t)x ? __builtin_ctz(x) : 32;
+}
+#else
+# error no suitable __builtin_ctz() found
+#endif
+
+StgWord
+hs_ctz64(StgWord64 x)
+{
+#if SIZEOF_UNSIGNED_LONG == 8
+ return x ? __builtin_ctzl(x) : 64;
+#elif SIZEOF_UNSIGNED_LONG_LONG == 8
+ return x ? __builtin_ctzll(x) : 64;
+#else
+# error no suitable __builtin_ctz() found
+#endif
+}
diff --git a/libraries/ghc-prim/ghc-prim.cabal b/libraries/ghc-prim/ghc-prim.cabal
index 9c1801b4d6..c87f3363c3 100644
--- a/libraries/ghc-prim/ghc-prim.cabal
+++ b/libraries/ghc-prim/ghc-prim.cabal
@@ -54,6 +54,8 @@ Library
c-sources:
cbits/atomic.c
cbits/bswap.c
+ cbits/clz.c
+ cbits/ctz.c
cbits/debug.c
cbits/longlong.c
cbits/popcnt.c
diff --git a/testsuite/.gitignore b/testsuite/.gitignore
index d4abe83909..5631eebef4 100644
--- a/testsuite/.gitignore
+++ b/testsuite/.gitignore
@@ -181,6 +181,7 @@ mk/ghcconfig_*_inplace_bin_ghc-stage2.exe.mk
/tests/codeGen/should_run/T8256
/tests/codeGen/should_run/T9001
/tests/codeGen/should_run/T9013
+/tests/codeGen/should_run/T9340
/tests/codeGen/should_run/Word2Float64
/tests/codeGen/should_run/cgrun001
/tests/codeGen/should_run/cgrun002
diff --git a/testsuite/tests/codeGen/should_run/T9340.hs b/testsuite/tests/codeGen/should_run/T9340.hs
new file mode 100644
index 0000000000..45f791ba73
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T9340.hs
@@ -0,0 +1,96 @@
+{-# LANGUAGE MagicHash #-}
+
+import Control.Monad
+import Data.Bits
+import GHC.Exts
+import GHC.Word
+import Numeric (showHex)
+
+-- Reference Implementation
+
+-- count trailing zeros
+ctzRI :: FiniteBits a => a -> Word
+ctzRI x = fromIntegral $ go 0
+ where
+ go i | i >= w = i
+ | testBit x i = i
+ | otherwise = go (i+1)
+
+ w = finiteBitSize x
+
+-- count leading zeros
+clzRI :: FiniteBits a => a -> Word
+clzRI x = fromIntegral $ (w-1) - go (w-1)
+ where
+ go i | i < 0 = i -- no bit set
+ | testBit x i = i
+ | otherwise = go (i-1)
+
+ w = finiteBitSize x
+
+clzRI32, ctzRI32 :: Word -> Word
+clzRI32 x = clzRI (fromIntegral x :: Word32)
+ctzRI32 x = ctzRI (fromIntegral x :: Word32)
+
+clzRI16, ctzRI16 :: Word -> Word
+clzRI16 x = clzRI (fromIntegral x :: Word16)
+ctzRI16 x = ctzRI (fromIntegral x :: Word16)
+
+clzRI8, ctzRI8 :: Word -> Word
+clzRI8 x = clzRI (fromIntegral x :: Word8)
+ctzRI8 x = ctzRI (fromIntegral x :: Word8)
+
+-- Implementation Under Test
+ctzIUT, clzIUT :: Word -> Word
+ctzIUT (W# x#) = W# (ctz# x#)
+clzIUT (W# x#) = W# (clz# x#)
+
+ctzIUT8, clzIUT8 :: Word -> Word
+ctzIUT8 (W# x#) = W# (ctz8# x#)
+clzIUT8 (W# x#) = W# (clz8# x#)
+
+ctzIUT16, clzIUT16 :: Word -> Word
+ctzIUT16 (W# x#) = W# (ctz16# x#)
+clzIUT16 (W# x#) = W# (clz16# x#)
+
+ctzIUT32, clzIUT32 :: Word -> Word
+ctzIUT32 (W# x#) = W# (ctz32# x#)
+clzIUT32 (W# x#) = W# (clz32# x#)
+
+ctzIUT64, clzIUT64 :: Word64 -> Word
+ctzIUT64 (W64# x#) = W# (ctz64# x#)
+clzIUT64 (W64# x#) = W# (clz64# x#)
+
+main :: IO ()
+main = do
+ forM_ testpats $ \w64 -> do
+ let w = fromIntegral w64 :: Word
+
+ check "clz" clzRI clzIUT w
+ check "clz8" clzRI8 clzIUT8 w
+ check "clz16" clzRI16 clzIUT16 w
+ check "clz32" clzRI32 clzIUT32 w
+ check "clz64" clzRI clzIUT64 w64
+
+ check "ctz" ctzRI ctzIUT w
+ check "ctz8" ctzRI8 ctzIUT8 w
+ check "ctz16" ctzRI16 ctzIUT16 w
+ check "ctz32" ctzRI32 ctzIUT32 w
+ check "ctz64" ctzRI ctzIUT64 w64
+
+ putStrLn $ concat ["tested ", show (length testpats), " patterns"]
+
+ where
+ -- try to construct some interesting patterns
+ testpats :: [Word64]
+ testpats = [ bit i - 1 | i <- [0..63] ] ++
+ [ complement (bit i - 1) | i <- [0..63] ] ++
+ [ bit i .|. bit j | i <- [0..63], j <- [0..i] ]
+
+ check s fri fiut v = unless (vri == viut) $ do
+ putStrLn $ concat [ "FAILED ", s, " for x=0x", showHex v ""
+ , " (RI=", show vri, " IUT=", show viut, ")"
+ ]
+ where
+ vri = fri v
+ viut = fiut v
diff --git a/testsuite/tests/codeGen/should_run/T9340.stdout b/testsuite/tests/codeGen/should_run/T9340.stdout
new file mode 100644
index 0000000000..455b0abc18
--- /dev/null
+++ b/testsuite/tests/codeGen/should_run/T9340.stdout
@@ -0,0 +1 @@
+tested 2208 patterns
diff --git a/testsuite/tests/codeGen/should_run/all.T b/testsuite/tests/codeGen/should_run/all.T
index 9ae7707c4a..03106d4791 100644
--- a/testsuite/tests/codeGen/should_run/all.T
+++ b/testsuite/tests/codeGen/should_run/all.T
@@ -122,3 +122,4 @@ test('SizeOfSmallArray', normal, compile_and_run, [''])
test('T9001', normal, compile_and_run, [''])
test('T9013', omit_ways(['ghci']), # ghci doesn't support unboxed tuples
compile_and_run, [''])
+test('T9340', normal, compile_and_run, [''])