diff options
author | Herbert Valerio Riedel <hvr@gnu.org> | 2014-08-11 18:56:57 +0200 |
---|---|---|
committer | Herbert Valerio Riedel <hvr@gnu.org> | 2014-08-14 11:34:23 +0200 |
commit | e0c1767d0ea8d12e0a4badf43682a08784e379c6 (patch) | |
tree | 6662fe33cd7e803253458f91307b1b5826e30b0f /libraries | |
parent | 6b5ea617dcd162e682886d5843df51a2866218d3 (diff) | |
download | haskell-e0c1767d0ea8d12e0a4badf43682a08784e379c6.tar.gz |
Implement new CLZ and CTZ primops (re #9340)
This implements the new primops
clz#, clz32#, clz64#,
ctz#, ctz32#, ctz64#
which provide efficient implementations of the popular
count-leading-zero and count-trailing-zero respectively
(see testcase for a pure Haskell reference implementation).
On x86, NCG as well as LLVM generates code based on the BSF/BSR
instructions (which need extra logic to make the 0-case well-defined).
Test Plan: validate and succesful tests on i686 and amd64
Reviewers: rwbarton, simonmar, ezyang, austin
Subscribers: simonmar, relrod, ezyang, carter
Differential Revision: https://phabricator.haskell.org/D144
GHC Trac Issues: #9340
Diffstat (limited to 'libraries')
-rw-r--r-- | libraries/ghc-prim/cbits/clz.c | 41 | ||||
-rw-r--r-- | libraries/ghc-prim/cbits/ctz.c | 41 | ||||
-rw-r--r-- | libraries/ghc-prim/ghc-prim.cabal | 2 |
3 files changed, 84 insertions, 0 deletions
diff --git a/libraries/ghc-prim/cbits/clz.c b/libraries/ghc-prim/cbits/clz.c new file mode 100644 index 0000000000..b0637b5dfe --- /dev/null +++ b/libraries/ghc-prim/cbits/clz.c @@ -0,0 +1,41 @@ +#include "MachDeps.h" +#include "Rts.h" +#include <stdint.h> + +// Fall-back implementations for count-leading-zeros primop +// +// __builtin_clz*() is supported by GCC and Clang + +#if SIZEOF_UNSIGNED_INT == 4 +StgWord +hs_clz8(StgWord x) +{ + return (uint8_t)x ? __builtin_clz((uint8_t)x)-24 : 8; +} + +StgWord +hs_clz16(StgWord x) +{ + return (uint16_t)x ? __builtin_clz((uint16_t)x)-16 : 16; +} + +StgWord +hs_clz32(StgWord x) +{ + return (uint32_t)x ? __builtin_clz((uint32_t)x) : 32; +} +#else +# error no suitable __builtin_clz() found +#endif + +StgWord +hs_clz64(StgWord64 x) +{ +#if SIZEOF_UNSIGNED_LONG == 8 + return x ? __builtin_clzl(x) : 64; +#elif SIZEOF_UNSIGNED_LONG_LONG == 8 + return x ? __builtin_clzll(x) : 64; +#else +# error no suitable __builtin_clz() found +#endif +} diff --git a/libraries/ghc-prim/cbits/ctz.c b/libraries/ghc-prim/cbits/ctz.c new file mode 100644 index 0000000000..cc420b9acd --- /dev/null +++ b/libraries/ghc-prim/cbits/ctz.c @@ -0,0 +1,41 @@ +#include "MachDeps.h" +#include "Rts.h" +#include <stdint.h> + +// Fall-back implementations for count-trailing-zeros primop +// +// __builtin_ctz*() is supported by GCC and Clang + +#if SIZEOF_UNSIGNED_INT == 4 +StgWord +hs_ctz8(StgWord x) +{ + return (uint8_t)x ? __builtin_ctz(x) : 8; +} + +StgWord +hs_ctz16(StgWord x) +{ + return (uint16_t)x ? __builtin_ctz(x) : 16; +} + +StgWord +hs_ctz32(StgWord x) +{ + return (uint32_t)x ? __builtin_ctz(x) : 32; +} +#else +# error no suitable __builtin_ctz() found +#endif + +StgWord +hs_ctz64(StgWord64 x) +{ +#if SIZEOF_UNSIGNED_LONG == 8 + return x ? __builtin_ctzl(x) : 64; +#elif SIZEOF_UNSIGNED_LONG_LONG == 8 + return x ? __builtin_ctzll(x) : 64; +#else +# error no suitable __builtin_ctz() found +#endif +} diff --git a/libraries/ghc-prim/ghc-prim.cabal b/libraries/ghc-prim/ghc-prim.cabal index 9c1801b4d6..c87f3363c3 100644 --- a/libraries/ghc-prim/ghc-prim.cabal +++ b/libraries/ghc-prim/ghc-prim.cabal @@ -54,6 +54,8 @@ Library c-sources: cbits/atomic.c cbits/bswap.c + cbits/clz.c + cbits/ctz.c cbits/debug.c cbits/longlong.c cbits/popcnt.c |