summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Trommler <ptrommler@acm.org> 2018-10-27 22:46:17 +0200
committer Peter Trommler <ptrommler@acm.org> 2018-12-20 11:30:34 +0100
commit eec2317e56333b52388037da9c49fe1af86663cc (patch)
tree 77e159238378dd766338785c2e228b871c4f52b7
parent c9884bcb3ac691f25d55704c9ae01c6099229d3f (diff)
download haskell-improve-pext-pdep.tar.gz
Efficient pdep implementation [improve-pext-pdep]
-rw-r--r-- libraries/ghc-prim/cbits/pdep.c 107
1 files changed, 90 insertions, 17 deletions
diff --git a/libraries/ghc-prim/cbits/pdep.c b/libraries/ghc-prim/cbits/pdep.c
index 58e8611eca..2769008f67 100644
--- a/libraries/ghc-prim/cbits/pdep.c
+++ b/libraries/ghc-prim/cbits/pdep.c
@@ -4,40 +4,113 @@
StgWord64
hs_pdep64(StgWord64 src, StgWord64 mask)
{
- uint64_t result = 0;
+ uint64_t m0, mk, mp, mv, t;
+ uint64_t array[6];
- while (1) {
- // Mask out all but the lowest bit
- const uint64_t lowest = (-mask & mask);
+ m0 = mask;
+ mk = ~mask << 1;
- if (lowest == 0) {
- break;
- }
-
- const uint64_t lsb = (uint64_t)((int64_t)(src << 63) >> 63);
-
- result |= lsb & lowest;
- mask &= ~lowest;
- src >>= 1;
+ for (int i = 0; i < 6 ; i++) {
+ mp = mk ^ (mk << 1);
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mp = mp ^ (mp << 8);
+ mp = mp ^ (mp << 16);
+ mp = mp ^ (mp << 32);
+ mv = mp & mask;
+ array[i] = mv;
+ mask = (mask ^ mv) | (mv >> (1 << i));
+ mk = mk & ~mp;
}
- return result;
+ for (int i = 5; i >= 0; i--) {
+ mv = array[i];
+ t = src << (1 << i);
+ src = (src & ~ mv) | (t & mv);
+ }
+ return src & m0;
}
StgWord
hs_pdep32(StgWord src, StgWord mask)
{
- return hs_pdep64(src, mask);
+ uint32_t m0, mk, mp, mv, t;
+ uint32_t array[5];
+
+ m0 = mask;
+ mk = ~mask << 1;
+
+ for (int i = 0; i < 5 ; i++) {
+ mp = mk ^ (mk << 1);
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mp = mp ^ (mp << 8);
+ mp = mp ^ (mp << 16);
+ mv = mp & mask;
+ array[i] = mv;
+ mask = (mask ^ mv) | (mv >> (1 << i));
+ mk = mk & ~mp;
+ }
+
+ for (int i = 4; i >= 0; i--) {
+ mv = array[i];
+ t = src << (1 << i);
+ src = (src & ~ mv) | (t & mv);
+ }
+ return src & m0;
}
StgWord
hs_pdep16(StgWord src, StgWord mask)
{
- return hs_pdep64(src, mask);
+ uint16_t m0, mk, mp, mv, t;
+ uint16_t array[4];
+
+ m0 = mask;
+ mk = ~mask << 1;
+
+ for (int i = 0; i < 4 ; i++) {
+ mp = mk ^ (mk << 1);
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mp = mp ^ (mp << 8);
+ mv = mp & mask;
+ array[i] = mv;
+ mask = (mask ^ mv) | (mv >> (1 << i));
+ mk = mk & ~mp;
+ }
+
+ for (int i = 3; i >= 0; i--) {
+ mv = array[i];
+ t = src << (1 << i);
+ src = (src & ~ mv) | (t & mv);
+ }
+ return src & m0;
}
StgWord
hs_pdep8(StgWord src, StgWord mask)
{
- return hs_pdep64(src, mask);
+ uint8_t m0, mk, mp, mv, t;
+ uint8_t array[3];
+
+ m0 = mask;
+ mk = ~mask << 1;
+
+ for (int i = 0; i < 3 ; i++) {
+ mp = mk ^ (mk << 1);
+ mp = mp ^ (mp << 2);
+ mp = mp ^ (mp << 4);
+ mv = mp & mask;
+ array[i] = mv;
+ mask = (mask ^ mv) | (mv >> (1 << i));
+ mk = mk & ~mp;
+ }
+
+ for (int i = 2; i >= 0; i--) {
+ mv = array[i];
+ t = src << (1 << i);
+ src = (src & ~ mv) | (t & mv);
+ }
+ return src & m0;
}