diff options
-rw-r--r-- | libraries/ghc-prim/cbits/pdep.c | 107 |
1 files changed, 90 insertions, 17 deletions
diff --git a/libraries/ghc-prim/cbits/pdep.c b/libraries/ghc-prim/cbits/pdep.c
index 58e8611eca..2769008f67 100644
--- a/libraries/ghc-prim/cbits/pdep.c
+++ b/libraries/ghc-prim/cbits/pdep.c
@@ -4,40 +4,113 @@
 StgWord64
 hs_pdep64(StgWord64 src, StgWord64 mask)
 {
-  uint64_t result = 0;
+  uint64_t m0, mk, mp, mv, t;
+  uint64_t array[6];
 
-  while (1) {
-    // Mask out all but the lowest bit
-    const uint64_t lowest = (-mask & mask);
+  m0 = mask;
+  mk = ~mask << 1;
 
-    if (lowest == 0) {
-      break;
-    }
-
-    const uint64_t lsb = (uint64_t)((int64_t)(src << 63) >> 63);
-
-    result |= lsb & lowest;
-    mask &= ~lowest;
-    src >>= 1;
+  for (int i = 0; i < 6 ; i++) {
+    mp = mk ^ (mk << 1);
+    mp = mp ^ (mp << 2);
+    mp = mp ^ (mp << 4);
+    mp = mp ^ (mp << 8);
+    mp = mp ^ (mp << 16);
+    mp = mp ^ (mp << 32);
+    mv = mp & mask;
+    array[i] = mv;
+    mask = (mask ^ mv) | (mv >> (1 << i));
+    mk = mk & ~mp;
   }
 
-  return result;
+  for (int i = 5; i >= 0; i--) {
+    mv = array[i];
+    t = src << (1 << i);
+    src = (src & ~ mv) | (t & mv);
+  }
+  return src & m0;
 }
 
 StgWord
 hs_pdep32(StgWord src, StgWord mask)
 {
-  return hs_pdep64(src, mask);
+  uint32_t m0, mk, mp, mv, t;
+  uint32_t array[5];
+
+  m0 = mask;
+  mk = ~mask << 1;
+
+  for (int i = 0; i < 5 ; i++) {
+    mp = mk ^ (mk << 1);
+    mp = mp ^ (mp << 2);
+    mp = mp ^ (mp << 4);
+    mp = mp ^ (mp << 8);
+    mp = mp ^ (mp << 16);
+    mv = mp & mask;
+    array[i] = mv;
+    mask = (mask ^ mv) | (mv >> (1 << i));
+    mk = mk & ~mp;
+  }
+
+  for (int i = 4; i >= 0; i--) {
+    mv = array[i];
+    t = src << (1 << i);
+    src = (src & ~ mv) | (t & mv);
+  }
+  return src & m0;
 }
 
 StgWord
 hs_pdep16(StgWord src, StgWord mask)
 {
-  return hs_pdep64(src, mask);
+  uint16_t m0, mk, mp, mv, t;
+  uint16_t array[4];
+
+  m0 = mask;
+  mk = ~mask << 1;
+
+  for (int i = 0; i < 4 ; i++) {
+    mp = mk ^ (mk << 1);
+    mp = mp ^ (mp << 2);
+    mp = mp ^ (mp << 4);
+    mp = mp ^ (mp << 8);
+    mv = mp & mask;
+    array[i] = mv;
+    mask = (mask ^ mv) | (mv >> (1 << i));
+    mk = mk & ~mp;
+  }
+
+  for (int i = 3; i >= 0; i--) {
+    mv = array[i];
+    t = src << (1 << i);
+    src = (src & ~ mv) | (t & mv);
+  }
+  return src & m0;
 }
 
 StgWord
 hs_pdep8(StgWord src, StgWord mask)
 {
-  return hs_pdep64(src, mask);
+  uint8_t m0, mk, mp, mv, t;
+  uint8_t array[3];
+
+  m0 = mask;
+  mk = ~mask << 1;
+
+  for (int i = 0; i < 3 ; i++) {
+    mp = mk ^ (mk << 1);
+    mp = mp ^ (mp << 2);
+    mp = mp ^ (mp << 4);
+    mv = mp & mask;
+    array[i] = mv;
+    mask = (mask ^ mv) | (mv >> (1 << i));
+    mk = mk & ~mp;
+  }
+
+  for (int i = 2; i >= 0; i--) {
+    mv = array[i];
+    t = src << (1 << i);
+    src = (src & ~ mv) | (t & mv);
+  }
+  return src & m0;
 }