diff options
author | Matti Picus <matti.picus@gmail.com> | 2019-05-31 16:25:27 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-31 16:25:27 +0300 |
commit | 392866d1c8dce0abb8cb327a42f8e134f5d2a05e (patch) | |
tree | f7db2105acf825b9217f16c5349cbb7ff8d4610a /numpy/random/src | |
parent | 0c70787c04d7b0febacb14edfe214cb68d87b6c3 (diff) | |
parent | 9695c1ff191b5d1d9fe6480a040baf320ce20d02 (diff) | |
download | numpy-392866d1c8dce0abb8cb327a42f8e134f5d2a05e.tar.gz |
Merge pull request #13665 from bashtage/rkern-pcg-patch
MAINT: Use intrinsics in Win64-PCG64
Diffstat (limited to 'numpy/random/src')
-rw-r--r-- | numpy/random/src/pcg64/pcg64.h | 32 |
1 files changed, 25 insertions, 7 deletions
diff --git a/numpy/random/src/pcg64/pcg64.h b/numpy/random/src/pcg64/pcg64.h index 0c263bb7b..20d64f8ab 100644 --- a/numpy/random/src/pcg64/pcg64.h +++ b/numpy/random/src/pcg64/pcg64.h @@ -72,6 +72,9 @@ typedef struct { uint64_t low; } pcg128_t; +#define PCG_DEFAULT_MULTIPLIER_HIGH 2549297995355413924ULL +#define PCG_DEFAULT_MULTIPLIER_LOW 4865540595714422341ULL + static inline pcg128_t PCG_128BIT_CONSTANT(uint64_t high, uint64_t low) { pcg128_t result; result.high = high; @@ -90,7 +93,7 @@ typedef struct { } pcg_state_setseq_128; #define PCG_DEFAULT_MULTIPLIER_128 \ - PCG_128BIT_CONSTANT(2549297995355413924ULL, 4865540595714422341ULL) + PCG_128BIT_CONSTANT(PCG_DEFAULT_MULTIPLIER_HIGH, PCG_DEFAULT_MULTIPLIER_LOW) #define PCG_DEFAULT_INCREMENT_128 \ PCG_128BIT_CONSTANT(6364136223846793005ULL, 1442695040888963407ULL) #define PCG_STATE_SETSEQ_128_INITIALIZER \ @@ -172,6 +175,27 @@ static inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng, pcg_setseq_128_step_r(rng); } +static inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(pcg_state_setseq_128 *rng) { +#if defined _WIN32 && _MSC_VER >= 1900 && _M_AMD64 + uint64_t h1; + pcg128_t product; + + /* Manually inline the multiplication and addition using intrinsics */ + h1 = rng->state.high * PCG_DEFAULT_MULTIPLIER_LOW + + rng->state.low * PCG_DEFAULT_MULTIPLIER_HIGH; + product.low = + _umul128(rng->state.low, PCG_DEFAULT_MULTIPLIER_LOW, &(product.high)); + product.high += h1; + _addcarry_u64(_addcarry_u64(0, product.low, rng->inc.low, &(rng->state.low)), + product.high, rng->inc.high, &(rng->state.high)); + return _rotr64(rng->state.high ^ rng->state.low, rng->state.high >> 58u); +#else + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +#endif +} + #else /* PCG_EMULATED_128BIT_MATH */ static inline void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng) { @@ -196,12 +220,6 @@ static inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng, #endif /* PCG_EMULATED_128BIT_MATH */ static inline uint64_t -pcg_setseq_128_xsl_rr_64_random_r(pcg_state_setseq_128 *rng) { - pcg_setseq_128_step_r(rng); - return pcg_output_xsl_rr_128_64(rng->state); -} - -static inline uint64_t pcg_setseq_128_xsl_rr_64_boundedrand_r(pcg_state_setseq_128 *rng, uint64_t bound) { uint64_t threshold = -bound % bound; |