Merge branch 'raimo/stdlib/rand-experiments/OTP-18011'

* raimo/stdlib/rand-experiments/OTP-18011: Add a measure/1 point for range 10000 Write a section about range capping Correct mwc59 test result tags Optimize mwc59 float generation for speed Increase mwc59 seed range to 58 bits Change the fast scrambler to return 32 bits Improve seeding Change mwc59 to returns 59 bits and use the high
author: Raimo Niskanen <raimo@erlang.org> 2022-05-12 08:56:30 +0200
committer: Raimo Niskanen <raimo@erlang.org> 2022-05-12 08:56:30 +0200
commit: 660596b107b62fbfd1ff028a9e46bb822484c2be (patch)
tree: 51ab637254a64eff66917f927151d4092200cae4
parent: 852daae195175eb56b983eefc7c8540d1e33665f (diff)
parent: 08f343bed4f75bf345b04b4c1fac7e1026a50ab3 (diff)
download: erlang-660596b107b62fbfd1ff028a9e46bb822484c2be.tar.gz
3 files changed, 419 insertions, 118 deletions
diff --git a/lib/stdlib/doc/src/rand.xml b/lib/stdlib/doc/src/rand.xml
index 25dec14a51..471a23f6b9 100644
--- a/lib/stdlib/doc/src/rand.xml
+++ b/lib/stdlib/doc/src/rand.xml
@@ -806,6 +806,175 @@ end.</pre>
     <fsdescription>
       <marker id="niche_algorithms"/>
       <title>Niche algorithms API</title>
+      <p>
+        This section contains special purpose algorithms
+        that does not use the
+        <seeerl marker="#plug_in_api">plug-in framework API</seeerl>,
+        for example for speed reasons.
+      </p>
+      <p>
+        Since these algorithms lack the plug-in framework support,
+        generating numbers in a range other than the
+        generator's own generated range may become a problem.
+      </p>
+      <p>
+        There are at least 3 ways to do this, assuming that
+        the range is less than the generator's range:
+      </p>
+      <taglist>
+        <tag>Modulo</tag>
+        <item>
+          <p>
+            To generate a number <c>V</c> in the range 0..<c>Range</c>-1:
+          </p>
+          <list type="bulleted">
+            <item>Generate a number <c>X</c>.</item>
+            <item>
+              Use <c>V&nbsp;=&nbsp;X&nbsp;rem&nbsp;Range</c> as your value.
+            </item>
+          </list>
+          <p>
+            This method uses <c>rem</c>, that is, the remainder of
+            an integer division, which is a slow operation.
+          </p>
+          <p>
+            Low bits from the generator propagate straight through
+            to the generated value, so if the generator has got
+            weaknesses in the low bits this method propagates
+            them too.
+          </p>
+          <p>
+            If <c>Range</c> is not a divisor of the generator range,
+            the generated numbers have a bias.
+            Example:
+          </p>
+          <p>
+            Say the generator generates a byte, that is,
+            the generator range is 0..255,
+            and the desired range is 0..99 (<c>Range=100</c>).
+            Then there are 3 generator outputs that produce the value 0,
+            that is; 0, 100 and 200.  But there are only
+            2 generator outputs that produce the value 99,
+            which are; 99 and 199.  So the probability for
+            a value <c>V</c> in 0..55 is 3/2 times
+            the probability for the other values 56..99.
+          </p>
+          <p>
+            If <c>Range</c> is much smaller than the generator range,
+            then this bias gets hard to detect.  The rule of thumb is
+            that if <c>Range</c> is smaller than the square root
+            of the generator range, the bias is small enough.
+            Example:
+          </p>
+          <p>
+            A byte generator when <c>Range=20</c>.
+            There are 12 (<c>256&nbsp;div&nbsp;20</c>)
+            possibilities to generate the highest numbers
+            and one more to generate a number
+            <c>V</c>&nbsp;&lt;&nbsp;16 (<c>256&nbsp;rem&nbsp;20</c>).
+            So the probability is 13/12 for a low number
+            versus a high.  To detect that difference
+            with some confidence you would need to generate
+            a lot more numbers than the generator range,
+            256 in this small example.
+          </p>
+        </item>
+        <tag>Truncated multiplication</tag>
+        <item>
+          <p>
+            To generate a number <c>V</c> in the range 0..<c>Range</c>-1,
+            when you have a generator with the range
+            0..2^<c>Bits</c>-1:
+          </p>
+          <list type="bulleted">
+            <item>Generate a number <c>X</c>.</item>
+            <item>
+              Use <c>V&nbsp;=&nbsp;X*Range&nbsp;bsr&nbsp;Bits</c>
+              as your value.
+            </item>
+          </list>
+          <p>
+            If the multiplication <c>X*Range</c> creates a bignum
+            this method becomes very slow.
+          </p>
+          <p>
+            High bits from the generator propagate through
+            to the generated value, so if the generator has got
+            weaknesses in the high bits this method propagates
+            them too.
+          </p>
+          <p>
+            If <c>Range</c> is not a divisor of the generator range,
+            the generated numbers have a bias,
+            pretty much as for the <em>Modulo</em> method above.
+          </p>
+        </item>
+        <tag>Shift or mask</tag>
+        <item>
+          <p>
+            To generate a number in the range 0..2^<c>RBits</c>-1,
+            when you have a generator with the range 0..2^<c>Bits</c>:
+          </p>
+          <list type="bulleted">
+            <item>Generate a number <c>X</c>.</item>
+            <item>
+              Use <c>V&nbsp;=&nbsp;X&nbsp;band&nbsp;((1&nbsp;bsl&nbsp;RBits)-1)</c>
+              or <c>V&nbsp;=&nbsp;X&nbsp;bsr&nbsp;(Bits-RBits)</c>
+              as your value.
+            </item>
+          </list>
+          <p>
+            Masking with <c>band</c> preserves the low bits,
+            and right shifting with <c>bsr</c> preserves the high,
+            so if the generator has got weaknesses in high or low
+            bits; choose the right operator.
+          </p>
+          <p>
+            If the generator has got a range that is not a power of 2
+            and this method is used anyway, it introduces bias
+            in the same way as for the <em>Modulo</em> method above.
+          </p>
+        </item>
+        <tag>Rejection</tag>
+        <item>
+          <list type="bulleted">
+            <item>Generate a number <c>X</c>.</item>
+            <item>
+              If <c>X</c> is in the range, use <c>V&nbsp;=&nbsp;X</c>
+              as your value, otherwise reject it and repeat.
+            </item>
+          </list>
+          <p>
+            In theory it is not certain that this method
+            will ever complete, but in practice you ensure
+            that the probability of rejection is low.
+            Then the probability for yet another iteration
+            decreases exponentially so the expected mean
+            number of iterations will often be between 1 and 2.
+            Also, since the base generator is a full length generator,
+            a value that will break the loop must eventually
+            be generated.
+          </p>
+        </item>
+      </taglist>
+      <p>
+        Chese methods can be combined, such as using the <em>Modulo</em>
+        method and only if the generator value would create bias
+        use <em>Rejection</em>.  Or using <em>Shift or mask</em>
+        to reduce the size of a generator value so that
+        <em>Truncated multiplication</em> will not create a bignum.
+      </p>
+      <p>
+        The recommended way to generate a floating point number
+        (IEEE 745 double, that has got a 53-bit mantissa)
+        in the range 0..1, that is
+        0.0&nbsp;=&lt;&nbsp;<c>V</c>&nbsp;&lt;1.0
+        is to generate a 53-bit number <c>X</c> and then use
+        <c>V&nbsp;=&nbsp;X&nbsp;*&nbsp;(1.0/((1&nbsp;bsl&nbsp;53)))</c>
+        as your value.  This will create a value on the form
+        <c>N</c>*2^-53 with equal probability for every
+        possible <c>N</c> for the range.
+      </p>
     </fsdescription>
     <func>
       <name name="splitmix64_next" arity="1" since="OTP 25.0"/>
@@ -861,6 +1030,11 @@ end.</pre>
             on a selected range, nor in generating a floating point number.
             It is easy to accidentally mess up the fairly good
             statistical properties of this generator when doing either.
+            See the recepies at the start of this
+            <seeerl marker="#niche_algorithms">
+              Niche algorithms API
+            </seeerl>
+            description.
             Note also the caveat about weak low bits that
             this generator suffers from.
             The generator is exported in this form
@@ -912,32 +1086,41 @@ end.</pre>
           Because the generator uses a multiplier that is
           a power of 2 it gets statistical flaws for collision tests
           and birthday spacings tests in 2 and 3 dimensions,
-          so the state should be scrambled, to create
-          an output value.
-        </p><p>
-          The quality of the state bits <c><anno>CX1</anno></c>
-          as a random value is far from good, but if speed is
-          much more important than these imperfections, the lowest
-          say 16 bits of the generator state could be used
-          without scrambling.
+          and even these caveats apply only to the MWC "digit",
+          that is the low 32 bits (due to the multiplier) of
+          the generator state.
         </p>
         <p>
+          The quality of the output value improves much by using
+          a scrambler instead of just taking the low bits.
           Function
-          <seemfa marker="#mwc59_fast_value/1">
-            <c>mwc59_fast_value</c>
+          <seemfa marker="#mwc59_value32/1">
+            <c>mwc59_value32</c>
           </seemfa>
-          is a fast scrambler that gives 32 decent bits, but still
-          some problems in 2- and 3-dimensional collision tests
-          show through.
+          is a fast scrambler that returns a decent 32-bit number.
           The slightly slower
           <seemfa marker="#mwc59_value/1">
             <c>mwc59_value</c>
           </seemfa>
-          scrambler returns 58 bits of very good quality, and
+          scrambler returns 59 bits of very good quality, and
           <seemfa marker="#mwc59_float/1"><c>mwc59_float</c></seemfa>
           returns a <c>float()</c> of very good quality.
         </p>
         <p>
+          The low bits of the base generator are surprisingly good,
+          so the lowest 16 bits actually pass fairly strict PRNG tests,
+          despite the generator's weaknesses that lie in the high
+          bits of the 32-bit MWC "digit".  It is recommended
+          to use <c>rem</c> on the the generator state,
+          or bit mask extracting the lowest bits to produce numbers
+          in a range 16 bits or less.
+          See the recepies at the start of this
+          <seeerl marker="#niche_algorithms">
+            Niche algorithms API
+          </seeerl>
+          description.
+        </p>
+        <p>
           On a typical 64 bit Erlang VM this generator executes
           in below 8% (1/13) of the time
           for the default algorithm in the
@@ -945,8 +1128,8 @@ end.</pre>
             plug-in framework API
           </seeerl>
           of this module.  With the
-          <seemfa marker="#mwc59_fast_value/1">
-            <c>mwc59_fast_value</c>
+          <seemfa marker="#mwc59_value32/1">
+            <c>mwc59_value32</c>
           </seemfa>
           scrambler the total time becomes 16% (1/6),
           and with
@@ -974,19 +1157,42 @@ end.</pre>
       </desc>
     </func>
     <func>
-      <name name="mwc59_fast_value" arity="1" since="OTP 25.0"/>
+      <name name="mwc59_value32" arity="1" since="OTP 25.0"/>
       <fsummary>Return the generator value.</fsummary>
       <desc>
         <p>
-          Returns a 58-bit value <c><anno>V</anno></c>
+          Returns a 32-bit value <c><anno>V</anno></c>
           from a generator state <c><anno>CX</anno></c>.
           The generator state is scrambled using
           an 8-bit xorshift which masks
           the statistical imperfecions of the base generator
           <seemfa marker="#mwc59/1"><c>mwc59</c></seemfa>
-          enough that the 32 lowest bits are of decent quality.
-          It is not recommended to generate numbers
-          in a range > 2^32 with this function.
+          enough to produce numbers of decent quality.
+          Still some problems in 2- and 3-dimensional
+          birthday spacing and collision tests show through.
+        </p>
+        <p>
+          When using this scrambler it is in general better to use
+          the high bits of the value than the low.
+          The lowest 8 bits are of good quality and pass right through
+          from the base generator.  They are combined with the next 8
+          in the xorshift making the low 16 good quality,
+          but in the range 16..31 bits there are weaker bits
+          that you do not want to have as the high bits
+          of your generated values.
+          Therefore it is in general safer to shift out low bits.
+          See the recepies at the start of this
+          <seeerl marker="#niche_algorithms">
+            Niche algorithms API
+          </seeerl>
+          description.
+        </p>
+        <p>
+          For a non power of 2 range less than about 16 bits
+          (to not get too much bias and to avoid bignums)
+          truncated multiplication can be used,
+          which is much faster than using <c>rem</c>:
+          <c>(Range*<anno>V</anno>)&nbsp;bsr&nbsp;32</c>.
         </p>
       </desc>
     </func>
@@ -995,15 +1201,36 @@ end.</pre>
       <fsummary>Return the generator value.</fsummary>
       <desc>
         <p>
-          Returns a 58-bit value <c><anno>V</anno></c>
+          Returns a 59-bit value <c><anno>V</anno></c>
           from a generator state <c><anno>CX</anno></c>.
           The generator state is scrambled using
           an 4-bit followed by a 27-bit xorshift, which masks
           the statistical imperfecions of the base generator
           <seemfa marker="#mwc59/1"><c>mwc59</c></seemfa>
-          enough that all 58 bits are of very good quality.
-          Beware of bias in the generated numbers if
-          generating on a non power of 2 range too close to 2^58.
+          enough that all 59 bits are of very good quality.
+        </p>
+        <p>
+          Be careful to not accidentaly create a bignum
+          when handling the value <c><anno>V</anno></c>.
+        </p>
+        <p>
+          It is in general general better to use the high bits
+          from this scrambler than the low.
+          See the recepies at the start of this
+          <seeerl marker="#niche_algorithms">
+            Niche algorithms API
+          </seeerl>
+          description.
+        </p>
+        <p>
+          For a non power of 2 range less than about 29 bits
+          (to not get too much bias and to avoid bignums)
+          truncated multiplication can be used,
+          which is much faster than using <c>rem</c>.
+          Example for range 1'000'000'000;
+          the range is 30 bits, we use 29 bits from the generator,
+          adding up to 59 bits, which is not a bignum:
+          <c>(1000000000&nbsp;*&nbsp;(<anno>V</anno>&nbsp;bsr&nbsp;(59-29)))&nbsp;bsr&nbsp;29</c>.
         </p>
       </desc>
     </func>
@@ -1030,8 +1257,8 @@ end.</pre>
       <desc>
         <p>
           Returns a generator state <c><anno>CX</anno></c>.
-          It is set it to <c><anno>S</anno></c>, after folding it
-          into the state's range, if out of range.
+          <c><anno>S</anno></c> is hashed to create the generator state,
+          to avoid that similar seeds create similar sequences.
         </p>
         <p>
           Without <c><anno>S</anno></c>,
diff --git a/lib/stdlib/src/rand.erl b/lib/stdlib/src/rand.erl
index 6c85a79323..5f8d3a4c59 100644
--- a/lib/stdlib/src/rand.erl
+++ b/lib/stdlib/src/rand.erl
@@ -38,7 +38,7 @@
 
 %% Utilities
 -export([exsp_next/1, exsp_jump/1, splitmix64_next/1,
-         mwc59/1, mwc59_fast_value/1, mwc59_value/1, mwc59_float/1,
+         mwc59/1, mwc59_value32/1, mwc59_value/1, mwc59_float/1,
          mwc59_seed/0, mwc59_seed/1]).
 
 %% Test, dev and internal
@@ -53,6 +53,7 @@
 		   exs1024_next/1, exs1024_calc/2,
                    exro928_next_state/4,
                    exrop_next/1, exrop_next_s/2,
+                   mwc59_value/1,
 		   get_52/1, normal_kiwi/1]}).
 
 -define(DEFAULT_ALG_HANDLER, exsss).
@@ -1507,16 +1508,28 @@ mwc59(CX0) -> % when is_integer(CX0), 1 =< CX0, CX0 < ?MWC59_P ->
     X = ?MASK(?MWC59_B, CX),
     ?MWC59_A * X + C.
 
--spec mwc59_fast_value(CX :: mwc59_state()) -> V :: 0..?MASK(58).
-mwc59_fast_value(CX1) -> % when is_integer(CX1), 1 =< CX1, CX1 < ?MWC59_P ->
-    CX = ?MASK(58, CX1),
-    CX bxor ?BSL(58, CX, ?MWC59_XS).
+%%% %% Verification by equivalent MCG generator
+%%% mwc59_r(CX1) ->
+%%%     (CX1 bsl ?MWC59_B) rem ?MWC59_P. % Reverse
+%%% %%%     (CX1 * ?MWC59_A) rem ?MWC59_P. % Forward
+%%%
+%%% mwc59(CX0, 0) ->
+%%%     CX0;
+%%% mwc59(CX0, N) ->
+%%%     CX1 = mwc59(CX0),
+%%%     CX0 = mwc59_r(CX1),
+%%%     mwc59(CX1, N - 1).
+
+-spec mwc59_value32(CX :: mwc59_state()) -> V :: 0..?MASK(32).
+mwc59_value32(CX1) -> % when is_integer(CX1), 1 =< CX1, CX1 < ?MWC59_P ->
+    CX = ?MASK(32, CX1),
+    CX bxor ?BSL(32, CX, ?MWC59_XS).
 
--spec mwc59_value(CX :: mwc59_state()) -> V :: 0..?MASK(58).
+-spec mwc59_value(CX :: mwc59_state()) -> V :: 0..?MASK(59).
 mwc59_value(CX1) -> % when is_integer(CX1), 1 =< CX1, CX1 < ?MWC59_P ->
-    CX = ?MASK(58, CX1),
-    CX2 = CX bxor ?BSL(58, CX, ?MWC59_XS1),
-    CX2 bxor ?BSL(58, CX2, ?MWC59_XS2).
+    CX = ?MASK(59, CX1),
+    CX2 = CX bxor ?BSL(59, CX, ?MWC59_XS1),
+    CX2 bxor ?BSL(59, CX2, ?MWC59_XS2).
 
 -spec mwc59_float(CX :: mwc59_state()) -> V :: float().
 mwc59_float(CX1) ->
@@ -1528,25 +1541,25 @@ mwc59_float(CX1) ->
 -spec mwc59_seed() -> CX :: mwc59_state().
 mwc59_seed() ->
     {A1, A2, A3} = default_seed(),
-    mwc59_seed(seed64_int([A1, A2, A3])).
+    X1 = hash58(A1),
+    X2 = hash58(A2),
+    X3 = hash58(A3),
+    (X1 bxor X2 bxor X3) + 1.
 
--spec mwc59_seed(S :: integer()) -> CX :: mwc59_state().
-mwc59_seed(S) when is_integer(S) ->
-    mod(?MWC59_P - 1, S) + 1.
+-spec mwc59_seed(S :: 0..?MASK(58)) -> CX :: mwc59_state().
+mwc59_seed(S) when is_integer(S), 0 =< S, S =< ?MASK(58) ->
+    hash58(S) + 1.
 
-
-
-%%% %% Verification by equivalent MCG generator
-%%% mwc59_r(CX1) ->
-%%%     (CX1 bsl ?MWC59_B) rem ?MWC59_P. % Reverse
-%%% %%%     (CX1 * ?MWC59_A) rem ?MWC59_P. % Forward
-%%%
-%%% mwc59(CX0, 0) ->
-%%%     CX0;
-%%% mwc59(CX0, N) ->
-%%%     CX1 = mwc59(CX0),
-%%%     CX0 = mwc59_r(CX1),
-%%%     mwc59(CX1, N - 1).
+%% Constants a'la SplitMix64, MurMurHash, etc.
+%% Not that critical, just mix the bits using bijections
+%% (reversible mappings) to not have any two user input seeds
+%% become the same generator start state.
+%%
+hash58(X) ->
+    X0 = ?MASK(58, X),
+    X1 = ?MASK(58, (X0 bxor (X0 bsr 29)) * 16#351afd7ed558ccd),
+    X2 = ?MASK(58, (X1 bxor (X1 bsr 29)) * 16#0ceb9fe1a85ec53),
+    X2 bxor (X2 bsr 29).
 
 
 %% =====================================================================
@@ -1612,16 +1625,6 @@ seed64(X_0) ->
 	    ZX
     end.
 
-%% Create a splitmixed (big) integer from a list of integers
-seed64_int(As) ->
-    seed64_int(As, 0, 0).
-%%
-seed64_int([], _X, Y) ->
-    Y;
-seed64_int([A | As], X0, Y) ->
-    {Z, X1} = splitmix64_next(A bxor X0),
-    seed64_int(As, X1, (Y bsl 64) bor Z).
-
 %% =====================================================================
 %% The SplitMix64 generator:
 %%
@@ -1987,15 +1990,17 @@ bc(V, B, N) when B =< V -> N;
 bc(V, B, N) -> bc(V, B bsr 1, N - 1).
 
 
-%% Non-negative rem
-mod(Q, X) ->
-    Y = X rem Q,
-    if
-        Y < 0 ->
-            Y + Q;
-        true ->
-            Y
-    end.
+%%% %% Non-negative rem
+%%% mod(Q, X) when 0 =< X, X < Q ->
+%%%     X;
+%%% mod(Q, X) ->
+%%%     Y = X rem Q,
+%%%     if
+%%%         Y < 0 ->
+%%%             Y + Q;
+%%%         true ->
+%%%             Y
+%%%     end.
 
 
 make_float(S, E, M) ->
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index c03a687b3a..11722fd060 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -211,30 +211,44 @@ api_eq_1(S00) ->
 %% Verify mwc59 behaviour
 %%
 mwc59_api(Config) when is_list(Config) ->
-    mwc59_api(1, 1000000).
+    try rand:mwc59_seed(-1) of
+        CX1 ->
+            error({bad_return, CX1})
+    catch
+        error : function_clause ->
+            try rand:mwc59_seed(1 bsl 58) of
+                CX2 ->
+                    error({bad_return, CX2})
+            catch
+                error : function_clause ->
+                    Seed = 11213862807209314,
+                    Seed = rand:mwc59_seed(1),
+                    mwc59_api(Seed, 1000000)
+            end
+    end.
 
 mwc59_api(CX0, 0) ->
-    CX = 216355295181821136,
+    CX = 182322083224642863,
     {CX, CX} = {CX0, CX},
-    V0 = rand:mwc59_fast_value(CX0),
-    V = 262716604851324112,
+    V0 = rand:mwc59_value32(CX0),
+    V = 2905950767,
     {V, V} = {V0, V},
     W0 = rand:mwc59_value(CX0),
-    W = 240206843777063376,
+    W = 269866568368142303,
     {W, W} = {W0, W},
     F0 = rand:mwc59_float(CX0),
-    F = (W band ((1 bsl 53) - 1)) * (1 / (1 bsl 53)),
+    F = (W band ((1 bsl 53)-1)) * (1 / (1 bsl 53)),
     {F, F} = {F0, F},
     ok;
 mwc59_api(CX, N)
   when is_integer(CX), 1 =< CX, CX < (16#7fa6502 bsl 32) - 1 ->
-    V = rand:mwc59_fast_value(CX),
+    V = rand:mwc59_value32(CX),
     W = rand:mwc59_value(CX),
     F = rand:mwc59_float(CX),
     true = 0 =< V,
-    true = V < 1 bsl 58,
+    true = V < 1 bsl 32,
     true = 0 =< W,
-    true = W < 1 bsl 58,
+    true = W < 1 bsl 59,
     true = 0.0 =< F,
     true = F < 1.0,
     mwc59_api(rand:mwc59(CX), N - 1).
@@ -1129,28 +1143,84 @@ do_measure(Iterations) ->
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
                           %% Just a 'rem' with slightly skewed distribution
-                            case (rand:mwc59_fast_value(St1) rem Range) + 1 of
+                            case (rand:mwc59_value(St1) rem Range) + 1 of
                                 R when is_integer(R), 0 < R, R =< Range ->
                                     St1
                             end
                   end
           end,
-          {mwc59,fast_mod}, Iterations,
+          {mwc59,value_mod}, Iterations,
           TMarkUniformRange10000, OverheadUniformRange1000),
     _ =
         measure_1(
           fun (_Mod, _State) ->
                   Range = 10000,
                   fun (St0) ->
-                          St1 = rand:mwc59(St0),
+                          {V,St1} = rand:exsp_next(St0),
                           %% Just a 'rem' with slightly skewed distribution
-                            case (rand:mwc59_value(St1) rem Range) + 1 of
-                                R when is_integer(R), 0 < R, R =< Range ->
-                                    St1
-                            end
+                          case (V rem Range) + 1 of
+                              X when is_integer(X), 0 < X, X =< Range ->
+                                  St1
+                          end
                   end
           end,
-          {mwc59,value_mod}, Iterations,
+          {exsp,mod}, Iterations,
+          TMarkUniformRange10000, OverheadUniformRange1000),
+    _ =
+        measure_1(
+          fun (_Mod, _State) ->
+                  Range = 10000,
+                  fun (St0) ->
+                          St1 = rand:mwc59(St0),
+                          %% Truncated multiplication, slightly skewed
+                          case
+                              ((Range * (St1 band ((1 bsl 16)-1))) bsr 16)
+                              + 1
+                          of
+                              R when is_integer(R), 0 < R, R =< Range ->
+                                  St1
+                          end
+                  end
+          end,
+          {mwc59,raw_tm}, Iterations,
+          TMarkUniformRange10000, OverheadUniformRange1000),
+    _ =
+        measure_1(
+          fun (_Mod, _State) ->
+                  Range = 10000,
+                  fun (St0) ->
+                          St1 = rand:mwc59(St0),
+                          %% Truncated multiplication, slightly skewed
+                          case
+                              ((Range * rand:mwc59_value32(St1)) bsr 32)
+                              + 1
+                          of
+                              R when is_integer(R), 0 < R, R =< Range ->
+                                  St1
+                          end
+                  end
+          end,
+          {mwc59,value32_tm}, Iterations,
+          TMarkUniformRange10000, OverheadUniformRange1000),
+    _ =
+        measure_1(
+          fun (_Mod, _State) ->
+                  Range = 10000, % 14 bits
+                  fun (St0) ->
+                          St1 = rand:mwc59(St0),
+                          %% Truncated multiplication, slightly skewed
+                          case
+                              ( (Range *
+                                     (rand:mwc59_value(St1) bsr 14) )
+                                    bsr (59-14) )
+                                + 1
+                          of
+                              R when is_integer(R), 0 < R, R =< Range ->
+                                  St1
+                          end
+                  end
+          end,
+          {mwc59,value_tm}, Iterations,
           TMarkUniformRange10000, OverheadUniformRange1000),
     _ =
         measure_1(
@@ -1158,14 +1228,16 @@ do_measure(Iterations) ->
                   Range = 10000,
                   fun (St0) ->
                           {V,St1} = rand:exsp_next(St0),
-                          %% Just a 'rem' with slightly skewed distribution
-                          case (V rem Range) + 1 of
+                          %% Truncated multiplication, slightly skewed
+                          case
+                              ((Range * (V bsr 14)) bsr (58-14)) + 1
+                          of
                               X when is_integer(X), 0 < X, X =< Range ->
                                   St1
-                            end
+                          end
                   end
           end,
-          {exsp,mod}, Iterations,
+          {exsp,tm}, Iterations,
           TMarkUniformRange10000, OverheadUniformRange1000),
     _ =
         measure_1(
@@ -1231,13 +1303,13 @@ do_measure(Iterations) ->
                   Range = 1 bsl 32,
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
-                          case rand:mwc59_fast_value(St1) band ((1 bsl 32)-1) of
+                          case rand:mwc59_value32(St1) of
                               R when is_integer(R), 0 =< R, R < Range ->
                                   St1
                           end
                   end
           end,
-          {mwc59,fast_mask}, Iterations,
+          {mwc59,value32}, Iterations,
           TMarkUniform32Bit, OverheadUniform32Bit),
     _ =
         measure_1(
@@ -1245,16 +1317,13 @@ do_measure(Iterations) ->
                   Range = 1 bsl 32,
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
-                          case
-                              rand:mwc59_value(St1) band
-                              ((1 bsl 32)-1)
-                          of
+                          case rand:mwc59_value(St1) bsr (59-32) of
                               R when is_integer(R), 0 =< R, R < Range ->
                                   St1
                           end
                   end
           end,
-          {mwc59,value_mask}, Iterations,
+          {mwc59,value_shift}, Iterations,
           TMarkUniform32Bit, OverheadUniform32Bit),
     _ =
         measure_1(
@@ -1369,27 +1438,27 @@ do_measure(Iterations) ->
     _ =
         measure_1(
           fun (_Mod, _State) ->
-                  Range = 1 bsl 58,
+                  Range = (1 bsl 32) - 1,
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
-                          V = rand:mwc59_fast_value(St1),
+                          V = rand:mwc59_value32(St1),
                           if
-                              is_integer(V), 0 =< V, V < Range ->
+                              is_integer(V), 0 =< V, V =< Range ->
                                   St1
                           end
                   end
           end,
-          {mwc59,fast}, Iterations,
+          {mwc59,value32}, Iterations,
           TMarkUniformFullRange, OverheadUniformFullRange),
     _ =
         measure_1(
           fun (_Mod, _State) ->
-                  Range = 1 bsl 58,
+                  Range = (1 bsl 59) - 1,
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
                           V = rand:mwc59_value(St1),
                           if
-                              is_integer(V), 0 =< V, V < Range ->
+                              is_integer(V), 0 =< V, V =< Range ->
                                   St1
                           end
                   end
@@ -1422,7 +1491,7 @@ do_measure(Iterations) ->
                           end
                   end
           end,
-          splitmix64_inline, Iterations,
+          {splitmix64,next}, Iterations,
           TMarkUniformFullRange, OverheadUniformFullRange),
     _ =
         measure_1(
@@ -1528,7 +1597,7 @@ do_measure(Iterations) ->
                           end
                   end
           end,
-          splitmix64_inline, Iterations,
+          {splitmix64,next}, Iterations,
           TMarkUniform64Bit, OverheadUniform64Bit),
     %%
     ByteSize = 16, % At about 100 bytes crypto_bytes breaks even to exsss
@@ -1600,7 +1669,6 @@ do_measure(Iterations) ->
           fun (_Mod, _State) ->
                   fun (St0) ->
                           St1 = rand:mwc59(St0),
-                          %% Too few bits for full mantissa
                           case rand:mwc59_float(St1) of
                               R when is_float(R), 0.0 =< R, R < 1.0 ->
                                   St1
@@ -1749,8 +1817,6 @@ measure_init(Alg) ->
             {?MODULE, undefined};
         system_time ->
             {?MODULE, undefined};
-        splitmix64_inline ->
-            {rand, erlang:unique_integer()};
         procdict ->
             {rand, rand:seed(exsss)};
         {Name, Tag} ->
@@ -1762,7 +1828,9 @@ measure_init(Alg) ->
                     {rand, rand:mwc59_seed()};
                 exsp ->
                     {_, S} = rand:seed_s(exsp),
-                    {rand, S}
+                    {rand, S};
+                splitmix64 ->
+                    {rand, erlang:unique_integer()}
             end;
         _ ->
             {rand, rand:seed_s(Alg)}
@@ -1791,19 +1859,20 @@ mwc59_bytes(N, R0, Bin) when is_integer(N), 7*4 =< N ->
     R2 = rand:mwc59(R1),
     R3 = rand:mwc59(R2),
     R4 = rand:mwc59(R3),
-    V1 = rand:mwc59_fast_value(R1),
-    V2 = rand:mwc59_fast_value(R2),
-    V3 = rand:mwc59_fast_value(R3),
-    V4 = rand:mwc59_fast_value(R4),
+    Shift = 59 - 56,
+    V1 = rand:mwc59_value(R1) bsr Shift,
+    V2 = rand:mwc59_value(R2) bsr Shift,
+    V3 = rand:mwc59_value(R3) bsr Shift,
+    V4 = rand:mwc59_value(R4) bsr Shift,
     mwc59_bytes(N-7*4, R4, <<Bin/binary, V1:56, V2:56, V3:56, V4:56>>);
 mwc59_bytes(N, R0, Bin) when is_integer(N), 7 =< N ->
     R1 = rand:mwc59(R0),
-    V = rand:mwc59_fast_value(R1),
+    V = rand:mwc59_value(R1) bsr (59-56),
     mwc59_bytes(N-7, R1, <<Bin/binary, V:56>>);
 mwc59_bytes(N, R0, Bin) when is_integer(N), 0 < N ->
     R1 = rand:mwc59(R0),
     Bits = N bsl 3,
-    V = rand:mwc59_fast_value(R1),
+    V = rand:mwc59_value(R1) bsr (59-Bits),
     {<<Bin/binary, V:Bits>>, R1};
 mwc59_bytes(0, R0, Bin) ->
     {Bin, R0}.
author	Raimo Niskanen <raimo@erlang.org>	2022-05-12 08:56:30 +0200
committer	Raimo Niskanen <raimo@erlang.org>	2022-05-12 08:56:30 +0200
commit	660596b107b62fbfd1ff028a9e46bb822484c2be (patch)
tree	51ab637254a64eff66917f927151d4092200cae4
parent	852daae195175eb56b983eefc7c8540d1e33665f (diff)
parent	08f343bed4f75bf345b04b4c1fac7e1026a50ab3 (diff)
download	erlang-660596b107b62fbfd1ff028a9e46bb822484c2be.tar.gz