Merge pull request #20350 from WarrenWeckesser/zipf-example

DOC: random: Fix a mistake in the zipf example.
author: Melissa Weber Mendonça <melissawm@gmail.com> 2021-11-11 17:45:49 -0300
committer: GitHub <noreply@github.com> 2021-11-11 17:45:49 -0300
commit: 2513620bfbb30fa55bbbba1902f29284adab650c (patch)
tree: f83170bc165df10f6ba69b4727be7cc7fcbcdbe9
parent: e564fcbecfb0b589e224e6e66e8894105a46aedf (diff)
parent: abb136cad711a0c23657926f8e6f3f50d9c37572 (diff)
download: numpy-2513620bfbb30fa55bbbba1902f29284adab650c.tar.gz
2 files changed, 42 insertions, 25 deletions
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx
index 5347ea125..7087b6e1d 100644
--- a/numpy/random/_generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -3107,7 +3107,7 @@ cdef class Generator:
         `a` > 1.
 
         The Zipf distribution (also known as the zeta distribution) is a
-        continuous probability distribution that satisfies Zipf's law: the
+        discrete probability distribution that satisfies Zipf's law: the
         frequency of an item is inversely proportional to its rank in a
         frequency table.
 
@@ -3135,9 +3135,10 @@ cdef class Generator:
         -----
         The probability mass function for the Zipf distribution is
 
-        .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)},
+        .. math:: p(k) = \\frac{k^{-a}}{\\zeta(a)},
 
-        where :math:`\\zeta` is the Riemann Zeta function.
+        for integers :math:`k \\geq 1`, where :math:`\\zeta` is the Riemann
+        Zeta function.
 
         It is named for the American linguist George Kingsley Zipf, who noted
         that the frequency of any word in a sample of a language is inversely
@@ -3153,22 +3154,29 @@ cdef class Generator:
         --------
         Draw samples from the distribution:
 
-        >>> a = 2. # parameter
-        >>> s = np.random.default_rng().zipf(a, 1000)
+        >>> a = 4.0
+        >>> n = 20000
+        >>> s = np.random.default_rng().zipf(a, size=n)
 
         Display the histogram of the samples, along with
-        the probability density function:
+        the expected histogram based on the probability
+        mass function:
 
         >>> import matplotlib.pyplot as plt
-        >>> from scipy import special  # doctest: +SKIP
+        >>> from scipy.special import zeta  # doctest: +SKIP
+
+        `bincount` provides a fast histogram for small integers.
 
-        Truncate s values at 50 so plot is interesting:
+        >>> count = np.bincount(s)
+        >>> k = np.arange(1, s.max() + 1)
 
-        >>> count, bins, ignored = plt.hist(s[s<50],
-        ...         50, density=True)
-        >>> x = np.arange(1., 50.)
-        >>> y = x**(-a) / special.zetac(a)  # doctest: +SKIP
-        >>> plt.plot(x, y/max(y), linewidth=2, color='r')  # doctest: +SKIP
+        >>> plt.bar(k, count[1:], alpha=0.5, label='sample count')
+        >>> plt.plot(k, n*(k**-a)/zeta(a), 'k.-', alpha=0.5,
+        ...          label='expected count')   # doctest: +SKIP
+        >>> plt.semilogy()
+        >>> plt.grid(alpha=0.4)
+        >>> plt.legend()
+        >>> plt.title(f'Zipf sample, a={a}, size={n}')
         >>> plt.show()
 
         """
diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
index 81a526ab4..3e13503d0 100644
--- a/numpy/random/mtrand.pyx
+++ b/numpy/random/mtrand.pyx
@@ -3609,7 +3609,7 @@ cdef class RandomState:
         `a` > 1.
 
         The Zipf distribution (also known as the zeta distribution) is a
-        continuous probability distribution that satisfies Zipf's law: the
+        discrete probability distribution that satisfies Zipf's law: the
         frequency of an item is inversely proportional to its rank in a
         frequency table.
 
@@ -3642,9 +3642,10 @@ cdef class RandomState:
         -----
         The probability mass function for the Zipf distribution is
 
-        .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)},
+        .. math:: p(k) = \\frac{k^{-a}}{\\zeta(a)},
 
-        where :math:`\\zeta` is the Riemann Zeta function.
+        for integers :math:`k \\geq 1`, where :math:`\\zeta` is the Riemann
+        Zeta function.
 
         It is named for the American linguist George Kingsley Zipf, who noted
         that the frequency of any word in a sample of a language is inversely
@@ -3660,21 +3661,29 @@ cdef class RandomState:
         --------
         Draw samples from the distribution:
 
-        >>> a = 2. # parameter
-        >>> s = np.random.zipf(a, 1000)
+        >>> a = 4.0
+        >>> n = 20000
+        >>> s = np.random.zipf(a, n)
 
         Display the histogram of the samples, along with
-        the probability density function:
+        the expected histogram based on the probability
+        mass function:
 
         >>> import matplotlib.pyplot as plt
-        >>> from scipy import special  # doctest: +SKIP
+        >>> from scipy.special import zeta  # doctest: +SKIP
+
+        `bincount` provides a fast histogram for small integers.
 
-        Truncate s values at 50 so plot is interesting:
+        >>> count = np.bincount(s)
+        >>> k = np.arange(1, s.max() + 1)
 
-        >>> count, bins, ignored = plt.hist(s[s<50], 50, density=True)
-        >>> x = np.arange(1., 50.)
-        >>> y = x**(-a) / special.zetac(a)  # doctest: +SKIP
-        >>> plt.plot(x, y/max(y), linewidth=2, color='r')  # doctest: +SKIP
+        >>> plt.bar(k, count[1:], alpha=0.5, label='sample count')
+        >>> plt.plot(k, n*(k**-a)/zeta(a), 'k.-', alpha=0.5,
+        ...          label='expected count')   # doctest: +SKIP
+        >>> plt.semilogy()
+        >>> plt.grid(alpha=0.4)
+        >>> plt.legend()
+        >>> plt.title(f'Zipf sample, a={a}, size={n}')
         >>> plt.show()
 
         """
author	Melissa Weber Mendonça <melissawm@gmail.com>	2021-11-11 17:45:49 -0300
committer	GitHub <noreply@github.com>	2021-11-11 17:45:49 -0300
commit	2513620bfbb30fa55bbbba1902f29284adab650c (patch)
tree	f83170bc165df10f6ba69b4727be7cc7fcbcdbe9
parent	e564fcbecfb0b589e224e6e66e8894105a46aedf (diff)
parent	abb136cad711a0c23657926f8e6f3f50d9c37572 (diff)
download	numpy-2513620bfbb30fa55bbbba1902f29284adab650c.tar.gz