summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Melissa Weber Mendonça <melissawm@gmail.com> 2021-11-11 17:45:49 -0300
committer: GitHub <noreply@github.com> 2021-11-11 17:45:49 -0300
commit: 2513620bfbb30fa55bbbba1902f29284adab650c (patch)
tree: f83170bc165df10f6ba69b4727be7cc7fcbcdbe9
parent: e564fcbecfb0b589e224e6e66e8894105a46aedf (diff)
parent: abb136cad711a0c23657926f8e6f3f50d9c37572 (diff)
download: numpy-2513620bfbb30fa55bbbba1902f29284adab650c.tar.gz
Merge pull request #20350 from WarrenWeckesser/zipf-example
DOC: random: Fix a mistake in the zipf example.
-rw-r--r-- numpy/random/_generator.pyx 34
-rw-r--r-- numpy/random/mtrand.pyx 33
2 files changed, 42 insertions, 25 deletions
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx
index 5347ea125..7087b6e1d 100644
--- a/numpy/random/_generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -3107,7 +3107,7 @@ cdef class Generator:
`a` > 1.
The Zipf distribution (also known as the zeta distribution) is a
- continuous probability distribution that satisfies Zipf's law: the
+ discrete probability distribution that satisfies Zipf's law: the
frequency of an item is inversely proportional to its rank in a
frequency table.
@@ -3135,9 +3135,10 @@ cdef class Generator:
-----
The probability density for the Zipf distribution is
- .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)},
+ .. math:: p(k) = \\frac{k^{-a}}{\\zeta(a)},
- where :math:`\\zeta` is the Riemann Zeta function.
+ for integers :math:`k \\geq 1`, where :math:`\\zeta` is the Riemann Zeta
+ function.
It is named for the American linguist George Kingsley Zipf, who noted
that the frequency of any word in a sample of a language is inversely
@@ -3153,22 +3154,29 @@ cdef class Generator:
--------
Draw samples from the distribution:
- >>> a = 2. # parameter
- >>> s = np.random.default_rng().zipf(a, 1000)
+ >>> a = 4.0
+ >>> n = 20000
+ >>> s = np.random.default_rng().zipf(a, size=n)
Display the histogram of the samples, along with
- the probability density function:
+ the expected histogram based on the probability
+ mass function:
>>> import matplotlib.pyplot as plt
- >>> from scipy import special # doctest: +SKIP
+ >>> from scipy.special import zeta # doctest: +SKIP
+
+ `bincount` provides a fast histogram for small integers.
- Truncate s values at 50 so plot is interesting:
+ >>> count = np.bincount(s)
+ >>> k = np.arange(1, s.max() + 1)
- >>> count, bins, ignored = plt.hist(s[s<50],
- ... 50, density=True)
- >>> x = np.arange(1., 50.)
- >>> y = x**(-a) / special.zetac(a) # doctest: +SKIP
- >>> plt.plot(x, y/max(y), linewidth=2, color='r') # doctest: +SKIP
+ >>> plt.bar(k, count[1:], alpha=0.5, label='sample count')
+ >>> plt.plot(k, n*(k**-a)/zeta(a), 'k.-', alpha=0.5,
+ ... label='expected count') # doctest: +SKIP
+ >>> plt.semilogy()
+ >>> plt.grid(alpha=0.4)
+ >>> plt.legend()
+ >>> plt.title(f'Zipf sample, a={a}, size={n}')
>>> plt.show()
"""
diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx
index 81a526ab4..3e13503d0 100644
--- a/numpy/random/mtrand.pyx
+++ b/numpy/random/mtrand.pyx
@@ -3609,7 +3609,7 @@ cdef class RandomState:
`a` > 1.
The Zipf distribution (also known as the zeta distribution) is a
- continuous probability distribution that satisfies Zipf's law: the
+ discrete probability distribution that satisfies Zipf's law: the
frequency of an item is inversely proportional to its rank in a
frequency table.
@@ -3642,9 +3642,10 @@ cdef class RandomState:
-----
The probability density for the Zipf distribution is
- .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)},
+ .. math:: p(k) = \\frac{k^{-a}}{\\zeta(a)},
- where :math:`\\zeta` is the Riemann Zeta function.
+ for integers :math:`k \\geq 1`, where :math:`\\zeta` is the Riemann Zeta
+ function.
It is named for the American linguist George Kingsley Zipf, who noted
that the frequency of any word in a sample of a language is inversely
@@ -3660,21 +3661,29 @@ cdef class RandomState:
--------
Draw samples from the distribution:
- >>> a = 2. # parameter
- >>> s = np.random.zipf(a, 1000)
+ >>> a = 4.0
+ >>> n = 20000
+ >>> s = np.random.zipf(a, n)
Display the histogram of the samples, along with
- the probability density function:
+ the expected histogram based on the probability
+ mass function:
>>> import matplotlib.pyplot as plt
- >>> from scipy import special # doctest: +SKIP
+ >>> from scipy.special import zeta # doctest: +SKIP
+
+ `bincount` provides a fast histogram for small integers.
- Truncate s values at 50 so plot is interesting:
+ >>> count = np.bincount(s)
+ >>> k = np.arange(1, s.max() + 1)
- >>> count, bins, ignored = plt.hist(s[s<50], 50, density=True)
- >>> x = np.arange(1., 50.)
- >>> y = x**(-a) / special.zetac(a) # doctest: +SKIP
- >>> plt.plot(x, y/max(y), linewidth=2, color='r') # doctest: +SKIP
+ >>> plt.bar(k, count[1:], alpha=0.5, label='sample count')
+ >>> plt.plot(k, n*(k**-a)/zeta(a), 'k.-', alpha=0.5,
+ ... label='expected count') # doctest: +SKIP
+ >>> plt.semilogy()
+ >>> plt.grid(alpha=0.4)
+ >>> plt.legend()
+ >>> plt.title(f'Zipf sample, a={a}, size={n}')
>>> plt.show()
"""