diff options
author | Melissa Weber Mendonça <melissawm@gmail.com> | 2021-11-11 17:45:49 -0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-11-11 17:45:49 -0300 |
commit | 2513620bfbb30fa55bbbba1902f29284adab650c (patch) | |
tree | f83170bc165df10f6ba69b4727be7cc7fcbcdbe9 | |
parent | e564fcbecfb0b589e224e6e66e8894105a46aedf (diff) | |
parent | abb136cad711a0c23657926f8e6f3f50d9c37572 (diff) | |
download | numpy-2513620bfbb30fa55bbbba1902f29284adab650c.tar.gz |
Merge pull request #20350 from WarrenWeckesser/zipf-example
DOC: random: Fix a mistake in the zipf example.
-rw-r--r-- | numpy/random/_generator.pyx | 34 | ||||
-rw-r--r-- | numpy/random/mtrand.pyx | 33 |
2 files changed, 42 insertions, 25 deletions
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx index 5347ea125..7087b6e1d 100644 --- a/numpy/random/_generator.pyx +++ b/numpy/random/_generator.pyx @@ -3107,7 +3107,7 @@ cdef class Generator: `a` > 1. The Zipf distribution (also known as the zeta distribution) is a - continuous probability distribution that satisfies Zipf's law: the + discrete probability distribution that satisfies Zipf's law: the frequency of an item is inversely proportional to its rank in a frequency table. @@ -3135,9 +3135,10 @@ cdef class Generator: ----- The probability density for the Zipf distribution is - .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)}, + .. math:: p(k) = \\frac{k^{-a}}{\\zeta(a)}, - where :math:`\\zeta` is the Riemann Zeta function. + for integers :math:`k \geq 1`, where :math:`\\zeta` is the Riemann Zeta + function. It is named for the American linguist George Kingsley Zipf, who noted that the frequency of any word in a sample of a language is inversely @@ -3153,22 +3154,29 @@ cdef class Generator: -------- Draw samples from the distribution: - >>> a = 2. # parameter - >>> s = np.random.default_rng().zipf(a, 1000) + >>> a = 4.0 + >>> n = 20000 + >>> s = np.random.default_rng().zipf(a, size=n) Display the histogram of the samples, along with - the probability density function: + the expected histogram based on the probability + density function: >>> import matplotlib.pyplot as plt - >>> from scipy import special # doctest: +SKIP + >>> from scipy.special import zeta # doctest: +SKIP + + `bincount` provides a fast histogram for small integers. - Truncate s values at 50 so plot is interesting: + >>> count = np.bincount(s) + >>> k = np.arange(1, s.max() + 1) - >>> count, bins, ignored = plt.hist(s[s<50], - ... 50, density=True) - >>> x = np.arange(1., 50.) - >>> y = x**(-a) / special.zetac(a) # doctest: +SKIP - >>> plt.plot(x, y/max(y), linewidth=2, color='r') # doctest: +SKIP + >>> plt.bar(k, count[1:], alpha=0.5, label='sample count') + >>> plt.plot(k, n*(k**-a)/zeta(a), 'k.-', alpha=0.5, + ... label='expected count') # doctest: +SKIP + >>> plt.semilogy() + >>> plt.grid(alpha=0.4) + >>> plt.legend() + >>> plt.title(f'Zipf sample, a={a}, size={n}') >>> plt.show() """ diff --git a/numpy/random/mtrand.pyx b/numpy/random/mtrand.pyx index 81a526ab4..3e13503d0 100644 --- a/numpy/random/mtrand.pyx +++ b/numpy/random/mtrand.pyx @@ -3609,7 +3609,7 @@ cdef class RandomState: `a` > 1. The Zipf distribution (also known as the zeta distribution) is a - continuous probability distribution that satisfies Zipf's law: the + discrete probability distribution that satisfies Zipf's law: the frequency of an item is inversely proportional to its rank in a frequency table. @@ -3642,9 +3642,10 @@ cdef class RandomState: ----- The probability density for the Zipf distribution is - .. math:: p(x) = \\frac{x^{-a}}{\\zeta(a)}, + .. math:: p(k) = \\frac{k^{-a}}{\\zeta(a)}, - where :math:`\\zeta` is the Riemann Zeta function. + for integers :math:`k \geq 1`, where :math:`\\zeta` is the Riemann Zeta + function. It is named for the American linguist George Kingsley Zipf, who noted that the frequency of any word in a sample of a language is inversely @@ -3660,21 +3661,29 @@ cdef class RandomState: -------- Draw samples from the distribution: - >>> a = 2. # parameter - >>> s = np.random.zipf(a, 1000) + >>> a = 4.0 + >>> n = 20000 + >>> s = np.random.zipf(a, n) Display the histogram of the samples, along with - the probability density function: + the expected histogram based on the probability + density function: >>> import matplotlib.pyplot as plt - >>> from scipy import special # doctest: +SKIP + >>> from scipy.special import zeta # doctest: +SKIP + + `bincount` provides a fast histogram for small integers. - Truncate s values at 50 so plot is interesting: + >>> count = np.bincount(s) + >>> k = np.arange(1, s.max() + 1) - >>> count, bins, ignored = plt.hist(s[s<50], 50, density=True) - >>> x = np.arange(1., 50.) - >>> y = x**(-a) / special.zetac(a) # doctest: +SKIP - >>> plt.plot(x, y/max(y), linewidth=2, color='r') # doctest: +SKIP + >>> plt.bar(k, count[1:], alpha=0.5, label='sample count') + >>> plt.plot(k, n*(k**-a)/zeta(a), 'k.-', alpha=0.5, + ... label='expected count') # doctest: +SKIP + >>> plt.semilogy() + >>> plt.grid(alpha=0.4) + >>> plt.legend() + >>> plt.title(f'Zipf sample, a={a}, size={n}') >>> plt.show() """ |