Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96137
License: OTHER
1
"""This file contains code used in "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2011 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function
9
10
import numpy as np
11
import random
12
import scipy.stats
13
14
import thinkstats2
15
import thinkplot
16
17
from collections import Counter
18
19
20
def ParetoCdf(x, alpha, xmin):
21
"""Evaluates CDF of the Pareto distribution with parameters alpha, xmin."""
22
if x < xmin:
23
return 0
24
return 1 - pow(x / xmin, -alpha)
25
26
27
def ParetoMedian(xmin, alpha):
28
"""Computes the median of a Pareto distribution."""
29
return xmin * pow(2, 1/alpha)
30
31
32
def MakeParetoCdf():
33
"""Generates a plot of the CDF of height in Pareto World."""
34
n = 50
35
max = 1000.0
36
xs = [max*i/n for i in range(n)]
37
38
xmin = 100
39
alpha = 1.7
40
ps = [ParetoCdf(x, alpha, xmin) for x in xs]
41
print('Median', ParetoMedian(xmin, alpha))
42
43
pyplot.clf()
44
pyplot.plot(xs, ps, linewidth=2)
45
myplot.Save('pareto_world1',
46
title = 'Pareto CDF',
47
xlabel = 'height (cm)',
48
ylabel = 'CDF',
49
legend=False)
50
51
52
def MakeFigure(xmin=100, alpha=1.7, mu=150, sigma=25):
53
"""Makes a figure showing the CDF of height in ParetoWorld.
54
55
Compared to a normal distribution.
56
57
xmin: parameter of the Pareto distribution
58
alpha: parameter of the Pareto distribution
59
mu: parameter of the Normal distribution
60
sigma: parameter of the Normal distribution
61
"""
62
63
t1 = [xmin * random.paretovariate(alpha) for i in range(10000)]
64
cdf1 = Cdf.MakeCdfFromList(t1, name='pareto')
65
66
t2 = [random.normalvariate(mu, sigma) for i in range(10000)]
67
cdf2 = Cdf.MakeCdfFromList(t2, name='normal')
68
69
myplot.Clf()
70
myplot.Cdfs([cdf1, cdf2])
71
myplot.Save(root='pareto_world2',
72
title='Pareto World',
73
xlabel='height (cm)',
74
ylabel='CDF')
75
76
77
def TallestPareto(iters=2, n=10000, xmin=100, alpha=1.7):
78
"""Find the tallest person in Pareto World.
79
80
iters: how many samples to generate
81
n: how many in each sample
82
xmin: parameter of the Pareto distribution
83
alpha: parameter of the Pareto distribution
84
"""
85
tallest = 0
86
for i in range(iters):
87
t = [xmin * random.paretovariate(alpha) for i in range(n)]
88
tallest = max(max(t), tallest)
89
return tallest
90
91
92
def ParetoSample(alpha, xmin, n):
93
t = scipy.stats.pareto.rvs(b=alpha, size=n) * xmin
94
return t.round()
95
96
97
def main():
98
99
counter = Counter()
100
for i in range(10000):
101
sample = ParetoSample(1.7, 0.001, 10000)
102
counter.update(Counter(sample))
103
104
print(len(counter))
105
return
106
107
pmf = thinkstats2.Pmf(counter)
108
print('mean', pmf.Mean())
109
for x, prob in pmf.Largest(10):
110
print(x)
111
112
cdf = thinkstats2.Cdf(pmf)
113
thinkplot.Cdf(cdf, complement=True)
114
thinkplot.Show(xscale='log',
115
yscale='log')
116
return
117
118
MakeFigure()
119
MakeParetoCdf()
120
print(TallestPareto(iters=2))
121
122
123
if __name__ == "__main__":
124
main()
125
126