Contact
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96141
License: OTHER
1
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com

Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
7
8
import thinkplot
9
import thinkstats2
10
11
12
def BiasPmf(pmf, name, invert=False):
    """Returns the Pmf with oversampling proportional to value.

    If pmf is the distribution of true values, the result is the
    distribution that would be seen if values are oversampled in
    proportion to their values; for example, if you ask students
    how big their classes are, large classes are oversampled in
    proportion to their size.

    If invert=True, computes the inverse operation; for example,
    unbiasing a sample collected from students.

    When inverting, a value of 0 whose probability is 0 is left
    untouched instead of being divided by zero.  Biasing always drives
    the probability of 0 to 0, so this lets a biased Pmf be unbiased
    again (the original mass at 0 cannot be recovered).

    Args:
        pmf: Pmf object; it is copied, not modified.
        name: string name for the new Pmf.
        invert: boolean; if True each value x is weighted by 1/x
            instead of by x.

    Returns:
        Pmf object, normalized.

    Raises:
        ZeroDivisionError: if invert is True and pmf assigns non-zero
            probability to the value 0.
    """
    new_pmf = pmf.Copy(name=name)

    for x, p in pmf.Items():
        if not invert:
            new_pmf.Mult(x, x)
        elif x == 0 and p == 0:
            # 1/0 is undefined, but an entry with no probability mass
            # contributes nothing either way; leave it at zero.
            continue
        else:
            new_pmf.Mult(x, 1.0 / x)

    new_pmf.Normalize()
    return new_pmf
42
43
44
def UnbiasPmf(pmf, name):
    """Returns the Pmf reweighted in proportion to 1/value.

    This is the inverse of BiasPmf: it delegates to BiasPmf with
    invert=True.

    Args:
        pmf: Pmf object.
        name: string name for the new Pmf.

    Returns:
        Pmf object
    """
    unbiased = BiasPmf(pmf, name, invert=True)
    return unbiased
55
56
57
def _PrintSummary(pmf):
    """Prints the mean and variance of a Pmf, one per line."""
    # A single pre-formatted argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('mean %s' % (pmf.Mean(),))
    print('var %s' % (pmf.Var(),))


def ClassSizes():
    """Runs the class size example.

    Builds the actual distribution of class sizes, computes the
    distribution biased in proportion to class size, unbiases that
    result again, prints the mean and variance of each of the three
    Pmfs, and plots the actual and biased Pmfs.
    """
    # start with the actual distribution of class sizes from the book
    d = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    # form the pmf
    pmf = thinkstats2.MakePmfFromDict(d, 'actual')
    _PrintSummary(pmf)

    # compute the biased pmf
    biased_pmf = BiasPmf(pmf, 'observed')
    _PrintSummary(biased_pmf)

    # unbias the biased pmf
    unbiased_pmf = UnbiasPmf(biased_pmf, 'unbiased')
    _PrintSummary(unbiased_pmf)

    # plot the Pmfs (only the actual and biased ones are shown)
    thinkplot.Pmfs([pmf, biased_pmf])
    thinkplot.Show(xlabel='Class size',
                   ylabel='PMF')
91
92
93
def main():
    """Script entry point: runs the class size example."""
    ClassSizes()
95
96
97
# Run the example only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
99
100