Contact
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
| Download

Think Stats by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.

This is the accompanying code for this book.

Website: http://greenteapress.com/wp/think-stats-2e/

Views: 7115
License: GPL3
1
"""This file contains code used in "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2014 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function
9
10
import math
11
import numpy as np
12
13
import nsfg
14
import first
15
import thinkstats2
16
import thinkplot
17
18
19
def MakeHists(live):
    """Plot the distribution of (floored) pregnancy age two ways and save it.

    The left subplot draws the Hist with thinkplot.Hist; the right subplot
    draws the same Hist object with thinkplot.Pmf.  The figure is saved
    under the root name 'probability_agepreg_hist'.

    live: DataFrame with an `agepreg` column
    """
    whole_years = np.floor(live.agepreg)
    age_hist = thinkstats2.Hist(whole_years, label='agepreg')

    # one row, two side-by-side panels
    thinkplot.PrePlot(2, cols=2)

    # left panel: bar-style histogram
    thinkplot.SubPlot(1)
    thinkplot.Hist(age_hist)
    thinkplot.Config(
        xlabel='years',
        ylabel='frequency',
        axis=[0, 45, 0, 700],
    )

    # right panel: the same Hist drawn through thinkplot.Pmf
    thinkplot.SubPlot(2)
    thinkplot.Pmf(age_hist)

    thinkplot.Save(
        root='probability_agepreg_hist',
        xlabel='years',
        axis=[0, 45, 0, 700],
    )
40
41
42
def MakeFigures(firsts, others):
    """Plot and save pregnancy-length PMFs for first babies and others.

    Two figures are saved: 'probability_nsfg_pmf' (the two PMFs, drawn
    as bars and again via thinkplot.Pmfs) and 'probability_nsfg_diffs'
    (the per-week difference between the PMFs, in percentage points).

    firsts: DataFrame with a `prglngth` column
    others: DataFrame with a `prglngth` column
    """
    # build one PMF per group
    pmf_first = thinkstats2.Pmf(firsts.prglngth, label='first')
    pmf_other = thinkstats2.Pmf(others.prglngth, label='other')
    bar_width = 0.45

    # left panel: the two PMFs as side-by-side bars
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(pmf_first, align='right', width=bar_width)
    thinkplot.Hist(pmf_other, align='left', width=bar_width)
    thinkplot.Config(
        xlabel='weeks',
        ylabel='probability',
        axis=[27, 46, 0, 0.6],
    )

    # right panel: the same PMFs drawn with thinkplot.Pmfs
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([pmf_first, pmf_other])
    thinkplot.Save(
        root='probability_nsfg_pmf',
        xlabel='weeks',
        axis=[27, 46, 0, 0.6],
    )

    # difference between the PMFs, in percentage points, for weeks 35-45
    weeks = range(35, 46)
    pct_point_gaps = [
        100 * (pmf_first.Prob(week) - pmf_other.Prob(week))
        for week in weeks
    ]

    thinkplot.Bar(weeks, pct_point_gaps)
    thinkplot.Save(
        root='probability_nsfg_diffs',
        title='Difference in PMFs',
        xlabel='weeks',
        ylabel='percentage points',
        legend=False,
    )
82
83
84
def BiasPmf(pmf, label=''):
    """Return a copy of pmf reweighted in proportion to each value.

    This models size-biased sampling: if pmf describes the true
    distribution (say, of class sizes) and each value is observed with
    a chance proportional to the value itself (asking students how big
    their class is), the result is the distribution the observers see.

    Args:
        pmf: Pmf object; it is copied, not modified.
        label: string label for the new Pmf.

    Returns:
        Pmf object, normalized.
    """
    biased = pmf.Copy(label=label)

    # Scale each outcome's probability by the outcome itself; the
    # probability half of each (value, probability) pair is not needed.
    for value, _ in pmf.Items():
        biased.Mult(value, value)

    biased.Normalize()
    return biased
107
108
109
def UnbiasPmf(pmf, label=''):
    """Returns the Pmf with oversampling proportional to 1/value.

    This is the inverse of BiasPmf: each probability is multiplied by
    1/value and the result is renormalized.

    Args:
        pmf: Pmf object; it is copied, not modified.
        label: string label for the new Pmf.

    Returns:
        Pmf object, normalized.

    Raises:
        ZeroDivisionError: if pmf assigns non-zero probability to the
            value 0, for which the 1/value weight is undefined.
    """
    new_pmf = pmf.Copy(label=label)

    for x, p in pmf.Items():
        if x == 0 and p == 0:
            # BiasPmf leaves a zero-probability entry behind for the
            # value 0.  Its weight would be 0 * (1/0); keep it at zero
            # instead of dividing by zero so that
            # UnbiasPmf(BiasPmf(pmf)) works when pmf contains 0.
            continue
        new_pmf.Mult(x, 1.0 / x)

    new_pmf.Normalize()
    return new_pmf
126
127
128
def ClassSizes():
    """Print statistics for and plot the class-size PMFs.

    Builds the actual class-size PMF, the observed PMF obtained from it
    with BiasPmf, and the PMF obtained by applying UnbiasPmf to the
    observed one.  The mean and variance of each are printed in that
    order; the actual and observed PMFs are then plotted and saved
    under the root name 'class_size1'.
    """
    def report(dist):
        # one 'mean' line followed by one 'var' line per distribution
        print('mean', dist.Mean())
        print('var', dist.Var())

    # actual distribution of class sizes from the book
    size_counts = {
        7: 8,
        12: 8,
        17: 14,
        22: 4,
        27: 6,
        32: 12,
        37: 8,
        42: 3,
        47: 2,
    }

    actual = thinkstats2.Pmf(size_counts, label='actual')
    report(actual)

    # what students would report: sizes oversampled by size
    observed = BiasPmf(actual, label='observed')
    report(observed)

    # invert the bias starting from the observed distribution
    recovered = UnbiasPmf(observed, label='unbiased')
    report(recovered)

    # only the actual and observed PMFs are plotted
    thinkplot.PrePlot(2)
    thinkplot.Pmfs([actual, observed])
    thinkplot.Save(
        root='class_size1',
        xlabel='class size',
        ylabel='PMF',
        axis=[0, 52, 0, 0.27],
    )
157
158
159
def main(script, *args):
    """Generate every figure and printed statistic in this module.

    script: name of the running script (sys.argv[0]); not used.
    args: any further command-line arguments.  They are accepted and
        ignored so that the `main(*sys.argv)` call in the entry-point
        guard does not raise TypeError when the script is started with
        extra arguments.
    """
    live, firsts, others = first.MakeFrames()
    MakeFigures(firsts, others)
    MakeHists(live)

    ClassSizes()
165
166
167
if __name__ == '__main__':
    # Run as a script: hand the command line (script name first) to main().
    import sys

    main(*sys.argv)
170
171
172
173