Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download

Think Stats, by Allen B. Downey, is an introduction to Probability and Statistics for Python programmers.

This is the accompanying code for this book.

Website: http://greenteapress.com/wp/think-stats-2e/

Views: 7115
License: GPL3
1
"""This file contains code for use with "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2014 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function, division
9
10
import numpy as np
11
import random
12
13
import first
14
import normal
15
import thinkstats2
16
import thinkplot
17
18
19
def PlotPregLengths(live, firsts, others):
    """Plots sampling distributions under the null and alternate hypotheses.

    Two distributions of the difference in mean pregnancy length
    (first babies minus others) are built with normal.SamplingDistMean
    and drawn on the same axes:

    - 'null hypothesis': both groups are modeled from the pooled
      live.prglngth, using the sizes of firsts and others;
    - 'estimated params': each group is modeled from its own prglngth.

    live, firsts, others: DataFrames with a prglngth column

    Results recorded by the original author:

    null hypothesis N(0, 0.00319708)
    0.0837707042554 0.0837707042554 (the two printed tail values)

    estimated params N(0.0780373, 0.00321144)
    -0.0151758158699 0.171250349425 (5th and 95th percentiles, 90% CI)

    Sampling distribution under the null hypothesis is centered
    around 0.

    Sampling distribution under the alternate hypothesis (estimated
    params) is centered around the observed difference, 0.078.

    The variance of the two distributions is very similar; in practice,
    you could reasonably compute whichever one is easier.
    """
    n_firsts, n_others = len(firsts), len(others)

    print('prglngth example')
    delta = firsts.prglngth.mean() - others.prglngth.mean()
    print(delta)

    # Null hypothesis: both groups share the pooled distribution.
    pooled = live.prglngth
    null_dist = (normal.SamplingDistMean(pooled, n_firsts) -
                 normal.SamplingDistMean(pooled, n_others))
    print('null hypothesis', null_dist)
    lower_tail = null_dist.Prob(-delta)
    upper_tail = 1 - null_dist.Prob(delta)
    print(lower_tail, upper_tail)

    thinkplot.PrePlot(2)
    thinkplot.Plot(null_dist, label='null hypothesis')

    # Alternate hypothesis: each group keeps its own estimated parameters.
    alt_dist = (normal.SamplingDistMean(firsts.prglngth, n_firsts) -
                normal.SamplingDistMean(others.prglngth, n_others))
    print('estimated params', alt_dist)
    ci_low, ci_high = alt_dist.Percentile(5), alt_dist.Percentile(95)
    print(ci_low, ci_high)

    thinkplot.Plot(alt_dist, label='estimated params')
    thinkplot.Show(xlabel='difference in means (weeks)', ylabel='CDF')
63
64
65
def GenerateAdultWeight(birth_weights, n, mean_factor=1.09, std_factor=0.03):
    """Generate a random adult weight by simulating annual gain.

    A birth weight is chosen at random from birth_weights and multiplied
    by n growth factors drawn from a normal distribution.

    birth_weights: non-empty sequence of birth weights in lbs
    n: number of years to simulate
    mean_factor: mean of the annual multiplicative growth factor
    std_factor: standard deviation of the annual growth factor

    returns: adult weight in lbs
    """
    bw = random.choice(birth_weights)
    factors = np.random.normal(mean_factor, std_factor, n)
    # The product of an empty array is 1, so n=0 leaves bw unchanged.
    return bw * np.prod(factors)
77
78
79
def PlotAdultWeights(live):
    """Makes a normal probability plot of log10 adult weight.

    Simulates 1000 adult weights with GenerateAdultWeight, each over 40
    years, starting from the non-missing values of live.totalwgt_lb.

    live: DataFrame of live births

    Results noted by the original author:

    With n=40 the distribution is approximately lognormal except for
    the lowest weights.

    Actual distribution might deviate from lognormal because it is
    a mixture of people at different ages, or because annual weight
    gains are correlated.
    """
    birth_weights = live.totalwgt_lb.dropna().values

    aws = []
    for _ in range(1000):
        aws.append(GenerateAdultWeight(birth_weights, 40))

    thinkstats2.NormalProbabilityPlot(np.log10(aws))
    thinkplot.Show(xlabel='standard normal values',
                   ylabel='adult weight (log10 lbs)')
99
100
101
def TestIntervention():
    """Tests whether reported changes are statistically significant.

    Builds a normal.Normal for each group's score before and after the
    intervention (the second argument is passed squared, presumably a
    variance derived from a standard error), then prints the mean,
    p-value, 5th/95th percentiles and sigma of three differences:
    female - male before, female - male after, and after - before.

    Results (mean and p-value) recorded by the original author:

    -1.66 4.73095323208e-05
    -0.26 0.125267987207
    1.4 0.00182694836898

    Conclusions:

    1) Gender gap before intervention was 1.66 points (p-value 5e-5)

    2) Gender gap after was 0.26 points (p-value 0.13, not significant)

    3) Change in gender gap was 1.4 points (p-value 0.002, significant).
    """
    def Report(dist, p_value):
        """Prints the summary lines for one difference distribution."""
        print('mean, p-value', dist.mu, p_value)
        print('CI', dist.Percentile(5), dist.Percentile(95))
        print('stderr', dist.sigma)

    male_before = normal.Normal(3.57, 0.28**2)
    male_after = normal.Normal(3.44, 0.16**2)

    female_before = normal.Normal(1.91, 0.32**2)
    female_after = normal.Normal(3.18, 0.16**2)

    # The gaps are negative, so the p-value is the probability above 0.
    gap_before = female_before - male_before
    Report(gap_before, 1 - gap_before.Prob(0))

    gap_after = female_after - male_after
    Report(gap_after, 1 - gap_after.Prob(0))

    # The change in the gap is positive, so use the probability below 0.
    gap_change = gap_after - gap_before
    Report(gap_change, gap_change.Prob(0))
137
138
139
def main():
    """Runs every exercise in this file.

    Calls thinkstats2.RandomSeed(17) first (presumably so the simulated
    adult weights are repeatable), then runs the intervention test, the
    adult-weight simulation and the pregnancy-length comparison.

    A stray early `return` after TestIntervention() used to leave the two
    plotting exercises unreachable; it has been removed.
    """
    thinkstats2.RandomSeed(17)

    TestIntervention()

    live, firsts, others = first.MakeFrames()
    PlotAdultWeights(live)
    PlotPregLengths(live, firsts, others)
149
150
151
# Script entry point: run main() only when executed directly, not on import.
if __name__ == '__main__':
    main()
153
154