Contact
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
| Download

Think Stats, by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.

This is the accompanying code for this book.

Website: http://greenteapress.com/wp/think-stats-2e/

Views: 7116
License: GPL3
1
"""This file contains code used in "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2014 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function
9
10
import math
11
import numpy as np
12
13
import nsfg
14
import thinkstats2
15
import thinkplot
16
17
18
def MakeFrames():
    """Loads the pregnancy records and splits live births by birth order.

    returns: tuple of DataFrames (all live births, first babies, others)
    """
    pregnancies = nsfg.ReadFemPreg()

    # Rows whose outcome code is 1 are kept as the live births.
    live = pregnancies[pregnancies.outcome == 1]

    # Birth order 1 marks a first baby; every other live birth is "other".
    birth_order = live.birthord
    firsts = live[birth_order == 1]
    others = live[birth_order != 1]

    # Sanity-check the size of each frame against the expected row counts.
    for frame, expected_rows in ((live, 9148), (firsts, 4413), (others, 4735)):
        assert len(frame) == expected_rows

    return live, firsts, others
34
35
36
def Summarize(live, firsts, others):
    """Prints summary statistics of pregnancy length (prglngth).

    Reports the mean, variance and standard deviation over all live
    births, then the means and variances of first babies and others,
    the difference between the two means, and Cohen's effect size.

    live: DataFrame of live births
    firsts: DataFrame of first babies
    others: DataFrame of others
    """
    all_lengths = live.prglngth
    overall = [
        ('Live mean', all_lengths.mean()),
        ('Live variance', all_lengths.var()),
        ('Live std', all_lengths.std()),
    ]
    for caption, value in overall:
        print(caption, value)

    first_lengths = firsts.prglngth
    other_lengths = others.prglngth

    first_mean, other_mean = first_lengths.mean(), other_lengths.mean()
    first_var, other_var = first_lengths.var(), other_lengths.var()

    print('Mean')
    print('First babies', first_mean)
    print('Others', other_mean)

    print('Variance')
    print('First babies', first_var)
    print('Others', other_var)

    # The difference in means is reported three ways: as-is (weeks),
    # scaled by 7 * 24 (hours), and as a percentage of 39 weeks.
    gap = first_mean - other_mean
    print('Difference in weeks', gap)
    print('Difference in hours', gap * 7 * 24)

    print('Difference relative to 39 weeks', gap / 39 * 100)

    effect_size = thinkstats2.CohenEffectSize(first_lengths, other_lengths)
    print('Cohen d', effect_size)
68
69
70
def PrintExtremes(live):
    """Plots and saves the histogram of pregnancy lengths, then prints
    the entries at both ends of it.

    live: DataFrame of live births
    """
    length_hist = thinkstats2.Hist(live.prglngth)
    thinkplot.Hist(length_hist, label='live births')

    save_options = dict(
        root='first_nsfg_hist_live',
        title='Histogram',
        xlabel='weeks',
        ylabel='frequency',
    )
    thinkplot.Save(**save_options)

    # Each heading is printed before its selector runs, then the ten
    # (weeks, freq) pairs that selector yields are printed one per line.
    extremes = (
        ('Shortest lengths:', length_hist.Smallest),
        ('Longest lengths:', length_hist.Largest),
    )
    for heading, select in extremes:
        print(heading)
        for weeks, freq in select(10):
            print(weeks, freq)
90
91
92
def MakeHists(live):
    """Plots and saves one histogram per variable of the live births.

    The variables are birthwgt_lb (pounds), birthwgt_oz (ounces),
    agepreg rounded down with np.floor (years) and prglngth (weeks).

    live: DataFrame of live births
    """
    def plot_and_save(values, label, **save_options):
        """Draws the Hist of values and saves the figure with the options."""
        thinkplot.Hist(thinkstats2.Hist(values, label=label))
        thinkplot.Save(**save_options)

    plot_and_save(live.birthwgt_lb, 'birthwgt_lb',
                  root='first_wgt_lb_hist',
                  xlabel='pounds',
                  ylabel='frequency',
                  axis=[-1, 14, 0, 3200])

    plot_and_save(live.birthwgt_oz, 'birthwgt_oz',
                  root='first_wgt_oz_hist',
                  xlabel='ounces',
                  ylabel='frequency',
                  axis=[-1, 16, 0, 1200])

    # No explicit axis limits are passed for the age histogram.
    plot_and_save(np.floor(live.agepreg), 'agepreg',
                  root='first_agepreg_hist',
                  xlabel='years',
                  ylabel='frequency')

    plot_and_save(live.prglngth, 'prglngth',
                  root='first_prglngth_hist',
                  xlabel='weeks',
                  ylabel='frequency',
                  axis=[-1, 53, 0, 5000])
124
125
126
def MakeComparison(firsts, others):
    """Plots and saves side-by-side histograms of pregnancy length for
    first babies and others.

    firsts: DataFrame of first babies
    others: DataFrame of others
    """
    bar_width = 0.45

    first_hist, other_hist = [
        thinkstats2.Hist(frame.prglngth, label=label)
        for frame, label in ((firsts, 'first'), (others, 'other'))
    ]

    # NOTE(review): PrePlot(2) presumably announces that two series will
    # be drawn -- confirm against thinkplot.
    thinkplot.PrePlot(2)

    # Opposite alignments with a width under 0.5 put the two sets of bars
    # on either side of each week value.
    for hist, side in ((first_hist, 'right'), (other_hist, 'left')):
        thinkplot.Hist(hist, align=side, width=bar_width)

    thinkplot.Save(root='first_nsfg_hist',
                   title='Histogram',
                   xlabel='weeks',
                   ylabel='frequency',
                   axis=[27, 46, 0, 2700])
145
146
147
def main(script, *args):
    """Runs the whole analysis: builds the frames, makes the plots and
    prints the summary statistics.

    The entry point calls main(*sys.argv), so every command-line word is
    passed positionally. Extra words are accepted and ignored here so
    that running the script with arguments does not raise a TypeError.

    script: string, the first command-line word (unused)
    args: any further command-line words (unused)
    """
    live, firsts, others = MakeFrames()

    MakeHists(live)
    PrintExtremes(live)
    MakeComparison(firsts, others)
    Summarize(live, firsts, others)
154
155
156
if __name__ == '__main__':
    # Executed as a script: forward every command-line word (the script
    # path comes first) to main() as positional arguments.
    import sys

    main(*sys.argv)
159
160
161
162