Contact
CoCalc Logo Icon
Store Features Docs Share Support News About Sign Up Sign In
| Download

Think Stats by Allen B. Downey. Think Stats is an introduction to Probability and Statistics for Python programmers.

This is the accompanying code for this book.

Website: http://greenteapress.com/wp/think-stats-2e/

Views: 7115
License: GPL3
1
"""This file contains code used in "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2014 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function
9
10
import math
11
import random
12
13
import brfss
14
import first
15
import thinkstats2
16
import thinkplot
17
18
19
def Summarize(data):
    """Prints summary statistics and returns the mean and median.

    Reports, one per line on standard output: the mean, the standard
    deviation, the median, and the values returned by
    thinkstats2.Skewness and thinkstats2.PearsonMedianSkewness.

    data: pandas Series

    returns: tuple of (mean, median)
    """
    center = data.mean()
    spread = data.std()
    middle = thinkstats2.Median(data)

    # the three basic statistics are computed up front, then reported
    for label, value in (('mean', center),
                         ('std', spread),
                         ('median', middle)):
        print(label, value)

    # each skewness measure is computed immediately before it is printed
    skewness = thinkstats2.Skewness(data)
    print('skewness', skewness)

    pearson_skewness = thinkstats2.PearsonMedianSkewness(data)
    print('pearson skewness', pearson_skewness)

    return center, middle
35
36
37
def ComputeSkewnesses():
    """Summarizes and plots the densities of birth weight and adult weight.

    For each of the two datasets this prints the summary statistics
    produced by Summarize, draws vertical marker lines at the mean and
    the median with text labels next to them, plots an estimated PDF of
    the data, and saves the figure via thinkplot.Save.
    """
    def DrawVertical(x, top):
        """Draws a vertical line at x running from 0 up to top."""
        thinkplot.Plot([x, x], [0, top], color='0.6', linewidth=1)

    # --- birth weight (x axis labeled in lbs) ---
    # only the first of the three frames is used here
    live, _firsts, _others = first.MakeFrames()
    birth_weights = live.totalwgt_lb.dropna()
    print('Birth weight')
    mean, median = Summarize(birth_weights)

    # height of the marker lines; labels sit at a tenth of that height
    top = 0.35
    label_y = 0.1 * top
    DrawVertical(mean, top)
    thinkplot.Text(mean - 0.15, label_y, 'mean',
                   horizontalalignment='right')
    DrawVertical(median, top)
    thinkplot.Text(median + 0.1, label_y, 'median',
                   horizontalalignment='left')

    thinkplot.Pdf(thinkstats2.EstimatedPdf(birth_weights),
                  label='birth weight')
    thinkplot.Save(root='density_totalwgt_kde',
                   xlabel='lbs',
                   ylabel='PDF')

    # --- adult weight (x axis labeled in kg) ---
    respondents = brfss.ReadBrfss(nrows=None)
    adult_weights = respondents.wtkg2.dropna()
    print('Adult weight')
    mean, median = Summarize(adult_weights)

    # here the labels are placed on the opposite sides of their lines
    # compared with the birth-weight figure
    top = 0.02499
    label_y = 0.1 * top
    DrawVertical(mean, top)
    thinkplot.Text(mean + 1, label_y, 'mean',
                   horizontalalignment='left')
    DrawVertical(median, top)
    thinkplot.Text(median - 1.5, label_y, 'median',
                   horizontalalignment='right')

    thinkplot.Pdf(thinkstats2.EstimatedPdf(adult_weights),
                  label='adult weight')
    thinkplot.Save(root='density_wtkg2_kde',
                   xlabel='kg',
                   ylabel='PDF',
                   xlim=[0, 200])
77
78
79
def MakePdfExample(n=500):
    """Plots a normal density function together with a KDE of a sample.

    Prints the normal density evaluated one standard deviation above
    the mean, then plots the normal PDF and an estimated PDF built from
    n random draws, and saves the figure under the root 'pdf_example'.

    n: sample size
    """
    # mean and variance of women's heights in cm, from the BRFSS
    mu, variance = 163, 52.8
    sigma = math.sqrt(variance)

    # build the normal PDF and print its density at mean + one std
    normal_pdf = thinkstats2.NormalPdf(mu, sigma)
    density = normal_pdf.Density(mu + sigma)
    print(density)

    # plot the normal PDF; two curves are drawn in total
    # (PrePlot(2) presumably tells thinkplot to expect both -- confirm)
    thinkplot.PrePlot(2)
    thinkplot.Pdf(normal_pdf, label='normal')

    # draw n values from the same normal distribution, estimate a PDF
    # from them, and plot that estimate alongside the first curve
    sample = []
    for _ in range(n):
        sample.append(random.gauss(mu, sigma))
    estimated_pdf = thinkstats2.EstimatedPdf(sample)
    thinkplot.Pdf(estimated_pdf, label='sample KDE')

    thinkplot.Save(root='pdf_example',
                   xlabel='Height (cm)',
                   ylabel='Density')
104
105
106
def main():
    """Runs both examples in this file after fixing the random seed.

    Calls thinkstats2.RandomSeed(17) first (presumably so the random
    sample drawn by MakePdfExample is repeatable), then runs
    MakePdfExample followed by ComputeSkewnesses.
    """
    seed = 17
    thinkstats2.RandomSeed(seed)

    # order matters only in that the PDF example runs first
    for example in (MakePdfExample, ComputeSkewnesses):
        example()
111
112
113
# Run the examples only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
115
116