Contact
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
| Download

Think Stats by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.

This is the accompanying code for this book.

Website: http://greenteapress.com/wp/think-stats-2e/

Views: 7120
License: GPL3
1
"""This file contains code used in "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2014 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function, division
9
10
import thinkstats2
11
import thinkplot
12
13
import math
14
import random
15
import numpy as np
16
17
18
def MeanError(estimates, actual):
    """Computes the mean signed error of a sequence of estimates.

    Each error is (estimate - actual), so positive and negative errors
    can cancel each other out.

    estimates: sequence of numbers
    actual: actual value

    returns: float mean error
    """
    # Materialize first so any iterable works, then subtract in one step.
    deviations = np.asarray(list(estimates)) - actual
    return np.mean(deviations)
28
29
30
def RMSE(estimates, actual):
    """Computes the root mean squared error of a sequence of estimates.

    Squares each (estimate - actual), averages the squares, and takes
    the square root of that average.

    estimates: sequence of numbers
    actual: actual value

    returns: float RMSE
    """
    def SquaredError(estimate):
        return (estimate - actual)**2

    squared_errors = [SquaredError(estimate) for estimate in estimates]
    return math.sqrt(np.mean(squared_errors))
41
42
43
def Estimate1(n=7, m=1000):
    """Compares the sample mean and sample median as estimators of mu.

    Draws m samples of size n with random.gauss(mu=0, sigma=1), computes
    the mean and the median of each sample, and prints the RMSE of both
    sets of estimates relative to mu.

    n: sample size
    m: number of iterations
    """
    mu, sigma = 0, 1

    # Samples are drawn one after another, so the sequence of random
    # draws is the same as when each sample is summarized immediately.
    samples = [[random.gauss(mu, sigma) for _ in range(n)]
               for _ in range(m)]

    means = [np.mean(sample) for sample in samples]
    medians = [np.median(sample) for sample in samples]

    print('Experiment 1')
    print('rmse xbar', RMSE(means, mu))
    print('rmse median', RMSE(medians, mu))
64
65
66
def Estimate2(n=7, m=1000):
    """Compares the biased and unbiased estimators of the variance.

    Draws m samples of size n with random.gauss(mu=0, sigma=1).  For each
    sample it computes np.var(xs) (the default, biased estimate) and
    np.var(xs, ddof=1) (the unbiased estimate), then prints the mean
    error of each set of estimates relative to the true variance
    sigma**2.

    n: sample size
    m: number of iterations
    """
    mu, sigma = 0, 1

    samples = [[random.gauss(mu, sigma) for _ in range(n)]
               for _ in range(m)]

    biased_estimates = [np.var(sample) for sample in samples]
    unbiased_estimates = [np.var(sample, ddof=1) for sample in samples]

    true_variance = sigma**2
    print('Experiment 2')
    print('mean error biased', MeanError(biased_estimates, true_variance))
    print('mean error unbiased', MeanError(unbiased_estimates, true_variance))
87
88
89
def Estimate3(n=7, m=1000):
    """Compares L and Lm as estimators of the exponential parameter.

    Draws m samples of size n with np.random.exponential(1/lam, n) for
    lam=2.  For each sample it computes L = 1 / mean and
    Lm = log(2) / median, then prints the RMSE and the mean error of
    both sets of estimates relative to lam.

    n: sample size
    m: number of iterations
    """
    lam = 2
    scale = 1 / lam
    log2 = math.log(2)    # constant numerator of the median-based estimate

    samples = [np.random.exponential(scale, n) for _ in range(m)]

    mean_based = [1 / np.mean(sample) for sample in samples]
    median_based = [log2 / np.median(sample) for sample in samples]

    print('Experiment 3')
    print('rmse L', RMSE(mean_based, lam))
    print('rmse Lm', RMSE(median_based, lam))
    print('mean error L', MeanError(mean_based, lam))
    print('mean error Lm', MeanError(median_based, lam))
111
112
113
def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):
    """Plots the sampling distribution of the sample mean.

    Draws m samples of size n with np.random.normal(mu, sigma, n) and
    records each sample mean.  Prints the RMSE of those means relative
    to mu (labelled 'standard error') and the 5th and 95th percentiles
    of their CDF (labelled 'confidence interval'), then plots the CDF
    with a vertical line at each percentile and saves the figure under
    the root name 'estimation1'.

    mu: hypothetical population mean
    sigma: hypothetical population standard deviation
    n: sample size
    m: number of iterations
    """
    sample_means = [np.mean(np.random.normal(mu, sigma, n))
                    for _ in range(m)]

    standard_error = RMSE(sample_means, mu)
    print('standard error', standard_error)

    cdf = thinkstats2.Cdf(sample_means)
    lower = cdf.Percentile(5)
    upper = cdf.Percentile(95)
    ci = lower, upper
    print('confidence interval', ci)

    # mark both ends of the interval with a gray line from y=0 to y=1
    for bound in ci:
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation1',
                   xlabel='sample mean',
                   ylabel='CDF',
                   title='Sampling distribution')
145
146
147
def main():
    """Runs the three estimation experiments and the sampling simulation.

    Calls thinkstats2.RandomSeed(17) first (presumably to make the
    random draws repeatable -- confirm against thinkstats2), then runs
    each experiment in order.
    """
    thinkstats2.RandomSeed(17)

    # (function, keyword arguments), in the order they should run
    experiments = (
        (Estimate1, {}),
        (Estimate2, {}),
        (Estimate3, {'m': 1000}),
        (SimulateSample, {}),
    )
    for experiment, kwargs in experiments:
        experiment(**kwargs)
154
155
156
157
# Run the experiments only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
159
160