Contact
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
| Download

Think Stats, by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.

This is the accompanying code for this book.

Website: http://greenteapress.com/wp/think-stats-2e/

Views: 7115
License: GPL3
1
"""This file contains code for use with "Think Stats",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2014 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
from __future__ import print_function, division
9
10
import unittest
11
import random
12
13
from collections import Counter
14
import numpy as np
15
16
import thinkstats2
17
import thinkplot
18
19
class Test(unittest.TestCase):
20
21
def testOdds(self):
    # Convert a probability to odds and back again, then check that
    # Probability2(3, 1) yields the same probability.
    prob = 0.75
    odds = thinkstats2.Odds(prob)
    self.assertEqual(odds, 3)

    # Feeding the odds back in must recover the starting probability.
    recovered = thinkstats2.Probability(odds)
    self.assertEqual(recovered, 0.75)

    from_pair = thinkstats2.Probability2(3, 1)
    self.assertEqual(from_pair, 0.75)
31
32
def testMean(self):
    # Six values summing to 600, so the expected mean is 100.
    sample = [1, 1, 1, 3, 3, 591]
    self.assertEqual(thinkstats2.Mean(sample), 100)
36
37
def testVar(self):
    # Var is called twice: once on the sample alone and once with the
    # mean supplied as a second argument. Both must give the same value.
    sample = [1, 1, 1, 3, 3, 591]
    mu = thinkstats2.Mean(sample)
    var_default = thinkstats2.Var(sample)
    var_given_mu = thinkstats2.Var(sample, mu)

    self.assertAlmostEqual(mu, 100.0)
    for variance in (var_default, var_given_mu):
        self.assertAlmostEqual(variance, 48217.0)
46
47
def testMeanVar(self):
    # MeanVar returns a pair; unpack it and check both components.
    sample = [1, 1, 1, 3, 3, 591]
    mu, variance = thinkstats2.MeanVar(sample)

    expectations = ((mu, 100.0), (variance, 48217.0))
    for actual, wanted in expectations:
        self.assertAlmostEqual(actual, wanted)
53
54
def testBinomialCoef(self):
    # Each row is (n, k, expected coefficient). The result is rounded
    # before the exact comparison.
    cases = (
        (10, 3, 120),
        (100, 4, 3921225),
    )
    for n, k, wanted in cases:
        coef = thinkstats2.BinomialCoef(n, k)
        self.assertEqual(round(coef), wanted)
60
61
def testInterpolator(self):
    # Build an interpolator over the points (1, 4), (2, 5), (3, 6) and
    # query it in both directions.
    interp = thinkstats2.Interpolator([1, 2, 3], [4, 5, 6])

    # (x, expected y): the three given points, then two x values that
    # fall between them.
    forward = ((1, 4), (2, 5), (3, 6), (1.5, 4.5), (2.75, 5.75))
    for x, wanted_y in forward:
        self.assertAlmostEqual(interp.Lookup(x), wanted_y)

    # (y, expected x): Reverse is expected to undo Lookup.
    backward = ((4, 1), (6, 3), (4.5, 1.5), (5.75, 2.75))
    for y, wanted_x in backward:
        self.assertAlmostEqual(interp.Reverse(y), wanted_x)
92
93
def testTrim(self):
    # Trim 100 shuffled values with p=0.05 and expect 90 to remain.
    # Only the length is checked, so the unseeded shuffle does not
    # affect the outcome.
    values = list(range(100))
    random.shuffle(values)
    kept = thinkstats2.Trim(values, p=0.05)
    self.assertEqual(len(kept), 90)
99
100
def testHist(self):
    # Build a Hist from a string and from a Counter of the same string,
    # then compare against a Hist of the reversed string.
    from_string = thinkstats2.Hist('allen')
    # The printed form is pinned by its length only.
    self.assertEqual(len(str(from_string)), 38)

    self.assertEqual(len(from_string), 4)
    self.assertEqual(from_string.Freq('l'), 2)

    from_counter = thinkstats2.Hist(Counter('allen'))
    self.assertEqual(len(from_counter), 4)
    self.assertEqual(from_counter.Freq('l'), 2)

    # Same letters in a different order must compare equal.
    reversed_hist = thinkstats2.Hist('nella')
    self.assertEqual(from_counter, reversed_hist)
113
114
def testPmf(self):
    # Build a Pmf from a string, from a Counter and from another Pmf,
    # then check copying and rendering.
    from_string = thinkstats2.Pmf('allen')
    # Pinning the length of the printed form may not be a robust check.
    self.assertEqual(len(str(from_string)), 45)

    self.assertEqual(len(from_string), 4)
    self.assertEqual(from_string.Prob('l'), 0.4)
    self.assertEqual(from_string['l'], 0.4)
    self.assertEqual(from_string.Percentile(50), 'l')

    from_counter = thinkstats2.Pmf(Counter('allen'))
    self.assertEqual(len(from_counter), 4)
    self.assertEqual(from_counter.Prob('l'), 0.4)

    from_pmf = thinkstats2.Pmf(from_counter)
    self.assertEqual(len(from_pmf), 4)
    self.assertEqual(from_pmf.Prob('l'), 0.4)

    duplicate = from_pmf.Copy()
    self.assertEqual(from_pmf, duplicate)

    # The rendered x values must be the Pmf's values in sorted order.
    rendered_xs, rendered_ys = from_pmf.Render()
    wanted_xs = tuple(sorted(from_pmf.Values()))
    self.assertEqual(tuple(rendered_xs), wanted_xs)
137
138
def testSortedItems(self):
    # SortedItems on a Pmf of letters, then on a Pmf whose values are of
    # mixed types: a string, NaN, an int and another Pmf.
    letters = thinkstats2.Pmf('allen')
    self.assertEqual(len(letters.SortedItems()), 4)

    mixed = thinkstats2.Pmf(['a', float('nan'), 1, letters])
    # This call should generate a warning.
    mixed_items = mixed.SortedItems()
    self.assertEqual(len(mixed_items), 4)
147
148
def testPmfAddSub(self):
    # Add and subtract a constant and another Pmf, checking the mean of
    # each result. The base Pmf covers the values 1 through 6.
    die = thinkstats2.Pmf([1, 2, 3, 4, 5, 6])

    self.assertAlmostEqual((die + 1).Mean(), 4.5)
    self.assertAlmostEqual((die + die).Mean(), 7.0)
    self.assertAlmostEqual((die - 1).Mean(), 2.5)
    self.assertAlmostEqual((die - die).Mean(), 0)
162
163
def testPmfMulDiv(self):
    # Multiply and divide by a constant and by another Pmf, checking the
    # mean of each result. The base Pmf covers the values 1 through 6.
    die = thinkstats2.Pmf([1, 2, 3, 4, 5, 6])

    self.assertAlmostEqual((die * 2).Mean(), 7)
    self.assertAlmostEqual((die * die).Mean(), 12.25)
    self.assertAlmostEqual((die / 2).Mean(), 1.75)
    self.assertAlmostEqual((die / die).Mean(), 1.4291667)
177
178
def testPmfProbLess(self):
    # ProbLess / ProbGreater against a constant and against another Pmf.
    die = thinkstats2.Pmf(range(1,7))
    self.assertEqual(die.ProbLess(4), 0.5)
    self.assertEqual(die.ProbGreater(3), 0.5)

    two_dice = die + die
    three_dice = two_dice + die
    # Pmf no longer supports magic comparators, so the comparison
    # methods are called by name.
    self.assertAlmostEqual(two_dice.ProbGreater(three_dice), 0.15200617284)
    self.assertAlmostEqual(two_dice.ProbLess(three_dice), 0.778549382716049)
187
188
def testPmfMax(self):
    # Call Max(6) on the sum of three Pmfs over 1..6, pass the result to
    # thinkplot.Cdf, and pin its value at 14.
    die = thinkstats2.Pmf(range(1,7))
    three_dice = (die + die) + die
    max_cdf = three_dice.Max(6)
    thinkplot.Cdf(max_cdf)
    self.assertAlmostEqual(max_cdf[14], 0.558230962626)
195
196
def testCdf(self):
    # Exercise Cdf lookups in both directions, sampling, and every
    # construction route used by this file (Pmf, items, dict, Hist,
    # list, Counter, Copy).
    sample = [1, 2, 2, 3, 5]
    pmf = thinkstats2.Pmf(sample)
    hist = thinkstats2.Hist(sample)

    def check_summary(candidate, compare_prob):
        # Checks shared by every construction route. compare_prob is the
        # assertion used for the probability: approximate for Cdfs built
        # from a Pmf, exact for the others.
        self.assertEqual(len(candidate), 4)
        compare_prob(candidate.Prob(2), 0.6)
        self.assertEqual(candidate.Value(0.6), 2)

    cdf = thinkstats2.Cdf(pmf)
    self.assertEqual(len(str(cdf)), 33)

    # Indexing by value gives the cumulative probability.
    self.assertEqual(cdf[0], 0)
    indexed = ((1, 0.2), (2, 0.6), (3, 0.8), (4, 0.8), (5, 1), (6, 1))
    for x, wanted_p in indexed:
        self.assertAlmostEqual(cdf[x], wanted_p)

    # Probs evaluates many values at once, including ones outside the
    # sample's range.
    probs = cdf.Probs(list(range(-1, 7)))
    wanted_probs = [0, 0, 0.2, 0.6, 0.8, 0.8, 1, 1]
    for actual_p, wanted_p in zip(probs, wanted_probs):
        self.assertAlmostEqual(actual_p, wanted_p)

    # Value maps a probability back to a sample value.
    inverse = (
        (0, 1), (0.1, 1), (0.2, 1),
        (0.3, 2), (0.4, 2), (0.5, 2), (0.6, 2),
        (0.7, 3), (0.8, 3),
        (0.9, 5), (1, 5),
    )
    for p, wanted_x in inverse:
        self.assertEqual(cdf.Value(p), wanted_x)

    values = cdf.ValueArray(np.linspace(0, 1, 11))
    self.assertTrue((values == [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]).all())

    # Seed NumPy's generator so the expected draws are reproducible.
    np.random.seed(17)
    draws = cdf.Sample(7)
    self.assertListEqual(draws.tolist(), [2, 2, 1, 1, 3, 3, 3])

    # This Cdf was built from a Pmf, so its probabilities may carry
    # floating-point representation error; compare them approximately.
    self.assertEqual(len(cdf), 4)
    self.assertAlmostEqual(cdf.Prob(2), 0.6)
    self.assertAlmostEqual(cdf[2], 0.6)
    self.assertEqual(cdf.Value(0.6), 2)

    # Routes that start from the Pmf: approximate probability check.
    check_summary(thinkstats2.MakeCdfFromPmf(pmf), self.assertAlmostEqual)
    check_summary(thinkstats2.MakeCdfFromItems(pmf.Items()), self.assertAlmostEqual)
    check_summary(thinkstats2.Cdf(pmf.d), self.assertAlmostEqual)
    check_summary(thinkstats2.MakeCdfFromDict(pmf.d), self.assertAlmostEqual)

    # Routes that start from counts or raw data: exact probability check.
    check_summary(thinkstats2.Cdf(hist), self.assertEqual)
    check_summary(thinkstats2.MakeCdfFromHist(hist), self.assertEqual)
    check_summary(thinkstats2.Cdf(sample), self.assertEqual)
    check_summary(thinkstats2.MakeCdfFromList(sample), self.assertEqual)

    from_counter = thinkstats2.Cdf(Counter(sample))
    check_summary(from_counter, self.assertEqual)

    # A copy must answer the same queries as its source.
    copied = from_counter.Copy()
    self.assertEqual(copied.Prob(2), 0.6)
    self.assertEqual(copied.Value(0.6), 2)
292
293
def testShift(self):
    # After Shift(1), the shifted Cdf at 2 must equal the original at 1.
    original = thinkstats2.Cdf([1, 2, 2, 3, 5])
    shifted = original.Shift(1)
    self.assertEqual(original[1], shifted[2])
298
299
def testScale(self):
    # After Scale(2), the scaled Cdf at 4 must equal the original at 2.
    original = thinkstats2.Cdf([1, 2, 2, 3, 5])
    scaled = original.Scale(2)
    self.assertEqual(original[2], scaled[4])
304
305
def testCdfRender(self):
    # Render returns two sequences (xs, ps); spot-check one element of
    # each and the totals of both.
    t = [1, 2, 2, 3, 5]
    cdf = thinkstats2.Cdf(t)
    xs, ps = cdf.Render()
    self.assertEqual(xs[0], 1)
    # ps holds floating-point probabilities. Comparing an element, and
    # especially a sum of several elements, for exact equality depends
    # on rounding, so compare approximately as the other tests do.
    self.assertAlmostEqual(ps[2], 0.2)
    self.assertEqual(sum(xs), 22)
    self.assertAlmostEqual(sum(ps), 4.2)
313
314
def testPmfFromCdf(self):
    # Convert Pmf -> Cdf -> Pmf by two routes and check that every
    # probability of the original Pmf is recovered.
    original = thinkstats2.Pmf([1, 2, 2, 3, 5])
    cdf = thinkstats2.Cdf(original)

    # Each route is invoked only when its turn comes, so construction
    # and checking stay interleaved.
    routes = (
        lambda: thinkstats2.Pmf(cdf),
        lambda: cdf.MakePmf(),
    )
    for rebuild in routes:
        recovered = rebuild()
        for value in original.Values():
            self.assertAlmostEqual(original[value], recovered[value])
325
326
def testNormalPdf(self):
    # NormalPdf(mu=1, sigma=2): check its density, then the Pmf and Cdf
    # derived from it, including the first rendered point of each.
    normal = thinkstats2.NormalPdf(mu=1, sigma=2)
    self.assertEqual(len(str(normal)), 29)
    self.assertAlmostEqual(normal.Density(3), 0.12098536226)

    # Pmf obtained through the Pdf's own MakePmf.
    made_pmf = normal.MakePmf()
    self.assertAlmostEqual(made_pmf[1.0], 0.0239951295619)
    pdf_xs, pdf_ps = normal.Render()
    self.assertEqual(pdf_xs[0], -5.0)
    self.assertAlmostEqual(pdf_ps[0], 0.0022159242059690038)

    # Pmf obtained by passing the Pdf to the Pmf constructor.
    ctor_pmf = thinkstats2.Pmf(normal)
    self.assertAlmostEqual(ctor_pmf[1.0], 0.0239951295619)
    pmf_xs, pmf_ps = ctor_pmf.Render()
    self.assertEqual(pmf_xs[0], -5.0)
    self.assertAlmostEqual(pmf_ps[0], 0.00026656181123)

    # Cdf obtained by passing the Pdf to the Cdf constructor.
    ctor_cdf = thinkstats2.Cdf(normal)
    self.assertAlmostEqual(ctor_cdf[1.0], 0.51199756478094904)
    cdf_xs, cdf_ps = ctor_cdf.Render()
    self.assertEqual(cdf_xs[0], -5.0)
    self.assertAlmostEqual(cdf_ps[0], 0.0)
348
349
def testExponentialPdf(self):
    # ExponentialPdf(lam=0.5): check its density, a value of the derived
    # Pmf, and the first rendered point.
    expo = thinkstats2.ExponentialPdf(lam=0.5)
    self.assertEqual(len(str(expo)), 24)
    self.assertAlmostEqual(expo.Density(3), 0.11156508007421491)

    derived_pmf = expo.MakePmf()
    self.assertAlmostEqual(derived_pmf[1.0], 0.02977166586593202)

    first_x, first_p = (seq[0] for seq in expo.Render())
    self.assertEqual(first_x, 0.0)
    self.assertAlmostEqual(first_p, 0.5)
358
359
def testEstimatedPdf(self):
    # EstimatedPdf built from a five-value sample: check its density and
    # the Pmfs made with default and with explicit low/high bounds.
    estimated = thinkstats2.EstimatedPdf([1, 2, 2, 3, 5])
    self.assertEqual(len(str(estimated)), 30)

    # Density is indexed with [0] here, so it returns a sequence.
    density_at_3 = estimated.Density(3)[0]
    self.assertAlmostEqual(density_at_3, 0.19629968)

    default_pmf = estimated.MakePmf()
    self.assertAlmostEqual(default_pmf[1.0], 0.010172282816895044)

    bounded_pmf = estimated.MakePmf(low=0, high=6)
    self.assertAlmostEqual(bounded_pmf[0.0], 0.0050742294053582942)
367
368
def testEvalNormalCdf(self):
    # Each row is (positional arguments, expected probability).
    cdf_cases = (
        ((0,), 0.5),
        ((2, 2, 3), 0.5),
        ((1000, 0, 1), 1.0),
        ((-1000, 0, 1), 0.0),
    )
    for args, wanted_p in cdf_cases:
        self.assertAlmostEqual(thinkstats2.EvalNormalCdf(*args), wanted_p)

    # Inverse Cdf at the 95th and 5th percentiles, called with (p, 0, 1);
    # the two expected values differ only in sign.
    inverse_cases = (
        (0.95, 1.64485362695),
        (0.05, -1.64485362695),
    )
    for p, wanted_x in inverse_cases:
        x = thinkstats2.EvalNormalCdfInverse(p, 0, 1)
        self.assertAlmostEqual(x, wanted_x)
385
386
def testEvalPoissonPmf(self):
    # Pin EvalPoissonPmf for the argument pair (2, 1).
    self.assertAlmostEqual(thinkstats2.EvalPoissonPmf(2, 1), 0.1839397205)
389
390
def testCov(self):
    # Covariance and correlation of a sequence with itself (as a NumPy
    # array), with its negation, and with a second sequence.
    seq = [0, 4, 7, 3, 8, 1, 6, 2, 9, 5]
    arr = np.array(seq)
    other = [5, 4, 3, 0, 8, 9, 7, 6, 2, 1]
    negated = -arr

    self.assertAlmostEqual(thinkstats2.Cov(seq, arr), 8.25)
    self.assertAlmostEqual(thinkstats2.Cov(seq, negated), -8.25)

    self.assertAlmostEqual(thinkstats2.Corr(seq, arr), 1)
    self.assertAlmostEqual(thinkstats2.Corr(seq, negated), -1)
    self.assertAlmostEqual(thinkstats2.Corr(seq, other), -0.1878787878)

    self.assertAlmostEqual(thinkstats2.SpearmanCorr(seq, negated), -1)
    self.assertAlmostEqual(thinkstats2.SpearmanCorr(seq, other), -0.1878787878)
404
405
def testReadStataDct(self):
    # Read the dictionary file by its bare name and check that the three
    # parallel collections each have 243 entries.
    dct = thinkstats2.ReadStataDct('2002FemPreg.dct')
    for attr in ('variables', 'colspecs', 'names'):
        self.assertEqual(len(getattr(dct, attr)), 243)

    # The second element of the final colspec is expected to be -1.
    last_colspec = dct.colspecs[-1]
    self.assertEqual(last_colspec[1], -1)
411
412
def testCdfProbs(self):
    # Evaluate Cdf.Probs on the same values the Cdf was built from.
    t = [-1, 1, 2, 2, 3, 5]
    cdf = thinkstats2.Cdf(t)
    ps = cdf.Probs(t)

    # The result used to be printed without any assertion, so the test
    # could not fail on a wrong answer. Pin the cumulative probabilities
    # instead: with six sample values, each entry is the count of values
    # less than or equal to x, divided by six (the same convention the
    # Probs check in testCdf relies on).
    expected = [1.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 5.0 / 6, 1.0]
    self.assertEqual(len(ps), len(expected))
    for actual, wanted in zip(ps, expected):
        self.assertAlmostEqual(actual, wanted)
417
418
def testPmfOfHist(self):
    # Build a Pmf whose values are themselves Hist objects.
    bowl1 = thinkstats2.Hist(dict(vanilla=30, chocolate=10))
    bowl2 = thinkstats2.Hist(dict(vanilla=20, chocolate=20))
    pmf = thinkstats2.Pmf([bowl1, bowl2])
    # Kept from the original test: exercises Print on Hist-valued entries.
    pmf.Print()

    # The original test asserted nothing. The two bowls have different
    # contents and each appears once, so the Pmf is expected to hold two
    # values with equal probability.
    self.assertEqual(len(pmf), 2)
    for bowl in (bowl1, bowl2):
        self.assertAlmostEqual(pmf[bowl], 0.5)
423
424
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
426
427