CoCalc -- thinkstats2

Think Stats by Allen B. Downey. Think Stats is an introduction to Probability and Statistics for Python programmers.
This is the accompanying code for this book.
Website: http://greenteapress.com/wp/think-stats-2e/
Path: think-stats-code / thinkstats2_test.py
Views: ⁷¹¹⁵
License: GPL3
"""This file contains code for use with "Think Stats",
by Allen B. Downey, available from greenteapress.com

Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
7

8
from __future__ import print_function, division
9

10
import unittest
11
import random
12

13
from collections import Counter
14
import numpy as np
15

16
import thinkstats2
17
import thinkplot
18

19
class Test(unittest.TestCase):
    """Unit tests for the thinkstats2 module.

    Each test calls thinkstats2 with small literal inputs and compares the
    results with expected values written out in the test itself.
    """

    def testOdds(self):
        """Odds(0.75) is 3; Probability and Probability2 map back to 0.75."""
        p = 0.75
        o = thinkstats2.Odds(p)
        self.assertEqual(o, 3)

        p = thinkstats2.Probability(o)
        self.assertEqual(p, 0.75)

        p = thinkstats2.Probability2(3, 1)
        self.assertEqual(p, 0.75)

    def testMean(self):
        """Mean of a short list with one large outlier is 100."""
        t = [1, 1, 1, 3, 3, 591]
        mean = thinkstats2.Mean(t)
        self.assertEqual(mean, 100)

    def testVar(self):
        """Var gives the same result with and without a supplied mean."""
        t = [1, 1, 1, 3, 3, 591]
        mean = thinkstats2.Mean(t)
        var1 = thinkstats2.Var(t)
        var2 = thinkstats2.Var(t, mean)

        self.assertAlmostEqual(mean, 100.0)
        self.assertAlmostEqual(var1, 48217.0)
        self.assertAlmostEqual(var2, 48217.0)

    def testMeanVar(self):
        """MeanVar returns the (mean, variance) pair in one call."""
        t = [1, 1, 1, 3, 3, 591]
        mean, var = thinkstats2.MeanVar(t)

        self.assertAlmostEqual(mean, 100.0)
        self.assertAlmostEqual(var, 48217.0)

    def testBinomialCoef(self):
        """BinomialCoef results round to the exact binomial coefficients."""
        res = thinkstats2.BinomialCoef(10, 3)
        self.assertEqual(round(res), 120)

        res = thinkstats2.BinomialCoef(100, 4)
        self.assertEqual(round(res), 3921225)

    def testInterpolator(self):
        """Lookup maps x to y and Reverse maps y back to x on a straight line."""
        xs = [1, 2, 3]
        ys = [4, 5, 6]
        interp = thinkstats2.Interpolator(xs, ys)

        # Forward direction: knots first, then points between knots.
        for x, y in [(1, 4), (2, 5), (3, 6), (1.5, 4.5), (2.75, 5.75)]:
            self.assertAlmostEqual(interp.Lookup(x), y)

        # Reverse direction: knots first, then points between knots.
        for y, x in [(4, 1), (6, 3), (4.5, 1.5), (5.75, 2.75)]:
            self.assertAlmostEqual(interp.Reverse(y), x)

    def testTrim(self):
        """Trim with p=0.05 leaves 90 of 100 shuffled values."""
        t = list(range(100))
        random.shuffle(t)
        trimmed = thinkstats2.Trim(t, p=0.05)
        n = len(trimmed)
        self.assertEqual(n, 90)

    def testHist(self):
        """Hist counts characters and compares equal regardless of order."""
        hist = thinkstats2.Hist('allen')
        self.assertEqual(len(str(hist)), 38)

        self.assertEqual(len(hist), 4)
        self.assertEqual(hist.Freq('l'), 2)

        hist = thinkstats2.Hist(Counter('allen'))
        self.assertEqual(len(hist), 4)
        self.assertEqual(hist.Freq('l'), 2)

        hist2 = thinkstats2.Hist('nella')
        self.assertEqual(hist, hist2)

    def testPmf(self):
        """Pmf can be built from a string, a Counter or another Pmf."""
        pmf = thinkstats2.Pmf('allen')
        # this one might not be a robust test
        self.assertEqual(len(str(pmf)), 45)

        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)
        self.assertEqual(pmf['l'], 0.4)
        self.assertEqual(pmf.Percentile(50), 'l')

        pmf = thinkstats2.Pmf(Counter('allen'))
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf = thinkstats2.Pmf(pmf)
        self.assertEqual(len(pmf), 4)
        self.assertEqual(pmf.Prob('l'), 0.4)

        pmf2 = pmf.Copy()
        self.assertEqual(pmf, pmf2)

        xs, ys = pmf.Render()
        self.assertEqual(tuple(xs), tuple(sorted(pmf.Values())))

    def testSortedItems(self):
        """SortedItems returns every item, even for values of mixed types."""
        pmf = thinkstats2.Pmf('allen')
        items = pmf.SortedItems()
        self.assertEqual(len(items), 4)

        pmf = thinkstats2.Pmf(['a', float('nan'), 1, pmf])
        # should generate a warning
        items = pmf.SortedItems()
        self.assertEqual(len(items), 4)

    def testPmfAddSub(self):
        """Adding or subtracting a constant or a Pmf shifts the mean as expected."""
        pmf = thinkstats2.Pmf([1, 2, 3, 4, 5, 6])

        pmf1 = pmf + 1
        self.assertAlmostEqual(pmf1.Mean(), 4.5)

        pmf2 = pmf + pmf
        self.assertAlmostEqual(pmf2.Mean(), 7.0)

        pmf3 = pmf - 1
        self.assertAlmostEqual(pmf3.Mean(), 2.5)

        pmf4 = pmf - pmf
        self.assertAlmostEqual(pmf4.Mean(), 0)

    def testPmfMulDiv(self):
        """Multiplying or dividing by a constant or a Pmf gives the expected mean."""
        pmf = thinkstats2.Pmf([1, 2, 3, 4, 5, 6])

        pmf1 = pmf * 2
        self.assertAlmostEqual(pmf1.Mean(), 7)

        pmf2 = pmf * pmf
        self.assertAlmostEqual(pmf2.Mean(), 12.25)

        pmf3 = pmf / 2
        self.assertAlmostEqual(pmf3.Mean(), 1.75)

        pmf4 = pmf / pmf
        self.assertAlmostEqual(pmf4.Mean(), 1.4291667)

    def testPmfProbLess(self):
        """ProbLess and ProbGreater accept either a number or another Pmf."""
        d6 = thinkstats2.Pmf(range(1, 7))
        # These probabilities are sums of floats, so compare approximately.
        self.assertAlmostEqual(d6.ProbLess(4), 0.5)
        self.assertAlmostEqual(d6.ProbGreater(3), 0.5)
        two = d6 + d6
        three = two + d6
        # Pmf no longer supports magic comparators
        self.assertAlmostEqual(two.ProbGreater(three), 0.15200617284)
        self.assertAlmostEqual(two.ProbLess(three), 0.778549382716049)

    def testPmfMax(self):
        """The result of Max(6) is passed to thinkplot.Cdf and checked at 14."""
        d6 = thinkstats2.Pmf(range(1, 7))
        two = d6 + d6
        three = two + d6
        cdf = three.Max(6)
        thinkplot.Cdf(cdf)
        self.assertAlmostEqual(cdf[14], 0.558230962626)

    def _checkCdf(self, cdf, exact=False):
        """Checks the properties every Cdf of [1, 2, 2, 3, 5] shares.

        cdf: Cdf object built from the values 1, 2, 2, 3, 5
        exact: if True, Prob(2) must equal 0.6 exactly; otherwise it only
               has to match to within assertAlmostEqual's tolerance
        """
        self.assertEqual(len(cdf), 4)
        if exact:
            self.assertEqual(cdf.Prob(2), 0.6)
        else:
            self.assertAlmostEqual(cdf.Prob(2), 0.6)
        self.assertEqual(cdf.Value(0.6), 2)

    def testCdf(self):
        """Cdf lookups, inverse lookups, sampling and the ways to build a Cdf."""
        t = [1, 2, 2, 3, 5]
        pmf = thinkstats2.Pmf(t)
        hist = thinkstats2.Hist(t)

        cdf = thinkstats2.Cdf(pmf)
        self.assertEqual(len(str(cdf)), 33)

        self.assertEqual(cdf[0], 0)
        self.assertAlmostEqual(cdf[1], 0.2)
        self.assertAlmostEqual(cdf[2], 0.6)
        self.assertAlmostEqual(cdf[3], 0.8)
        self.assertAlmostEqual(cdf[4], 0.8)
        self.assertAlmostEqual(cdf[5], 1)
        self.assertAlmostEqual(cdf[6], 1)

        xs = list(range(-1, 7))
        ps = cdf.Probs(xs)
        expected_ps = [0, 0, 0.2, 0.6, 0.8, 0.8, 1, 1]
        # zip stops at the shorter sequence, so check the length explicitly.
        self.assertEqual(len(ps), len(expected_ps))
        for p1, p2 in zip(ps, expected_ps):
            self.assertAlmostEqual(p1, p2)

        # (probability, expected value) pairs for the inverse lookup.
        expected_values = [
            (0, 1), (0.1, 1), (0.2, 1),
            (0.3, 2), (0.4, 2), (0.5, 2), (0.6, 2),
            (0.7, 3), (0.8, 3),
            (0.9, 5), (1, 5),
        ]
        for p, x in expected_values:
            self.assertEqual(cdf.Value(p), x)

        ps = np.linspace(0, 1, 11)
        xs = cdf.ValueArray(ps)
        self.assertTrue((xs == [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]).all())

        np.random.seed(17)
        xs = cdf.Sample(7)
        self.assertListEqual(xs.tolist(), [2, 2, 1, 1, 3, 3, 3])

        # when you make a Cdf from a Pmf, you might get some floating
        # point representation error
        self._checkCdf(cdf)
        self.assertAlmostEqual(cdf[2], 0.6)

        # Cdfs derived from the Pmf's probabilities: approximate comparison.
        self._checkCdf(thinkstats2.MakeCdfFromPmf(pmf))
        self._checkCdf(thinkstats2.MakeCdfFromItems(pmf.Items()))
        self._checkCdf(thinkstats2.Cdf(pmf.d))
        self._checkCdf(thinkstats2.MakeCdfFromDict(pmf.d))

        # Cdfs derived from integer counts or raw values: exact comparison.
        self._checkCdf(thinkstats2.Cdf(hist), exact=True)
        self._checkCdf(thinkstats2.MakeCdfFromHist(hist), exact=True)
        self._checkCdf(thinkstats2.Cdf(t), exact=True)
        self._checkCdf(thinkstats2.MakeCdfFromList(t), exact=True)

        cdf = thinkstats2.Cdf(Counter(t))
        self._checkCdf(cdf, exact=True)

        cdf2 = cdf.Copy()
        self.assertEqual(cdf2.Prob(2), 0.6)
        self.assertEqual(cdf2.Value(0.6), 2)

    def testShift(self):
        """Shift(1) moves the probability at 1 to the value 2."""
        t = [1, 2, 2, 3, 5]
        cdf = thinkstats2.Cdf(t)
        cdf2 = cdf.Shift(1)
        self.assertEqual(cdf[1], cdf2[2])

    def testScale(self):
        """Scale(2) moves the probability at 2 to the value 4."""
        t = [1, 2, 2, 3, 5]
        cdf = thinkstats2.Cdf(t)
        cdf2 = cdf.Scale(2)
        self.assertEqual(cdf[2], cdf2[4])

    def testCdfRender(self):
        """Render returns the xs and ps sequences expected for a small Cdf."""
        t = [1, 2, 2, 3, 5]
        cdf = thinkstats2.Cdf(t)
        xs, ps = cdf.Render()
        self.assertEqual(xs[0], 1)
        self.assertEqual(ps[2], 0.2)
        self.assertEqual(sum(xs), 22)
        # A sum of floats can pick up rounding error, so compare approximately.
        self.assertAlmostEqual(sum(ps), 4.2)

    def testPmfFromCdf(self):
        """Converting Pmf -> Cdf -> Pmf preserves every probability."""
        t = [1, 2, 2, 3, 5]
        pmf = thinkstats2.Pmf(t)
        cdf = thinkstats2.Cdf(pmf)
        pmf2 = thinkstats2.Pmf(cdf)
        for x in pmf.Values():
            self.assertAlmostEqual(pmf[x], pmf2[x])

        pmf3 = cdf.MakePmf()
        for x in pmf.Values():
            self.assertAlmostEqual(pmf[x], pmf3[x])

    def testNormalPdf(self):
        """NormalPdf density, rendering, and conversion to Pmf and Cdf."""
        pdf = thinkstats2.NormalPdf(mu=1, sigma=2)
        self.assertEqual(len(str(pdf)), 29)
        self.assertAlmostEqual(pdf.Density(3), 0.12098536226)

        pmf = pdf.MakePmf()
        self.assertAlmostEqual(pmf[1.0], 0.0239951295619)
        xs, ps = pdf.Render()
        self.assertEqual(xs[0], -5.0)
        self.assertAlmostEqual(ps[0], 0.0022159242059690038)

        pmf = thinkstats2.Pmf(pdf)
        self.assertAlmostEqual(pmf[1.0], 0.0239951295619)
        xs, ps = pmf.Render()
        self.assertEqual(xs[0], -5.0)
        self.assertAlmostEqual(ps[0], 0.00026656181123)

        cdf = thinkstats2.Cdf(pdf)
        self.assertAlmostEqual(cdf[1.0], 0.51199756478094904)
        xs, ps = cdf.Render()
        self.assertEqual(xs[0], -5.0)
        self.assertAlmostEqual(ps[0], 0.0)

    def testExponentialPdf(self):
        """ExponentialPdf density, conversion to Pmf, and rendering."""
        pdf = thinkstats2.ExponentialPdf(lam=0.5)
        self.assertEqual(len(str(pdf)), 24)
        self.assertAlmostEqual(pdf.Density(3), 0.11156508007421491)
        pmf = pdf.MakePmf()
        self.assertAlmostEqual(pmf[1.0], 0.02977166586593202)
        xs, ps = pdf.Render()
        self.assertEqual(xs[0], 0.0)
        self.assertAlmostEqual(ps[0], 0.5)

    def testEstimatedPdf(self):
        """EstimatedPdf density and MakePmf with default and explicit ranges."""
        pdf = thinkstats2.EstimatedPdf([1, 2, 2, 3, 5])
        self.assertEqual(len(str(pdf)), 30)
        self.assertAlmostEqual(pdf.Density(3)[0], 0.19629968)
        pmf = pdf.MakePmf()
        self.assertAlmostEqual(pmf[1.0], 0.010172282816895044)
        pmf = pdf.MakePmf(low=0, high=6)
        self.assertAlmostEqual(pmf[0.0], 0.0050742294053582942)

    def testEvalNormalCdf(self):
        """EvalNormalCdf at the mean and far tails, plus its inverse."""
        p = thinkstats2.EvalNormalCdf(0)
        self.assertAlmostEqual(p, 0.5)

        p = thinkstats2.EvalNormalCdf(2, 2, 3)
        self.assertAlmostEqual(p, 0.5)

        p = thinkstats2.EvalNormalCdf(1000, 0, 1)
        self.assertAlmostEqual(p, 1.0)

        p = thinkstats2.EvalNormalCdf(-1000, 0, 1)
        self.assertAlmostEqual(p, 0.0)

        x = thinkstats2.EvalNormalCdfInverse(0.95, 0, 1)
        self.assertAlmostEqual(x, 1.64485362695)
        x = thinkstats2.EvalNormalCdfInverse(0.05, 0, 1)
        self.assertAlmostEqual(x, -1.64485362695)

    def testEvalPoissonPmf(self):
        """EvalPoissonPmf(2, 1) matches the expected probability."""
        p = thinkstats2.EvalPoissonPmf(2, 1)
        self.assertAlmostEqual(p, 0.1839397205)

    def testCov(self):
        """Cov, Corr and SpearmanCorr on a list, its array copy and a permutation."""
        t = [0, 4, 7, 3, 8, 1, 6, 2, 9, 5]
        a = np.array(t)
        t2 = [5, 4, 3, 0, 8, 9, 7, 6, 2, 1]

        self.assertAlmostEqual(thinkstats2.Cov(t, a), 8.25)
        self.assertAlmostEqual(thinkstats2.Cov(t, -a), -8.25)

        self.assertAlmostEqual(thinkstats2.Corr(t, a), 1)
        self.assertAlmostEqual(thinkstats2.Corr(t, -a), -1)
        self.assertAlmostEqual(thinkstats2.Corr(t, t2), -0.1878787878)

        self.assertAlmostEqual(thinkstats2.SpearmanCorr(t, -a), -1)
        self.assertAlmostEqual(thinkstats2.SpearmanCorr(t, t2), -0.1878787878)

    def testReadStataDct(self):
        """ReadStataDct yields 243 variables, colspecs and names.

        The dictionary file is named by a relative path, so
        '2002FemPreg.dct' has to be reachable from the working directory.
        """
        dct = thinkstats2.ReadStataDct('2002FemPreg.dct')
        self.assertEqual(len(dct.variables), 243)
        self.assertEqual(len(dct.colspecs), 243)
        self.assertEqual(len(dct.names), 243)
        self.assertEqual(dct.colspecs[-1][1], -1)

    def testCdfProbs(self):
        """Probs returns the cumulative probability of each sample value."""
        t = [-1, 1, 2, 2, 3, 5]
        cdf = thinkstats2.Cdf(t)
        ps = cdf.Probs(t)

        # Cumulative counts of the six samples are 1, 2, 4, 4, 5, 6.
        expected = [k / 6.0 for k in (1, 2, 4, 4, 5, 6)]
        self.assertEqual(len(ps), len(expected))
        for p, q in zip(ps, expected):
            self.assertAlmostEqual(p, q)

    def testPmfOfHist(self):
        """A Pmf whose values are Hist objects gives each Hist probability 0.5."""
        bowl1 = thinkstats2.Hist(dict(vanilla=30, chocolate=10))
        bowl2 = thinkstats2.Hist(dict(vanilla=20, chocolate=20))
        pmf = thinkstats2.Pmf([bowl1, bowl2])
        pmf.Print()

        self.assertEqual(len(pmf), 2)
        self.assertAlmostEqual(pmf.Prob(bowl1), 0.5)
        self.assertAlmostEqual(pmf.Prob(bowl2), 0.5)
423

424
if __name__ == "__main__":
    # Run every test in this module when the file is executed as a script.
    unittest.main()
426

427