| Download
Think Stats, by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7115 | License: GPL3
"""This file contains code for use with "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function, division89import unittest10import random1112from collections import Counter13import numpy as np1415import thinkstats216import thinkplot1718class Test(unittest.TestCase):1920def testOdds(self):21p = 0.7522o = thinkstats2.Odds(p)23self.assertEqual(o, 3)2425p = thinkstats2.Probability(o)26self.assertEqual(p, 0.75)2728p = thinkstats2.Probability2(3, 1)29self.assertEqual(p, 0.75)3031def testMean(self):32t = [1, 1, 1, 3, 3, 591]33mean = thinkstats2.Mean(t)34self.assertEqual(mean, 100)3536def testVar(self):37t = [1, 1, 1, 3, 3, 591]38mean = thinkstats2.Mean(t)39var1 = thinkstats2.Var(t)40var2 = thinkstats2.Var(t, mean)4142self.assertAlmostEqual(mean, 100.0)43self.assertAlmostEqual(var1, 48217.0)44self.assertAlmostEqual(var2, 48217.0)4546def testMeanVar(self):47t = [1, 1, 1, 3, 3, 591]48mean, var = thinkstats2.MeanVar(t)4950self.assertAlmostEqual(mean, 100.0)51self.assertAlmostEqual(var, 48217.0)5253def testBinomialCoef(self):54res = thinkstats2.BinomialCoef(10, 3)55self.assertEqual(round(res), 120)5657res = thinkstats2.BinomialCoef(100, 4)58self.assertEqual(round(res), 3921225)5960def testInterpolator(self):61xs = [1, 2, 3]62ys = [4, 5, 6]63interp = thinkstats2.Interpolator(xs, ys)6465y = interp.Lookup(1)66self.assertAlmostEqual(y, 4)6768y = interp.Lookup(2)69self.assertAlmostEqual(y, 5)7071y = interp.Lookup(3)72self.assertAlmostEqual(y, 6)7374y = interp.Lookup(1.5)75self.assertAlmostEqual(y, 4.5)7677y = interp.Lookup(2.75)78self.assertAlmostEqual(y, 5.75)7980x = interp.Reverse(4)81self.assertAlmostEqual(x, 1)8283x = interp.Reverse(6)84self.assertAlmostEqual(x, 3)8586x = interp.Reverse(4.5)87self.assertAlmostEqual(x, 1.5)8889x = interp.Reverse(5.75)90self.assertAlmostEqual(x, 2.75)9192def testTrim(self):93t = 
list(range(100))94random.shuffle(t)95trimmed = thinkstats2.Trim(t, p=0.05)96n = len(trimmed)97self.assertEqual(n, 90)9899def testHist(self):100hist = thinkstats2.Hist('allen')101self.assertEqual(len(str(hist)), 38)102103self.assertEqual(len(hist), 4)104self.assertEqual(hist.Freq('l'), 2)105106hist = thinkstats2.Hist(Counter('allen'))107self.assertEqual(len(hist), 4)108self.assertEqual(hist.Freq('l'), 2)109110hist2 = thinkstats2.Hist('nella')111self.assertEqual(hist, hist2)112113def testPmf(self):114pmf = thinkstats2.Pmf('allen')115# this one might not be a robust test116self.assertEqual(len(str(pmf)), 45)117118self.assertEqual(len(pmf), 4)119self.assertEqual(pmf.Prob('l'), 0.4)120self.assertEqual(pmf['l'], 0.4)121self.assertEqual(pmf.Percentile(50), 'l')122123pmf = thinkstats2.Pmf(Counter('allen'))124self.assertEqual(len(pmf), 4)125self.assertEqual(pmf.Prob('l'), 0.4)126127pmf = thinkstats2.Pmf(pmf)128self.assertEqual(len(pmf), 4)129self.assertEqual(pmf.Prob('l'), 0.4)130131pmf2 = pmf.Copy()132self.assertEqual(pmf, pmf2)133134xs, ys = pmf.Render()135self.assertEqual(tuple(xs), tuple(sorted(pmf.Values())))136137def testSortedItems(self):138pmf = thinkstats2.Pmf('allen')139items = pmf.SortedItems()140self.assertEqual(len(items), 4)141142pmf = thinkstats2.Pmf(['a', float('nan'), 1, pmf])143# should generate a warning144items = pmf.SortedItems()145self.assertEqual(len(items), 4)146147def testPmfAddSub(self):148pmf = thinkstats2.Pmf([1, 2, 3, 4, 5, 6])149150pmf1 = pmf + 1151self.assertAlmostEqual(pmf1.Mean(), 4.5)152153pmf2 = pmf + pmf154self.assertAlmostEqual(pmf2.Mean(), 7.0)155156pmf3 = pmf - 1157self.assertAlmostEqual(pmf3.Mean(), 2.5)158159pmf4 = pmf - pmf160self.assertAlmostEqual(pmf4.Mean(), 0)161162def testPmfMulDiv(self):163pmf = thinkstats2.Pmf([1, 2, 3, 4, 5, 6])164165pmf1 = pmf * 2166self.assertAlmostEqual(pmf1.Mean(), 7)167168pmf2 = pmf * pmf169self.assertAlmostEqual(pmf2.Mean(), 12.25)170171pmf3 = pmf / 2172self.assertAlmostEqual(pmf3.Mean(), 
1.75)173174pmf4 = pmf / pmf175self.assertAlmostEqual(pmf4.Mean(), 1.4291667)176177def testPmfProbLess(self):178d6 = thinkstats2.Pmf(range(1,7))179self.assertEqual(d6.ProbLess(4), 0.5)180self.assertEqual(d6.ProbGreater(3), 0.5)181two = d6 + d6182three = two + d6183# Pmf no longer supports magic comparators184self.assertAlmostEqual(two.ProbGreater(three), 0.15200617284)185self.assertAlmostEqual(two.ProbLess(three), 0.778549382716049)186187def testPmfMax(self):188d6 = thinkstats2.Pmf(range(1,7))189two = d6 + d6190three = two + d6191cdf = three.Max(6)192thinkplot.Cdf(cdf)193self.assertAlmostEqual(cdf[14], 0.558230962626)194195def testCdf(self):196t = [1, 2, 2, 3, 5]197pmf = thinkstats2.Pmf(t)198hist = thinkstats2.Hist(t)199200cdf = thinkstats2.Cdf(pmf)201self.assertEqual(len(str(cdf)), 33)202203self.assertEqual(cdf[0], 0)204self.assertAlmostEqual(cdf[1], 0.2)205self.assertAlmostEqual(cdf[2], 0.6)206self.assertAlmostEqual(cdf[3], 0.8)207self.assertAlmostEqual(cdf[4], 0.8)208self.assertAlmostEqual(cdf[5], 1)209self.assertAlmostEqual(cdf[6], 1)210211xs = list(range(-1, 7))212ps = cdf.Probs(xs)213for p1, p2 in zip(ps, [0, 0, 0.2, 0.6, 0.8, 0.8, 1, 1]):214self.assertAlmostEqual(p1, p2)215216self.assertEqual(cdf.Value(0), 1)217self.assertEqual(cdf.Value(0.1), 1)218self.assertEqual(cdf.Value(0.2), 1)219self.assertEqual(cdf.Value(0.3), 2)220self.assertEqual(cdf.Value(0.4), 2)221self.assertEqual(cdf.Value(0.5), 2)222self.assertEqual(cdf.Value(0.6), 2)223self.assertEqual(cdf.Value(0.7), 3)224self.assertEqual(cdf.Value(0.8), 3)225self.assertEqual(cdf.Value(0.9), 5)226self.assertEqual(cdf.Value(1), 5)227228ps = np.linspace(0, 1, 11)229xs = cdf.ValueArray(ps)230self.assertTrue((xs == [1, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5]).all())231232np.random.seed(17)233xs = cdf.Sample(7)234self.assertListEqual(xs.tolist(), [2, 2, 1, 1, 3, 3, 3])235236# when you make a Cdf from a Pdf, you might get some floating237# point representation error238self.assertEqual(len(cdf), 
4)239self.assertAlmostEqual(cdf.Prob(2), 0.6)240self.assertAlmostEqual(cdf[2], 0.6)241self.assertEqual(cdf.Value(0.6), 2)242243cdf = thinkstats2.MakeCdfFromPmf(pmf)244self.assertEqual(len(cdf), 4)245self.assertAlmostEqual(cdf.Prob(2), 0.6)246self.assertEqual(cdf.Value(0.6), 2)247248cdf = thinkstats2.MakeCdfFromItems(pmf.Items())249self.assertEqual(len(cdf), 4)250self.assertAlmostEqual(cdf.Prob(2), 0.6)251self.assertEqual(cdf.Value(0.6), 2)252253cdf = thinkstats2.Cdf(pmf.d)254self.assertEqual(len(cdf), 4)255self.assertAlmostEqual(cdf.Prob(2), 0.6)256self.assertEqual(cdf.Value(0.6), 2)257258cdf = thinkstats2.MakeCdfFromDict(pmf.d)259self.assertEqual(len(cdf), 4)260self.assertAlmostEqual(cdf.Prob(2), 0.6)261self.assertEqual(cdf.Value(0.6), 2)262263cdf = thinkstats2.Cdf(hist)264self.assertEqual(len(cdf), 4)265self.assertEqual(cdf.Prob(2), 0.6)266self.assertEqual(cdf.Value(0.6), 2)267268cdf = thinkstats2.MakeCdfFromHist(hist)269self.assertEqual(len(cdf), 4)270self.assertEqual(cdf.Prob(2), 0.6)271self.assertEqual(cdf.Value(0.6), 2)272273cdf = thinkstats2.Cdf(t)274self.assertEqual(len(cdf), 4)275self.assertEqual(cdf.Prob(2), 0.6)276self.assertEqual(cdf.Value(0.6), 2)277278cdf = thinkstats2.MakeCdfFromList(t)279self.assertEqual(len(cdf), 4)280self.assertEqual(cdf.Prob(2), 0.6)281self.assertEqual(cdf.Value(0.6), 2)282283cdf = thinkstats2.Cdf(Counter(t))284self.assertEqual(len(cdf), 4)285self.assertEqual(cdf.Prob(2), 0.6)286self.assertEqual(cdf.Value(0.6), 2)287288cdf2 = cdf.Copy()289self.assertEqual(cdf2.Prob(2), 0.6)290self.assertEqual(cdf2.Value(0.6), 2)291292def testShift(self):293t = [1, 2, 2, 3, 5]294cdf = thinkstats2.Cdf(t)295cdf2 = cdf.Shift(1)296self.assertEqual(cdf[1], cdf2[2])297298def testScale(self):299t = [1, 2, 2, 3, 5]300cdf = thinkstats2.Cdf(t)301cdf2 = cdf.Scale(2)302self.assertEqual(cdf[2], cdf2[4])303304def testCdfRender(self):305t = [1, 2, 2, 3, 5]306cdf = thinkstats2.Cdf(t)307xs, ps = cdf.Render()308self.assertEqual(xs[0], 1)309self.assertEqual(ps[2], 
0.2)310self.assertEqual(sum(xs), 22)311self.assertEqual(sum(ps), 4.2)312313def testPmfFromCdf(self):314t = [1, 2, 2, 3, 5]315pmf = thinkstats2.Pmf(t)316cdf = thinkstats2.Cdf(pmf)317pmf2 = thinkstats2.Pmf(cdf)318for x in pmf.Values():319self.assertAlmostEqual(pmf[x], pmf2[x])320321pmf3 = cdf.MakePmf()322for x in pmf.Values():323self.assertAlmostEqual(pmf[x], pmf3[x])324325def testNormalPdf(self):326pdf = thinkstats2.NormalPdf(mu=1, sigma=2)327self.assertEqual(len(str(pdf)), 29)328self.assertAlmostEqual(pdf.Density(3), 0.12098536226)329330pmf = pdf.MakePmf()331self.assertAlmostEqual(pmf[1.0], 0.0239951295619)332xs, ps = pdf.Render()333self.assertEqual(xs[0], -5.0)334self.assertAlmostEqual(ps[0], 0.0022159242059690038)335336pmf = thinkstats2.Pmf(pdf)337self.assertAlmostEqual(pmf[1.0], 0.0239951295619)338xs, ps = pmf.Render()339self.assertEqual(xs[0], -5.0)340self.assertAlmostEqual(ps[0], 0.00026656181123)341342cdf = thinkstats2.Cdf(pdf)343self.assertAlmostEqual(cdf[1.0], 0.51199756478094904)344xs, ps = cdf.Render()345self.assertEqual(xs[0], -5.0)346self.assertAlmostEqual(ps[0], 0.0)347348def testExponentialPdf(self):349pdf = thinkstats2.ExponentialPdf(lam=0.5)350self.assertEqual(len(str(pdf)), 24)351self.assertAlmostEqual(pdf.Density(3), 0.11156508007421491)352pmf = pdf.MakePmf()353self.assertAlmostEqual(pmf[1.0], 0.02977166586593202)354xs, ps = pdf.Render()355self.assertEqual(xs[0], 0.0)356self.assertAlmostEqual(ps[0], 0.5)357358def testEstimatedPdf(self):359pdf = thinkstats2.EstimatedPdf([1, 2, 2, 3, 5])360self.assertEqual(len(str(pdf)), 30)361self.assertAlmostEqual(pdf.Density(3)[0], 0.19629968)362pmf = pdf.MakePmf()363self.assertAlmostEqual(pmf[1.0], 0.010172282816895044)364pmf = pdf.MakePmf(low=0, high=6)365self.assertAlmostEqual(pmf[0.0], 0.0050742294053582942)366367def testEvalNormalCdf(self):368p = thinkstats2.EvalNormalCdf(0)369self.assertAlmostEqual(p, 0.5)370371p = thinkstats2.EvalNormalCdf(2, 2, 3)372self.assertAlmostEqual(p, 0.5)373374p = 
thinkstats2.EvalNormalCdf(1000, 0, 1)375self.assertAlmostEqual(p, 1.0)376377p = thinkstats2.EvalNormalCdf(-1000, 0, 1)378self.assertAlmostEqual(p, 0.0)379380x = thinkstats2.EvalNormalCdfInverse(0.95, 0, 1)381self.assertAlmostEqual(x, 1.64485362695)382x = thinkstats2.EvalNormalCdfInverse(0.05, 0, 1)383self.assertAlmostEqual(x, -1.64485362695)384385def testEvalPoissonPmf(self):386p = thinkstats2.EvalPoissonPmf(2, 1)387self.assertAlmostEqual(p, 0.1839397205)388389def testCov(self):390t = [0, 4, 7, 3, 8, 1, 6, 2, 9, 5]391a = np.array(t)392t2 = [5, 4, 3, 0, 8, 9, 7, 6, 2, 1]393394self.assertAlmostEqual(thinkstats2.Cov(t, a), 8.25)395self.assertAlmostEqual(thinkstats2.Cov(t, -a), -8.25)396397self.assertAlmostEqual(thinkstats2.Corr(t, a), 1)398self.assertAlmostEqual(thinkstats2.Corr(t, -a), -1)399self.assertAlmostEqual(thinkstats2.Corr(t, t2), -0.1878787878)400401self.assertAlmostEqual(thinkstats2.SpearmanCorr(t, -a), -1)402self.assertAlmostEqual(thinkstats2.SpearmanCorr(t, t2), -0.1878787878)403404def testReadStataDct(self):405dct = thinkstats2.ReadStataDct('2002FemPreg.dct')406self.assertEqual(len(dct.variables), 243)407self.assertEqual(len(dct.colspecs), 243)408self.assertEqual(len(dct.names), 243)409self.assertEqual(dct.colspecs[-1][1], -1)410411def testCdfProbs(self):412t = [-1, 1, 2, 2, 3, 5]413cdf = thinkstats2.Cdf(t)414ps = cdf.Probs(t)415print(ps)416417def testPmfOfHist(self):418bowl1 = thinkstats2.Hist(dict(vanilla=30, chocolate=10))419bowl2 = thinkstats2.Hist(dict(vanilla=20, chocolate=20))420pmf = thinkstats2.Pmf([bowl1, bowl2])421pmf.Print()422423if __name__ == "__main__":424unittest.main()425426427