| Download
Think Stats by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7129 | License: GPL3
"""This file contains code used in "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function89import math10import numpy as np1112import nsfg13import first14import thinkstats215import thinkplot161718def MakeHists(live):19"""Plot Hists for live births2021live: DataFrame22others: DataFrame23"""24hist = thinkstats2.Hist(np.floor(live.agepreg), label='agepreg')25thinkplot.PrePlot(2, cols=2)2627thinkplot.SubPlot(1)28thinkplot.Hist(hist)29thinkplot.Config(xlabel='years',30ylabel='frequency',31axis=[0, 45, 0, 700])3233thinkplot.SubPlot(2)34thinkplot.Pmf(hist)3536thinkplot.Save(root='probability_agepreg_hist',37xlabel='years',38axis=[0, 45, 0, 700])394041def MakeFigures(firsts, others):42"""Plot Pmfs of pregnancy length.4344firsts: DataFrame45others: DataFrame46"""47# plot the PMFs48first_pmf = thinkstats2.Pmf(firsts.prglngth, label='first')49other_pmf = thinkstats2.Pmf(others.prglngth, label='other')50width = 0.455152thinkplot.PrePlot(2, cols=2)53thinkplot.Hist(first_pmf, align='right', width=width)54thinkplot.Hist(other_pmf, align='left', width=width)55thinkplot.Config(xlabel='weeks',56ylabel='probability',57axis=[27, 46, 0, 0.6])5859thinkplot.PrePlot(2)60thinkplot.SubPlot(2)61thinkplot.Pmfs([first_pmf, other_pmf])62thinkplot.Save(root='probability_nsfg_pmf',63xlabel='weeks',64axis=[27, 46, 0, 0.6])6566# plot the differences in the PMFs67weeks = range(35, 46)68diffs = []69for week in weeks:70p1 = first_pmf.Prob(week)71p2 = other_pmf.Prob(week)72diff = 100 * (p1 - p2)73diffs.append(diff)7475thinkplot.Bar(weeks, diffs)76thinkplot.Save(root='probability_nsfg_diffs',77title='Difference in PMFs',78xlabel='weeks',79ylabel='percentage points',80legend=False)818283def BiasPmf(pmf, label=''):84"""Returns the Pmf with oversampling proportional to value.8586If pmf is the distribution of true values, the result is the87distribution that would be 
seen if values are oversampled in88proportion to their values; for example, if you ask students89how big their classes are, large classes are oversampled in90proportion to their size.9192Args:93pmf: Pmf object.94label: string label for the new Pmf.9596Returns:97Pmf object98"""99new_pmf = pmf.Copy(label=label)100101for x, p in pmf.Items():102new_pmf.Mult(x, x)103104new_pmf.Normalize()105return new_pmf106107108def UnbiasPmf(pmf, label=''):109"""Returns the Pmf with oversampling proportional to 1/value.110111Args:112pmf: Pmf object.113label: string label for the new Pmf.114115Returns:116Pmf object117"""118new_pmf = pmf.Copy(label=label)119120for x, p in pmf.Items():121new_pmf.Mult(x, 1.0/x)122123new_pmf.Normalize()124return new_pmf125126127def ClassSizes():128"""Generate PMFs of observed and actual class size.129"""130# start with the actual distribution of class sizes from the book131d = { 7: 8, 12: 8, 17: 14, 22: 4,13227: 6, 32: 12, 37: 8, 42: 3, 47: 2 }133134# form the pmf135pmf = thinkstats2.Pmf(d, label='actual')136print('mean', pmf.Mean())137print('var', pmf.Var())138139# compute the biased pmf140biased_pmf = BiasPmf(pmf, label='observed')141print('mean', biased_pmf.Mean())142print('var', biased_pmf.Var())143144# unbias the biased pmf145unbiased_pmf = UnbiasPmf(biased_pmf, label='unbiased')146print('mean', unbiased_pmf.Mean())147print('var', unbiased_pmf.Var())148149# plot the Pmfs150thinkplot.PrePlot(2)151thinkplot.Pmfs([pmf, biased_pmf])152thinkplot.Save(root='class_size1',153xlabel='class size',154ylabel='PMF',155axis=[0, 52, 0, 0.27])156157158def main(script):159live, firsts, others = first.MakeFrames()160MakeFigures(firsts, others)161MakeHists(live)162163ClassSizes()164165166if __name__ == '__main__':167import sys168main(*sys.argv)169170171172173