| Download
Think Stats by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7116 | License: GPL3
"""This file contains code used in "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function89import math10import numpy as np1112import nsfg13import thinkstats214import thinkplot151617def MakeFrames():18"""Reads pregnancy data and partitions first babies and others.1920returns: DataFrames (all live births, first babies, others)21"""22preg = nsfg.ReadFemPreg()2324live = preg[preg.outcome == 1]25firsts = live[live.birthord == 1]26others = live[live.birthord != 1]2728assert len(live) == 914829assert len(firsts) == 441330assert len(others) == 47353132return live, firsts, others333435def Summarize(live, firsts, others):36"""Print various summary statistics."""3738mean = live.prglngth.mean()39var = live.prglngth.var()40std = live.prglngth.std()4142print('Live mean', mean)43print('Live variance', var)44print('Live std', std)4546mean1 = firsts.prglngth.mean()47mean2 = others.prglngth.mean()4849var1 = firsts.prglngth.var()50var2 = others.prglngth.var()5152print('Mean')53print('First babies', mean1)54print('Others', mean2)5556print('Variance')57print('First babies', var1)58print('Others', var2)5960print('Difference in weeks', mean1 - mean2)61print('Difference in hours', (mean1 - mean2) * 7 * 24)6263print('Difference relative to 39 weeks', (mean1 - mean2) / 39 * 100)6465d = thinkstats2.CohenEffectSize(firsts.prglngth, others.prglngth)66print('Cohen d', d)676869def PrintExtremes(live):70"""Plots the histogram of pregnancy lengths and prints the extremes.7172live: DataFrame of live births73"""74hist = thinkstats2.Hist(live.prglngth)75thinkplot.Hist(hist, label='live births')7677thinkplot.Save(root='first_nsfg_hist_live',78title='Histogram',79xlabel='weeks',80ylabel='frequency')8182print('Shortest lengths:')83for weeks, freq in hist.Smallest(10):84print(weeks, freq)8586print('Longest lengths:')87for weeks, freq in 
hist.Largest(10):88print(weeks, freq)899091def MakeHists(live):92"""Plot Hists for live births9394live: DataFrame95others: DataFrame96"""97hist = thinkstats2.Hist(live.birthwgt_lb, label='birthwgt_lb')98thinkplot.Hist(hist)99thinkplot.Save(root='first_wgt_lb_hist',100xlabel='pounds',101ylabel='frequency',102axis=[-1, 14, 0, 3200])103104hist = thinkstats2.Hist(live.birthwgt_oz, label='birthwgt_oz')105thinkplot.Hist(hist)106thinkplot.Save(root='first_wgt_oz_hist',107xlabel='ounces',108ylabel='frequency',109axis=[-1, 16, 0, 1200])110111hist = thinkstats2.Hist(np.floor(live.agepreg), label='agepreg')112thinkplot.Hist(hist)113thinkplot.Save(root='first_agepreg_hist',114xlabel='years',115ylabel='frequency')116117hist = thinkstats2.Hist(live.prglngth, label='prglngth')118thinkplot.Hist(hist)119thinkplot.Save(root='first_prglngth_hist',120xlabel='weeks',121ylabel='frequency',122axis=[-1, 53, 0, 5000])123124125def MakeComparison(firsts, others):126"""Plots histograms of pregnancy length for first babies and others.127128firsts: DataFrame129others: DataFrame130"""131first_hist = thinkstats2.Hist(firsts.prglngth, label='first')132other_hist = thinkstats2.Hist(others.prglngth, label='other')133134width = 0.45135thinkplot.PrePlot(2)136thinkplot.Hist(first_hist, align='right', width=width)137thinkplot.Hist(other_hist, align='left', width=width)138139thinkplot.Save(root='first_nsfg_hist',140title='Histogram',141xlabel='weeks',142ylabel='frequency',143axis=[27, 46, 0, 2700])144145146def main(script):147live, firsts, others = MakeFrames()148149MakeHists(live)150PrintExtremes(live)151MakeComparison(firsts, others)152Summarize(live, firsts, others)153154155if __name__ == '__main__':156import sys157main(*sys.argv)158159160161162