| Download
Think Stats by Allen B. Downey. Think Stats is an introduction to probability and statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7115 | License: GPL3
"""This file contains code used in "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function89import math10import random1112import brfss13import first14import thinkstats215import thinkplot161718def Summarize(data):19"""Prints summary statistics.2021data: pandas Series22"""23mean = data.mean()24std = data.std()25median = thinkstats2.Median(data)26print('mean', mean)27print('std', std)28print('median', median)29print('skewness', thinkstats2.Skewness(data))30print('pearson skewness',31thinkstats2.PearsonMedianSkewness(data))3233return mean, median343536def ComputeSkewnesses():37"""Plots KDE of birthweight and adult weight.38"""39def VertLine(x, y):40thinkplot.Plot([x, x], [0, y], color='0.6', linewidth=1)4142live, firsts, others = first.MakeFrames()43data = live.totalwgt_lb.dropna()44print('Birth weight')45mean, median = Summarize(data)4647y = 0.3548VertLine(mean, y)49thinkplot.Text(mean-0.15, 0.1*y, 'mean', horizontalalignment='right')50VertLine(median, y)51thinkplot.Text(median+0.1, 0.1*y, 'median', horizontalalignment='left')5253pdf = thinkstats2.EstimatedPdf(data)54thinkplot.Pdf(pdf, label='birth weight')55thinkplot.Save(root='density_totalwgt_kde',56xlabel='lbs',57ylabel='PDF')5859df = brfss.ReadBrfss(nrows=None)60data = df.wtkg2.dropna()61print('Adult weight')62mean, median = Summarize(data)6364y = 0.0249965VertLine(mean, y)66thinkplot.Text(mean+1, 0.1*y, 'mean', horizontalalignment='left')67VertLine(median, y)68thinkplot.Text(median-1.5, 0.1*y, 'median', horizontalalignment='right')6970pdf = thinkstats2.EstimatedPdf(data)71thinkplot.Pdf(pdf, label='adult weight')72thinkplot.Save(root='density_wtkg2_kde',73xlabel='kg',74ylabel='PDF',75xlim=[0, 200])767778def MakePdfExample(n=500):79"""Plots a normal density function and a KDE estimate.8081n: sample size82"""83# mean and var of women's heights in cm, from the BRFSS84mean, 
var = 163, 52.885std = math.sqrt(var)8687# make a PDF and compute a density, FWIW88pdf = thinkstats2.NormalPdf(mean, std)89print(pdf.Density(mean + std))9091# make a PMF and plot it92thinkplot.PrePlot(2)93thinkplot.Pdf(pdf, label='normal')9495# make a sample, make an estimated PDF, and plot it96sample = [random.gauss(mean, std) for _ in range(n)]97sample_pdf = thinkstats2.EstimatedPdf(sample)98thinkplot.Pdf(sample_pdf, label='sample KDE')99100thinkplot.Save(root='pdf_example',101xlabel='Height (cm)',102ylabel='Density')103104105def main():106thinkstats2.RandomSeed(17)107108MakePdfExample()109ComputeSkewnesses()110111112if __name__ == '__main__':113main()114115116