| Download
Think Stats by Allen B. Downey Think Stats is an introduction to Probability and Statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7115License: GPL3
"""This file contains code for use with "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function, division89import numpy as np10import random1112import first13import normal14import thinkstats215import thinkplot161718def PlotPregLengths(live, firsts, others):19"""Plots sampling distributions under the null and alternate hypotheses.2021live, firsts, others: DataFrames2223Results:24null hypothesis N(0, 0.00319708)250.0837707042554 0.0837707042554 (90% CI)2627estimated params N(0.0780373, 0.00321144)28-0.0151758158699 0.171250349425 (90% CI)2930Sampling distribution under the null hypothesis is centered31around 0.3233Sampling distribution under the null hypothesis is centered34around the observed difference, 0.078.3536The variance of the two distributions is very similar; in practice,37you could reasonably compute whichever one is easier.3839"""40print('prglngth example')41delta = firsts.prglngth.mean() - others.prglngth.mean()42print(delta)4344dist1 = normal.SamplingDistMean(live.prglngth, len(firsts))45dist2 = normal.SamplingDistMean(live.prglngth, len(others))46dist = dist1 - dist247print('null hypothesis', dist)48print(dist.Prob(-delta), 1 - dist.Prob(delta))4950thinkplot.PrePlot(2)51thinkplot.Plot(dist, label='null hypothesis')5253dist1 = normal.SamplingDistMean(firsts.prglngth, len(firsts))54dist2 = normal.SamplingDistMean(others.prglngth, len(others))55dist = dist1 - dist256print('estimated params', dist)57print(dist.Percentile(5), dist.Percentile(95))5859thinkplot.Plot(dist, label='estimated params')60thinkplot.Show(xlabel='difference in means (weeks)',61ylabel='CDF')626364def GenerateAdultWeight(birth_weights, n):65"""Generate a random adult weight by simulating annual gain.6667birth_weights: sequence of birth weights in lbs68n: number of years to simulate6970returns: adult weight in lbs71"""72bw = random.choice(birth_weights)73factors = np.random.normal(1.09, 0.03, n)74aw = bw * np.prod(factors)75return aw767778def PlotAdultWeights(live):79"""Makes a normal probability plot of log10 adult weight.8081live: DataFrame of live births8283results:8485With n=40 the distribution is approximately lognormal except for86the lowest weights.8788Actual distribution might deviate from lognormal because it is89a mixture of people at different ages, or because annual weight90gains are correlated.91"""92birth_weights = live.totalwgt_lb.dropna().values93aws = [GenerateAdultWeight(birth_weights, 40) for _ in range(1000)]94log_aws = np.log10(aws)95thinkstats2.NormalProbabilityPlot(log_aws)96thinkplot.Show(xlabel='standard normal values',97ylabel='adult weight (log10 lbs)')9899100def TestIntervention():101"""Tests whether reported changes are statistically significant.102103Results:104-1.66 4.73095323208e-05105-0.26 0.1252679872071061.4 0.00182694836898107108Conclusions:1091101) Gender gap before intervention was 1.66 points (p-value 5e-5)1111122) Genger gap after was 0.26 points (p-value 0.13, no significant)1131143) Change in gender gap was 1.4 points (p-value 0.002, significant).115"""116male_before = normal.Normal(3.57, 0.28**2)117male_after = normal.Normal(3.44, 0.16**2)118119female_before = normal.Normal(1.91, 0.32**2)120female_after = normal.Normal(3.18, 0.16**2)121122diff_before = female_before - male_before123print('mean, p-value', diff_before.mu, 1-diff_before.Prob(0))124print('CI', diff_before.Percentile(5), diff_before.Percentile(95))125print('stderr', diff_before.sigma)126127diff_after = female_after - male_after128print('mean, p-value', diff_after.mu, 1-diff_after.Prob(0))129print('CI', diff_after.Percentile(5), diff_after.Percentile(95))130print('stderr', diff_after.sigma)131132diff = diff_after - diff_before133print('mean, p-value', diff.mu, diff.Prob(0))134print('CI', diff.Percentile(5), diff.Percentile(95))135print('stderr', diff.sigma)136137138def main():139thinkstats2.RandomSeed(17)140141TestIntervention()142return143144live, firsts, others = first.MakeFrames()145PlotAdultWeights(live)146147PlotPregLengths(live, firsts, others)148149150if __name__ == '__main__':151main()152153154