| Download
Think Stats by Allen B. Downey Think Stats is an introduction to Probability and Statistics for Python programmers.
This is the accompanying code for this book.
Project: Support and Testing
Views: 7120License: GPL3
"""This file contains code used in "Think Stats",1by Allen B. Downey, available from greenteapress.com23Copyright 2014 Allen B. Downey4License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html5"""67from __future__ import print_function, division89import thinkstats210import thinkplot1112import math13import random14import numpy as np151617def MeanError(estimates, actual):18"""Computes the mean error of a sequence of estimates.1920estimate: sequence of numbers21actual: actual value2223returns: float mean error24"""25errors = [estimate-actual for estimate in estimates]26return np.mean(errors)272829def RMSE(estimates, actual):30"""Computes the root mean squared error of a sequence of estimates.3132estimate: sequence of numbers33actual: actual value3435returns: float RMSE36"""37e2 = [(estimate-actual)**2 for estimate in estimates]38mse = np.mean(e2)39return math.sqrt(mse)404142def Estimate1(n=7, m=1000):43"""Evaluates RMSE of sample mean and median as estimators.4445n: sample size46m: number of iterations47"""48mu = 049sigma = 15051means = []52medians = []53for _ in range(m):54xs = [random.gauss(mu, sigma) for _ in range(n)]55xbar = np.mean(xs)56median = np.median(xs)57means.append(xbar)58medians.append(median)5960print('Experiment 1')61print('rmse xbar', RMSE(means, mu))62print('rmse median', RMSE(medians, mu))636465def Estimate2(n=7, m=1000):66"""Evaluates S and Sn-1 as estimators of sample variance.6768n: sample size69m: number of iterations70"""71mu = 072sigma = 17374estimates1 = []75estimates2 = []76for _ in range(m):77xs = [random.gauss(mu, sigma) for _ in range(n)]78biased = np.var(xs)79unbiased = np.var(xs, ddof=1)80estimates1.append(biased)81estimates2.append(unbiased)8283print('Experiment 2')84print('mean error biased', MeanError(estimates1, sigma**2))85print('mean error unbiased', MeanError(estimates2, sigma**2))868788def Estimate3(n=7, m=1000):89"""Evaluates L and Lm as estimators of the exponential parameter.9091n: sample size92m: number of iterations93"""94lam = 29596means = []97medians = []98for _ in range(m):99xs = np.random.exponential(1/lam, n)100L = 1 / np.mean(xs)101Lm = math.log(2) / np.median(xs)102means.append(L)103medians.append(Lm)104105print('Experiment 3')106print('rmse L', RMSE(means, lam))107print('rmse Lm', RMSE(medians, lam))108print('mean error L', MeanError(means, lam))109print('mean error Lm', MeanError(medians, lam))110111112def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):113"""Plots the sampling distribution of the sample mean.114115mu: hypothetical population mean116sigma: hypothetical population standard deviation117n: sample size118m: number of iterations119"""120def VertLine(x, y=1):121thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)122123means = []124for _ in range(m):125xs = np.random.normal(mu, sigma, n)126xbar = np.mean(xs)127means.append(xbar)128129stderr = RMSE(means, mu)130print('standard error', stderr)131132cdf = thinkstats2.Cdf(means)133ci = cdf.Percentile(5), cdf.Percentile(95)134print('confidence interval', ci)135VertLine(ci[0])136VertLine(ci[1])137138# plot the CDF139thinkplot.Cdf(cdf)140thinkplot.Save(root='estimation1',141xlabel='sample mean',142ylabel='CDF',143title='Sampling distribution')144145146def main():147thinkstats2.RandomSeed(17)148149Estimate1()150Estimate2()151Estimate3(m=1000)152SimulateSample()153154155156if __name__ == '__main__':157main()158159160