CoCalc -- estimation.py

Think Stats by Allen B. Downey. Think Stats is an introduction to Probability and Statistics for Python programmers.
This is the accompanying code for this book.
Website: http://greenteapress.com/wp/think-stats-2e/
Path: think-stats-code / estimation.py
Views: 7120
License: GPL3
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com

Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
7

8
from __future__ import print_function, division
9

10
import thinkstats2
11
import thinkplot
12

13
import math
14
import random
15
import numpy as np
16

17

18
def MeanError(estimates, actual):
    """Computes the mean error of a sequence of estimates.

    The mean error is the average of (estimate - actual); errors keep
    their sign, so positive and negative errors can cancel.

    estimates: sequence of numbers
    actual: actual value

    returns: float mean error
    """
    # list() lets any iterable (including a generator) be passed in
    # before NumPy performs the subtraction in one vectorized step.
    errors = np.asarray(list(estimates), dtype=float) - actual
    return np.mean(errors)
28

29

30
def RMSE(estimates, actual):
    """Computes the root mean squared error of a sequence of estimates.

    estimates: sequence of numbers
    actual: actual value

    returns: float RMSE
    """
    # list() lets any iterable (including a generator) be passed in;
    # converting to float avoids integer overflow when squaring.
    errors = np.asarray(list(estimates), dtype=float) - actual
    mse = np.mean(errors ** 2)
    # math.sqrt returns a plain Python float
    return math.sqrt(mse)
41

42

43
def Estimate1(n=7, m=1000, mu=0, sigma=1):
    """Evaluates RMSE of sample mean and median as estimators.

    Draws m samples of size n from a normal distribution with
    parameters mu and sigma, then prints the RMSE of the sample means
    and of the sample medians, both measured against mu.

    n: sample size
    m: number of iterations
    mu: mean of the normal distribution to sample from
    sigma: standard deviation of the normal distribution to sample from
    """
    means = []
    medians = []
    for _ in range(m):
        # random.gauss is kept as the generator so that a seeded run
        # draws the same values as before.
        xs = [random.gauss(mu, sigma) for _ in range(n)]
        means.append(np.mean(xs))
        medians.append(np.median(xs))

    print('Experiment 1')
    print('rmse xbar', RMSE(means, mu))
    print('rmse median', RMSE(medians, mu))
64

65

66
def Estimate2(n=7, m=1000, mu=0, sigma=1):
    """Evaluates S^2 and S^2_{n-1} as estimators of population variance.

    Draws m samples of size n from a normal distribution with
    parameters mu and sigma, computes both variance estimates for each
    sample, and prints the mean error of each against sigma**2.

    n: sample size
    m: number of iterations
    mu: mean of the normal distribution to sample from
    sigma: standard deviation of the normal distribution to sample from
    """
    estimates1 = []
    estimates2 = []
    for _ in range(m):
        xs = [random.gauss(mu, sigma) for _ in range(n)]
        # default ddof=0 divides by n (biased estimator)
        estimates1.append(np.var(xs))
        # ddof=1 divides by n-1 (unbiased estimator)
        estimates2.append(np.var(xs, ddof=1))

    print('Experiment 2')
    print('mean error biased', MeanError(estimates1, sigma**2))
    print('mean error unbiased', MeanError(estimates2, sigma**2))
87

88

89
def Estimate3(n=7, m=1000, lam=2):
    """Evaluates L and Lm as estimators of the exponential parameter.

    Draws m samples of size n from an exponential distribution with
    rate lam. For each sample it computes L = 1 / mean and
    Lm = log(2) / median, then prints the RMSE and the mean error of
    each estimator against lam.

    n: sample size
    m: number of iterations
    lam: rate parameter of the exponential distribution
    """
    means = []
    medians = []
    for _ in range(m):
        # np.random.exponential takes the scale (1/lam), not the rate;
        # 1.0 keeps this true division even without the __future__ import.
        xs = np.random.exponential(1.0 / lam, n)
        L = 1 / np.mean(xs)
        Lm = math.log(2) / np.median(xs)
        means.append(L)
        medians.append(Lm)

    print('Experiment 3')
    print('rmse L', RMSE(means, lam))
    print('rmse Lm', RMSE(medians, lam))
    print('mean error L', MeanError(means, lam))
    print('mean error Lm', MeanError(medians, lam))
111

112

113
def SimulateSample(mu=90, sigma=7.5, n=9, m=1000):
    """Plots the sampling distribution of the sample mean.

    Draws m samples of size n from a normal distribution, collects the
    sample means, and prints two summaries: the RMSE of the means about
    mu (labelled 'standard error') and the interval between the 5th and
    95th percentiles of the means (labelled 'confidence interval').
    The CDF of the means is then plotted and handed to thinkplot.Save
    with root='estimation1'.

    mu: hypothetical population mean
    sigma: hypothetical population standard deviation
    n: sample size
    m: number of iterations
    """
    def VertLine(x, y=1):
        """Plots a vertical line from (x, 0) to (x, y)."""
        thinkplot.Plot([x, x], [0, y], color='0.8', linewidth=3)

    # one sample mean per simulated experiment
    means = [np.mean(np.random.normal(mu, sigma, n)) for _ in range(m)]

    stderr = RMSE(means, mu)
    print('standard error', stderr)

    cdf = thinkstats2.Cdf(means)
    # 5th and 95th percentiles bound the central 90% of the sample means
    ci = cdf.Percentile(5), cdf.Percentile(95)
    print('confidence interval', ci)
    VertLine(ci[0])
    VertLine(ci[1])

    # plot the CDF
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='estimation1',
                   xlabel='sample mean',
                   ylabel='CDF',
                   title='Sampling distribution')
145

146

147
def main():
    """Seeds the random number generators and runs every experiment."""
    # a fixed seed is set before the experiments run
    thinkstats2.RandomSeed(17)

    Estimate1()
    Estimate2()
    Estimate3(m=1000)
    SimulateSample()
154

155

156

157
# run the experiments only when executed as a script, not on import
if __name__ == '__main__':
    main()
159

160