Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96161
License: OTHER
"""This file contains code used in "Think Stats",
by Allen B. Downey, available from greenteapress.com

Copyright 2010 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
7
8
import math
9
import random
10
11
import thinkstats
12
13
14
def Cov(xs, ys, mux=None, muy=None):
    """Computes Cov(X, Y), normalized by the number of pairs.

    Args:
        xs: sequence of values
        ys: sequence of values, the same length as xs
        mux: optional float mean of xs; computed with thinkstats.Mean
            when None
        muy: optional float mean of ys; computed with thinkstats.Mean
            when None

    Returns:
        Cov(X, Y): the sum of (x - mux) * (y - muy) over the pairs,
        divided by len(xs)

    Raises:
        ValueError: if xs and ys have different lengths
        ZeroDivisionError: if the sequences are empty (the sum is
            divided by len(xs))
    """
    # zip() stops at the shorter input, but the divisor is len(xs);
    # a length mismatch would silently produce a wrong result.
    if len(xs) != len(ys):
        raise ValueError('xs and ys must have the same length')

    if mux is None:
        mux = thinkstats.Mean(xs)
    if muy is None:
        muy = thinkstats.Mean(ys)

    # Start from 0.0 so the final division is always a float division.
    total = sum(((x - mux) * (y - muy) for x, y in zip(xs, ys)), 0.0)

    return total / len(xs)
36
37
38
def Corr(xs, ys):
    """Computes Corr(X, Y).

    The covariance of the two sequences is divided by the square root
    of the product of their variances.

    Args:
        xs: sequence of values
        ys: sequence of values

    Returns:
        Corr(X, Y)
    """
    # thinkstats.MeanVar is expected to return a (mean, variance) pair.
    mean_x, var_x = thinkstats.MeanVar(xs)
    mean_y, var_y = thinkstats.MeanVar(ys)

    # Reuse the means so Cov does not have to recompute them.
    covariance = Cov(xs, ys, mean_x, mean_y)
    return covariance / math.sqrt(var_x * var_y)
54
55
56
def SerialCorr(xs):
    """Computes the serial correlation of a sequence.

    Each element is paired with its successor and the correlation of
    those pairs is returned.

    Args:
        xs: sequence of values (must support slicing)

    Returns:
        Corr of xs without its last element and xs without its first
    """
    leading = xs[:-1]
    following = xs[1:]
    return Corr(leading, following)
59
60
61
def SpearmanCorr(xs, ys):
    """Computes Spearman's rank correlation.

    Both sequences are converted to ranks with MapToRanks and the
    correlation of the ranks is returned.

    Args:
        xs: sequence of values
        ys: sequence of values

    Returns:
        float Spearman's correlation
    """
    return Corr(MapToRanks(xs), MapToRanks(ys))
74
75
76
def LeastSquares(xs, ys):
    """Computes a linear least squares fit for ys as a function of xs.

    The slope is Cov(xs, ys) divided by the variance of xs, and the
    intercept makes the line pass through the point of means.

    Args:
        xs: sequence of values
        ys: sequence of values

    Returns:
        tuple of (intercept, slope)
    """
    # thinkstats.MeanVar is expected to return a (mean, variance) pair;
    # the variance of ys is not needed for the fit.
    mean_x, var_x = thinkstats.MeanVar(xs)
    mean_y, _ = thinkstats.MeanVar(ys)

    slope = Cov(xs, ys, mean_x, mean_y) / var_x
    return mean_y - slope * mean_x, slope
93
94
95
def FitLine(xs, inter, slope):
    """Returns the endpoints of the fitted line over the range of xs.

    Args:
        xs: x values used for the fit
        inter: estimated intercept
        slope: estimated slope

    Returns:
        tuple of (fxs, fys), where fxs is the pair (min(xs), max(xs))
        and fys is the list of fitted values at those two points
    """
    lowest = min(xs)
    highest = max(xs)
    fxs = (lowest, highest)

    fys = []
    for x in fxs:
        fys.append(x * slope + inter)
    return fxs, fys
105
106
107
def Residuals(xs, ys, inter, slope):
    """Computes residuals for a linear fit with parameters inter and slope.

    Args:
        xs: independent variable
        ys: dependent variable
        inter: float intercept
        slope: float slope

    Returns:
        list of residuals, one per (x, y) pair: y - inter - slope*x
    """
    res = []
    for x, y in zip(xs, ys):
        res.append(y - inter - slope*x)
    return res
121
122
123
def CoefDetermination(ys, res):
    """Computes the coefficient of determination (R^2) for given residuals.

    Args:
        ys: dependent variable
        res: residuals

    Returns:
        float coefficient of determination: one minus the ratio of the
        variance of the residuals to the variance of ys
    """
    # thinkstats.MeanVar is expected to return a (mean, variance) pair;
    # only the variances are used here.
    _, var_y = thinkstats.MeanVar(ys)
    _, var_res = thinkstats.MeanVar(res)

    unexplained = var_res / var_y
    return 1 - unexplained
136
137
138
def MapToRanks(t):
    """Returns a list of ranks corresponding to the elements in t.

    The smallest element gets rank 1. Tied values get distinct
    consecutive ranks in their order of appearance, because the sort
    is stable.

    Args:
        t: sequence of numbers

    Returns:
        list of integer ranks, starting at 1
    """
    values = list(t)

    # positions of the elements, ordered by increasing value
    order = sorted(range(len(values)), key=values.__getitem__)

    # write each rank back at the element's original position
    ranks = [0] * len(values)
    for rank, position in enumerate(order, 1):
        ranks[position] = rank
    return ranks
162
163
164
def CorrelatedGenerator(rho):
    """Generates standard normal variates with correlation.

    The first value is drawn from gauss(0, 1). Every later value is
    drawn from a gaussian centered on the previous value times rho,
    with standard deviation sqrt(1 - rho**2).

    rho: target coefficient of correlation

    Returns: iterable (an endless generator)
    """
    current = random.gauss(0, 1)
    yield current

    # Computed only after the first value has been produced.
    spread = math.sqrt(1 - rho**2)
    while True:
        current = random.gauss(current * rho, spread)
        yield current
178
179
180
def CorrelatedNormalGenerator(mu, sigma, rho):
    """Generates normal variates with correlation.

    Each value from CorrelatedGenerator(rho) is scaled by sigma and
    shifted by mu.

    mu: mean of variate
    sigma: standard deviation of variate
    rho: target coefficient of correlation

    Returns: iterable
    """
    standard_variates = CorrelatedGenerator(rho)
    for z in standard_variates:
        yield z * sigma + mu
191
192
193
def main():
    """Entry point when the file is run as a script; does nothing."""
    return None


# Call main only when executed directly, not when imported.
if __name__ == '__main__':
    main()
199
200