Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96161
License: OTHER
1
"""This file contains code for use with "Think Bayes",
by Allen B. Downey, available from greenteapress.com

Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
7
8
import math
9
10
import columns
11
import thinkbayes
12
import thinkstats
13
import thinkplot
14
15
16
# Selects the prior used by Hockey.__init__: True uses the spread of
# each team's average goals scored (sigma 0.3); False uses the spread
# of the pair-wise match-up averages (sigma 0.85).
USE_SUMMARY_DATA = True
17
18
class Hockey(thinkbayes.Suite):
    """Suite of hypotheses about a team's goal-scoring rate.

    Each hypothesis is a candidate scoring rate in goals per game; the
    prior over those rates is a Gaussian-shaped Pmf built in __init__.
    """

    def __init__(self, name=''):
        """Builds the prior and initializes the underlying Suite.

        name: string
        """
        # Both priors are centred on the same mean; only the spread
        # depends on which data set the prior was estimated from.
        mu = 2.8

        # 0.3: prior based on each team's average goals scored
        # 0.85: prior based on each pair-wise match-up
        sigma = 0.3 if USE_SUMMARY_DATA else 0.85

        # NOTE(review): the third argument (4) is presumably the number
        # of standard deviations the Pmf spans -- confirm against
        # thinkbayes.MakeGaussianPmf.
        prior = thinkbayes.MakeGaussianPmf(mu, sigma, 4)
        thinkbayes.Suite.__init__(self, prior, name=name)

    def Likelihood(self, data, hypo):
        """Returns the likelihood of the data under the hypothesis.

        Evaluates the Poisson PMF with rate `hypo` at count `data`.

        hypo: goal scoring rate in goals per game
        data: goals scored in one game (the interval the rate refers to)
        """
        return thinkbayes.EvalPoissonPmf(data, hypo)
50
51
52
def MakeGoalPmf(suite, high=10):
    """Builds the distribution of goals scored from a distribution of rates.

    Makes one Poisson Pmf per hypothetical rate in the suite and mixes
    them, weighting each by the probability of its rate.

    suite: distribution of goal-scoring rate
    high: upper bound handed to thinkbayes.MakePoissonPmf

    returns: Pmf of goals per game, named after the suite
    """
    poisson_by_rate = thinkbayes.Pmf()

    for rate, weight in suite.Items():
        poisson_by_rate.Set(thinkbayes.MakePoissonPmf(rate, high), weight)

    return thinkbayes.MakeMixture(poisson_by_rate, name=suite.name)
68
69
70
def MakeGoalTimePmf(suite, high=2, n=2001):
    """Makes the distribution of time until the first goal.

    Makes one exponential Pmf per hypothetical rate in the suite and
    mixes them, weighting each by the probability of its rate.  Because
    the rates are in goals per game, the resulting times are in games.

    suite: distribution of goal-scoring rate
    high: upper bound handed to thinkbayes.MakeExponentialPmf
    n: handed to thinkbayes.MakeExponentialPmf as `n`; presumably the
       number of values in each Pmf -- confirm against thinkbayes

    returns: Pmf of time until the first goal, named after the suite
    """
    metapmf = thinkbayes.Pmf()

    for lam, prob in suite.Items():
        pmf = thinkbayes.MakeExponentialPmf(lam, high=high, n=n)
        metapmf.Set(pmf, prob)

    return thinkbayes.MakeMixture(metapmf, name=suite.name)
85
86
87
class Game(object):
    """One row of game data.

    The per-row attributes (such as pd1, pd2 and pd3) are not defined
    here; they are set in columns.read_csv.
    """
    # NOTE(review): presumably consulted by columns.read_csv to convert
    # column values; empty here -- confirm against the columns module.
    convert = {}

    def clean(self):
        """Stores the sum of the three per-period values as self.goals."""
        first, second, third = self.pd1, self.pd2, self.pd3
        self.goals = first + second + third
96
97
98
def ReadHockeyData(filename='hockey_data.csv', season=2011):
    """Reads game scores from the data file and summarizes them.

    Keeps the rows of one season, groups them by game, pairs up the two
    rows of each game, and then runs the team-wise and pair-wise
    summaries, which plot and print their results.  Returns nothing.

    filename: string
    season: only rows whose `season` attribute equals this value are used
    """
    game_list = columns.read_csv(filename, Game)

    # map from gameID to list of two games
    games = {}
    for game in game_list:
        if game.season == season:
            games.setdefault(game.game, []).append(game)

    # map from (team1, team2) to list of (score1, score2)
    pairs = {}
    for rows in games.values():
        # Unpacking raises ValueError unless the game has exactly two rows.
        t1, t2 = rows
        matchup = t1.team, t2.team
        scores = t1.total, t2.total
        pairs.setdefault(matchup, []).append(scores)

    ProcessScoresTeamwise(pairs)
    ProcessScoresPairwise(pairs)
123
124
125
def ProcessScoresPairwise(pairs):
    """Average number of goals for each team against each opponent.

    Plots the Cdf of those averages, then prints their mean and standard
    deviation and the recorded BOS v VAN scores.

    pairs: map from (team1, team2) to list of (score1, score2)
    """
    # map from (team, opponent) to list of goals scored by team
    goals_scored = {}
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault((t1, t2), []).append(g1)
            goals_scored.setdefault((t2, t1), []).append(g2)

    # make a list of average goals scored, skipping match-ups with
    # fewer than three games
    lams = [thinkstats.Mean(goals)
            for goals in goals_scored.values()
            if len(goals) >= 3]

    # make the distribution of average goals scored
    cdf = thinkbayes.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkstats.MeanVar(lams)
    print('mu, sig %s %s' % (mu, math.sqrt(var)))

    # .get() so that a data set without this exact (team1, team2)
    # ordering prints None instead of raising KeyError at the very end.
    print('BOS v VAN %s' % (pairs.get(('BOS', 'VAN')),))
156
157
158
def ProcessScoresTeamwise(pairs):
    """Average number of goals for each team.

    Plots the Cdf of the per-team averages, then prints their mean and
    standard deviation.

    pairs: map from (team1, team2) to list of (score1, score2)
    """
    # map from team to list of goals scored
    goals_scored = {}
    for (t1, t2), entries in pairs.items():
        for g1, g2 in entries:
            goals_scored.setdefault(t1, []).append(g1)
            goals_scored.setdefault(t2, []).append(g2)

    # make a list of average goals scored
    lams = [thinkstats.Mean(goals) for goals in goals_scored.values()]

    # make the distribution of average goals scored
    cdf = thinkbayes.MakeCdfFromList(lams)
    thinkplot.Cdf(cdf)
    thinkplot.Show()

    mu, var = thinkstats.MeanVar(lams)
    print('mu, sig %s %s' % (mu, math.sqrt(var)))
185
186
187
def _SavePmfPair(pmf1, pmf2, root, xlabel, formats):
    """Plots two Pmfs on a cleared figure and saves the result.

    pmf1, pmf2: the Pmfs to draw
    root: string passed to thinkplot.Save as `root`
    xlabel: string label for the x-axis
    formats: list of strings passed to thinkplot.Save as `formats`
    """
    thinkplot.Clf()
    thinkplot.PrePlot(num=2)
    thinkplot.Pmf(pmf1)
    thinkplot.Pmf(pmf2)
    thinkplot.Save(root=root,
                   xlabel=xlabel,
                   ylabel='Probability',
                   formats=formats)


def main():
    """Runs the bruins v canucks analysis, saving plots and printing results.

    Saves four figures (hockey0 .. hockey3) and prints the maximum
    likelihood rates, the win/loss/tie probabilities for one game, the
    overtime adjustment, and the probability of winning the series.
    """
    # Uncomment to run the data summaries instead of the analysis:
    #ReadHockeyData()
    #return

    formats = ['pdf', 'eps']

    suite1 = Hockey('bruins')
    suite2 = Hockey('canucks')

    # prior distributions of the scoring rate
    _SavePmfPair(suite1, suite2, 'hockey0', 'Goals per game', formats)

    suite1.UpdateSet([0, 2, 8, 4])
    suite2.UpdateSet([1, 3, 1, 0])

    # posterior distributions of the scoring rate
    _SavePmfPair(suite1, suite2, 'hockey1', 'Goals per game', formats)

    goal_dist1 = MakeGoalPmf(suite1)
    goal_dist2 = MakeGoalPmf(suite2)

    # distributions of goals scored in a game
    _SavePmfPair(goal_dist1, goal_dist2, 'hockey2', 'Goals', formats)

    time_dist1 = MakeGoalTimePmf(suite1)
    time_dist2 = MakeGoalTimePmf(suite2)

    print('MLE bruins %s' % (suite1.MaximumLikelihood(),))
    print('MLE canucks %s' % (suite2.MaximumLikelihood(),))

    # distributions of time until the first goal
    _SavePmfPair(time_dist1, time_dist2, 'hockey3', 'Games until goal',
                 formats)

    # outcome of a single game, from the difference in goals scored
    diff = goal_dist1 - goal_dist2
    p_win = diff.ProbGreater(0)
    p_loss = diff.ProbLess(0)
    p_tie = diff.Prob(0)

    print('%s %s %s' % (p_win, p_loss, p_tie))

    # chance that the first team's time to first goal is the smaller one,
    # counting half of the cases where the two times are equal
    p_overtime = thinkbayes.PmfProbLess(time_dist1, time_dist2)
    p_adjust = thinkbayes.PmfProbEqual(time_dist1, time_dist2)
    p_overtime += p_adjust / 2
    print('p_overtime %s' % (p_overtime,))

    # a tie followed by scoring first counts as a win
    print(p_overtime * p_tie)
    p_win += p_overtime * p_tie
    print('p_win %s' % (p_win,))

    # win the next two
    p_series = p_win**2

    # split the next two, win the third
    p_series += 2 * p_win * (1-p_win) * p_win

    print('p_series %s' % (p_series,))
268
269
270
# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
272
273