Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96161
License: OTHER
1
"""This file contains code used in "Think Bayes",
2
by Allen B. Downey, available from greenteapress.com
3
4
Copyright 2013 Allen B. Downey
5
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
6
"""
7
8
import thinkbayes
9
10
import thinkplot
11
import numpy
12
13
import math
14
import random
15
import sys
16
17
FORMATS = ['pdf', 'eps', 'png', 'jpg']
18
19
"""
20
Notation guide:
21
22
z: time between trains
23
x: time since the last train
24
y: time until the next train
25
26
zb: distribution of z as seen by a random arrival
27
28
"""
29
30
# longest hypothetical time between trains, in seconds
31
32
UPPER_BOUND = 1200
33
34
# observed gaps between trains, in seconds
35
# collected using code in redline_data.py, run daily 4-6pm
36
# for 5 days, Monday 6 May 2013 to Friday 10 May 2013
37
38
OBSERVED_GAP_TIMES = [
39
428.0, 705.0, 407.0, 465.0, 433.0, 425.0, 204.0, 506.0, 143.0, 351.0,
40
450.0, 598.0, 464.0, 749.0, 341.0, 586.0, 754.0, 256.0, 378.0, 435.0,
41
176.0, 405.0, 360.0, 519.0, 648.0, 374.0, 483.0, 537.0, 578.0, 534.0,
42
577.0, 619.0, 538.0, 331.0, 186.0, 629.0, 193.0, 360.0, 660.0, 484.0,
43
512.0, 315.0, 457.0, 404.0, 740.0, 388.0, 357.0, 485.0, 567.0, 160.0,
44
428.0, 387.0, 901.0, 187.0, 622.0, 616.0, 585.0, 474.0, 442.0, 499.0,
45
437.0, 620.0, 351.0, 286.0, 373.0, 232.0, 393.0, 745.0, 636.0, 758.0,
46
]
47
48
49
def BiasPmf(pmf, name='', invert=False):
50
"""Returns the Pmf with oversampling proportional to value.
51
52
If pmf is the distribution of true values, the result is the
53
distribution that would be seen if values are oversampled in
54
proportion to their values; for example, if you ask students
55
how big their classes are, large classes are oversampled in
56
proportion to their size.
57
58
If invert=True, computes in inverse operation; for example,
59
unbiasing a sample collected from students.
60
61
Args:
62
pmf: Pmf object.
63
name: string name for the new Pmf.
64
invert: boolean
65
66
Returns:
67
Pmf object
68
"""
69
new_pmf = pmf.Copy(name=name)
70
71
for x in pmf.Values():
72
if invert:
73
new_pmf.Mult(x, 1.0/x)
74
else:
75
new_pmf.Mult(x, x)
76
77
new_pmf.Normalize()
78
return new_pmf
79
80
81
def UnbiasPmf(pmf, name=''):
82
"""Returns the Pmf with oversampling proportional to 1/value.
83
84
Args:
85
pmf: Pmf object.
86
name: string name for the new Pmf.
87
88
Returns:
89
Pmf object
90
"""
91
return BiasPmf(pmf, name, invert=True)
92
93
94
def MakeUniformPmf(low, high):
95
"""Make a uniform Pmf.
96
97
low: lowest value (inclusive)
98
high: highest value (inclusive)
99
"""
100
pmf = thinkbayes.Pmf()
101
for x in MakeRange(low=low, high=high):
102
pmf.Set(x, 1)
103
pmf.Normalize()
104
return pmf
105
106
107
def MakeRange(low=10, high=None, skip=10):
108
"""Makes a range representing possible gap times in seconds.
109
110
low: where to start
111
high: where to end
112
skip: how many to skip
113
"""
114
if high is None:
115
high = UPPER_BOUND
116
117
return range(low, high+skip, skip)
118
119
120
class WaitTimeCalculator(object):
121
"""Encapsulates the forward inference process.
122
123
Given the actual distribution of gap times (z),
124
computes the distribution of gaps as seen by
125
a random passenger (zb), which yields the distribution
126
of wait times (y) and the distribution of elapsed times (x).
127
"""
128
129
def __init__(self, pmf, inverse=False):
130
"""Constructor.
131
132
pmf: Pmf of either z or zb
133
inverse: boolean, true if pmf is zb, false if pmf is z
134
"""
135
if inverse:
136
self.pmf_zb = pmf
137
self.pmf_z = UnbiasPmf(pmf, name="z")
138
else:
139
self.pmf_z = pmf
140
self.pmf_zb = BiasPmf(pmf, name="zb")
141
142
# distribution of wait time
143
self.pmf_y = PmfOfWaitTime(self.pmf_zb)
144
145
# the distribution of elapsed time is the same as the
146
# distribution of wait time
147
self.pmf_x = self.pmf_y
148
149
def GenerateSampleWaitTimes(self, n):
150
"""Generates a random sample of wait times.
151
152
n: sample size
153
154
Returns: sequence of values
155
"""
156
cdf_y = thinkbayes.MakeCdfFromPmf(self.pmf_y)
157
sample = cdf_y.Sample(n)
158
return sample
159
160
def GenerateSampleGaps(self, n):
161
"""Generates a random sample of gaps seen by passengers.
162
163
n: sample size
164
165
Returns: sequence of values
166
"""
167
cdf_zb = thinkbayes.MakeCdfFromPmf(self.pmf_zb)
168
sample = cdf_zb.Sample(n)
169
return sample
170
171
def GenerateSamplePassengers(self, lam, n):
172
"""Generates a sample wait time and number of arrivals.
173
174
lam: arrival rate in passengers per second
175
n: number of samples
176
177
Returns: list of (k1, y, k2) tuples
178
k1: passengers there on arrival
179
y: wait time
180
k2: passengers arrived while waiting
181
"""
182
zs = self.GenerateSampleGaps(n)
183
xs, ys = SplitGaps(zs)
184
185
res = []
186
for x, y in zip(xs, ys):
187
k1 = numpy.random.poisson(lam * x)
188
k2 = numpy.random.poisson(lam * y)
189
res.append((k1, y, k2))
190
191
return res
192
193
def PlotPmfs(self, root='redline0'):
194
"""Plots the computed Pmfs.
195
196
root: string
197
"""
198
pmfs = ScaleDists([self.pmf_z, self.pmf_zb], 1.0/60)
199
200
thinkplot.Clf()
201
thinkplot.PrePlot(2)
202
thinkplot.Pmfs(pmfs)
203
thinkplot.Save(root=root,
204
xlabel='Time (min)',
205
ylabel='CDF',
206
formats=FORMATS)
207
208
209
def MakePlot(self, root='redline2'):
210
"""Plots the computed CDFs.
211
212
root: string
213
"""
214
print 'Mean z', self.pmf_z.Mean() / 60
215
print 'Mean zb', self.pmf_zb.Mean() / 60
216
print 'Mean y', self.pmf_y.Mean() / 60
217
218
cdf_z = self.pmf_z.MakeCdf()
219
cdf_zb = self.pmf_zb.MakeCdf()
220
cdf_y = self.pmf_y.MakeCdf()
221
222
cdfs = ScaleDists([cdf_z, cdf_zb, cdf_y], 1.0/60)
223
224
thinkplot.Clf()
225
thinkplot.PrePlot(3)
226
thinkplot.Cdfs(cdfs)
227
thinkplot.Save(root=root,
228
xlabel='Time (min)',
229
ylabel='CDF',
230
formats=FORMATS)
231
232
233
def SplitGaps(zs):
234
"""Splits zs into xs and ys.
235
236
zs: sequence of gaps
237
238
Returns: tuple of sequences (xs, ys)
239
"""
240
xs = [random.uniform(0, z) for z in zs]
241
ys = [z-x for z, x in zip(zs, xs)]
242
return xs, ys
243
244
245
def PmfOfWaitTime(pmf_zb):
246
"""Distribution of wait time.
247
248
pmf_zb: dist of gap time as seen by a random observer
249
250
Returns: dist of wait time (also dist of elapsed time)
251
"""
252
metapmf = thinkbayes.Pmf()
253
for gap, prob in pmf_zb.Items():
254
uniform = MakeUniformPmf(0, gap)
255
metapmf.Set(uniform, prob)
256
257
pmf_y = thinkbayes.MakeMixture(metapmf, name='y')
258
return pmf_y
259
260
261
def ScaleDists(dists, factor):
262
"""Scales each of the distributions in a sequence.
263
264
dists: sequence of Pmf or Cdf
265
factor: float scale factor
266
"""
267
return [dist.Scale(factor) for dist in dists]
268
269
270
class ElapsedTimeEstimator(object):
271
"""Uses the number of passengers to estimate time since last train."""
272
273
def __init__(self, wtc, lam, num_passengers):
274
"""Constructor.
275
276
pmf_x: expected distribution of elapsed time
277
lam: arrival rate in passengers per second
278
num_passengers: # passengers seen on the platform
279
"""
280
# prior for elapsed time
281
self.prior_x = Elapsed(wtc.pmf_x, name='prior x')
282
283
# posterior of elapsed time (based on number of passengers)
284
self.post_x = self.prior_x.Copy(name='posterior x')
285
self.post_x.Update((lam, num_passengers))
286
287
# predictive distribution of wait time
288
self.pmf_y = PredictWaitTime(wtc.pmf_zb, self.post_x)
289
290
def MakePlot(self, root='redline3'):
291
"""Plot the CDFs.
292
293
root: string
294
"""
295
# observed gaps
296
cdf_prior_x = self.prior_x.MakeCdf()
297
cdf_post_x = self.post_x.MakeCdf()
298
cdf_y = self.pmf_y.MakeCdf()
299
300
cdfs = ScaleDists([cdf_prior_x, cdf_post_x, cdf_y], 1.0/60)
301
302
thinkplot.Clf()
303
thinkplot.PrePlot(3)
304
thinkplot.Cdfs(cdfs)
305
thinkplot.Save(root=root,
306
xlabel='Time (min)',
307
ylabel='CDF',
308
formats=FORMATS)
309
310
311
class ArrivalRate(thinkbayes.Suite):
312
"""Represents the distribution of arrival rates (lambda)."""
313
314
def Likelihood(self, data, hypo):
315
"""Computes the likelihood of the data under the hypothesis.
316
317
Evaluates the Poisson PMF for lambda and k.
318
319
hypo: arrival rate in passengers per second
320
data: tuple of elapsed_time and number of passengers
321
"""
322
lam = hypo
323
x, k = data
324
like = thinkbayes.EvalPoissonPmf(k, lam * x)
325
return like
326
327
328
class ArrivalRateEstimator(object):
329
"""Estimates arrival rate based on passengers that arrive while waiting.
330
"""
331
332
def __init__(self, passenger_data):
333
"""Constructor
334
335
passenger_data: sequence of (k1, y, k2) pairs
336
"""
337
# range for lambda
338
low, high = 0, 5
339
n = 51
340
hypos = numpy.linspace(low, high, n) / 60
341
342
self.prior_lam = ArrivalRate(hypos, name='prior')
343
self.prior_lam.Remove(0)
344
345
self.post_lam = self.prior_lam.Copy(name='posterior')
346
347
for _k1, y, k2 in passenger_data:
348
self.post_lam.Update((y, k2))
349
350
print 'Mean posterior lambda', self.post_lam.Mean()
351
352
def MakePlot(self, root='redline1'):
353
"""Plot the prior and posterior CDF of passengers arrival rate.
354
355
root: string
356
"""
357
thinkplot.Clf()
358
thinkplot.PrePlot(2)
359
360
# convert units to passengers per minute
361
prior = self.prior_lam.MakeCdf().Scale(60)
362
post = self.post_lam.MakeCdf().Scale(60)
363
364
thinkplot.Cdfs([prior, post])
365
366
thinkplot.Save(root=root,
367
xlabel='Arrival rate (passengers / min)',
368
ylabel='CDF',
369
formats=FORMATS)
370
371
372
class Elapsed(thinkbayes.Suite):
373
"""Represents the distribution of elapsed time (x)."""
374
375
def Likelihood(self, data, hypo):
376
"""Computes the likelihood of the data under the hypothesis.
377
378
Evaluates the Poisson PMF for lambda and k.
379
380
hypo: elapsed time since the last train
381
data: tuple of arrival rate and number of passengers
382
"""
383
x = hypo
384
lam, k = data
385
like = thinkbayes.EvalPoissonPmf(k, lam * x)
386
return like
387
388
389
def PredictWaitTime(pmf_zb, pmf_x):
390
"""Computes the distribution of wait times.
391
392
Enumerate all pairs of zb from pmf_zb and x from pmf_x,
393
and accumulate the distribution of y = z - x.
394
395
pmf_zb: distribution of gaps seen by random observer
396
pmf_x: distribution of elapsed time
397
"""
398
pmf_y = pmf_zb - pmf_x
399
pmf_y.name = 'pred y'
400
RemoveNegatives(pmf_y)
401
return pmf_y
402
403
404
def RemoveNegatives(pmf):
405
"""Removes negative values from a PMF.
406
407
pmf: Pmf
408
"""
409
for val in pmf.Values():
410
if val < 0:
411
pmf.Remove(val)
412
pmf.Normalize()
413
414
415
class Gaps(thinkbayes.Suite):
416
"""Represents the distribution of gap times,
417
as updated by an observed waiting time."""
418
419
def Likelihood(self, data, hypo):
420
"""The likelihood of the data under the hypothesis.
421
422
If the actual gap time is z, what is the likelihood
423
of waiting y seconds?
424
425
hypo: actual time between trains
426
data: observed wait time
427
"""
428
z = hypo
429
y = data
430
if y > z:
431
return 0
432
return 1.0 / z
433
434
435
class GapDirichlet(thinkbayes.Dirichlet):
436
"""Represents the distribution of prevalences for each
437
gap time."""
438
439
def __init__(self, xs):
440
"""Constructor.
441
442
xs: sequence of possible gap times
443
"""
444
n = len(xs)
445
thinkbayes.Dirichlet.__init__(self, n)
446
self.xs = xs
447
self.mean_zbs = []
448
449
def PmfMeanZb(self):
450
"""Makes the Pmf of mean zb.
451
452
Values stored in mean_zbs.
453
"""
454
return thinkbayes.MakePmfFromList(self.mean_zbs)
455
456
def Preload(self, data):
457
"""Adds pseudocounts to the parameters.
458
459
data: sequence of pseudocounts
460
"""
461
thinkbayes.Dirichlet.Update(self, data)
462
463
def Update(self, data):
464
"""Computes the likelihood of the data.
465
466
data: wait time observed by random arrival (y)
467
468
Returns: float probability
469
"""
470
k, y = data
471
472
print k, y
473
prior = self.PredictivePmf(self.xs)
474
gaps = Gaps(prior)
475
gaps.Update(y)
476
probs = gaps.Probs(self.xs)
477
478
self.params += numpy.array(probs)
479
480
481
class GapDirichlet2(GapDirichlet):
482
"""Represents the distribution of prevalences for each
483
gap time."""
484
485
def Update(self, data):
486
"""Computes the likelihood of the data.
487
488
data: wait time observed by random arrival (y)
489
490
Returns: float probability
491
"""
492
k, y = data
493
494
# get the current best guess for pmf_z
495
pmf_zb = self.PredictivePmf(self.xs)
496
497
# use it to compute prior pmf_x, pmf_y, pmf_z
498
wtc = WaitTimeCalculator(pmf_zb, inverse=True)
499
500
# use the observed passengers to estimate posterior pmf_x
501
elapsed = ElapsedTimeEstimator(wtc,
502
lam=0.0333,
503
num_passengers=k)
504
505
# use posterior_x and observed y to estimate observed z
506
obs_zb = elapsed.post_x + Floor(y)
507
probs = obs_zb.Probs(self.xs)
508
509
mean_zb = obs_zb.Mean()
510
self.mean_zbs.append(mean_zb)
511
print k, y, mean_zb
512
513
# use observed z to update beliefs about pmf_z
514
self.params += numpy.array(probs)
515
516
517
class GapTimeEstimator(object):
518
"""Infers gap times using passenger data."""
519
520
def __init__(self, xs, pcounts, passenger_data):
521
self.xs = xs
522
self.pcounts = pcounts
523
self.passenger_data = passenger_data
524
525
self.wait_times = [y for _k1, y, _k2 in passenger_data]
526
self.pmf_y = thinkbayes.MakePmfFromList(self.wait_times, name="y")
527
528
dirichlet = GapDirichlet2(self.xs)
529
dirichlet.params /= 1.0
530
531
dirichlet.Preload(self.pcounts)
532
dirichlet.params /= 20.0
533
534
self.prior_zb = dirichlet.PredictivePmf(self.xs, name="prior zb")
535
536
for k1, y, _k2 in passenger_data:
537
dirichlet.Update((k1, y))
538
539
self.pmf_mean_zb = dirichlet.PmfMeanZb()
540
541
self.post_zb = dirichlet.PredictivePmf(self.xs, name="post zb")
542
self.post_z = UnbiasPmf(self.post_zb, name="post z")
543
544
def PlotPmfs(self):
545
"""Plot the PMFs."""
546
print 'Mean y', self.pmf_y.Mean()
547
print 'Mean z', self.post_z.Mean()
548
print 'Mean zb', self.post_zb.Mean()
549
550
thinkplot.Pmf(self.pmf_y)
551
thinkplot.Pmf(self.post_z)
552
thinkplot.Pmf(self.post_zb)
553
554
def MakePlot(self):
555
"""Plot the CDFs."""
556
thinkplot.Cdf(self.pmf_y.MakeCdf())
557
thinkplot.Cdf(self.prior_zb.MakeCdf())
558
thinkplot.Cdf(self.post_zb.MakeCdf())
559
thinkplot.Cdf(self.pmf_mean_zb.MakeCdf())
560
thinkplot.Show()
561
562
563
def Floor(x, factor=10):
564
"""Rounds down to the nearest multiple of factor.
565
566
When factor=10, all numbers from 10 to 19 get floored to 10.
567
"""
568
return int(x/factor) * factor
569
570
571
def TestGte():
572
"""Tests the GapTimeEstimator."""
573
random.seed(17)
574
575
xs = [60, 120, 240]
576
577
gap_times = [60, 60, 60, 60, 60, 120, 120, 120, 240, 240]
578
579
# distribution of gap time (z)
580
pdf_z = thinkbayes.EstimatedPdf(gap_times)
581
pmf_z = pdf_z.MakePmf(xs, name="z")
582
583
wtc = WaitTimeCalculator(pmf_z, inverse=False)
584
585
lam = 0.0333
586
n = 100
587
passenger_data = wtc.GenerateSamplePassengers(lam, n)
588
589
pcounts = [0, 0, 0]
590
591
ite = GapTimeEstimator(xs, pcounts, passenger_data)
592
593
thinkplot.Clf()
594
595
# thinkplot.Cdf(wtc.pmf_z.MakeCdf(name="actual z"))
596
thinkplot.Cdf(wtc.pmf_zb.MakeCdf(name="actual zb"))
597
ite.MakePlot()
598
599
600
class WaitMixtureEstimator(object):
601
"""Encapsulates the process of estimating wait time with uncertain lam.
602
"""
603
604
def __init__(self, wtc, are, num_passengers=15):
605
"""Constructor.
606
607
wtc: WaitTimeCalculator
608
are: ArrivalTimeEstimator
609
num_passengers: number of passengers seen on the platform
610
"""
611
self.metapmf = thinkbayes.Pmf()
612
613
for lam, prob in sorted(are.post_lam.Items()):
614
ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
615
self.metapmf.Set(ete.pmf_y, prob)
616
617
self.mixture = thinkbayes.MakeMixture(self.metapmf)
618
619
lam = are.post_lam.Mean()
620
ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
621
self.point = ete.pmf_y
622
623
def MakePlot(self, root='redline4'):
624
"""Makes a plot showing the mixture."""
625
thinkplot.Clf()
626
627
# plot the MetaPmf
628
for pmf, prob in sorted(self.metapmf.Items()):
629
cdf = pmf.MakeCdf().Scale(1.0/60)
630
width = 2/math.log(-math.log(prob))
631
thinkplot.Plot(cdf.xs, cdf.ps,
632
alpha=0.2, linewidth=width, color='blue',
633
label='')
634
635
# plot the mixture and the distribution based on a point estimate
636
thinkplot.PrePlot(2)
637
#thinkplot.Cdf(self.point.MakeCdf(name='point').Scale(1.0/60))
638
thinkplot.Cdf(self.mixture.MakeCdf(name='mix').Scale(1.0/60))
639
640
thinkplot.Save(root=root,
641
xlabel='Wait time (min)',
642
ylabel='CDF',
643
formats=FORMATS,
644
axis=[0,10,0,1])
645
646
647
648
def GenerateSampleData(gap_times, lam=0.0333, n=10):
649
"""Generates passenger data based on actual gap times.
650
651
gap_times: sequence of float
652
lam: arrival rate in passengers per second
653
n: number of simulated observations
654
"""
655
xs = MakeRange(low=10)
656
pdf_z = thinkbayes.EstimatedPdf(gap_times)
657
pmf_z = pdf_z.MakePmf(xs, name="z")
658
659
wtc = WaitTimeCalculator(pmf_z, inverse=False)
660
passenger_data = wtc.GenerateSamplePassengers(lam, n)
661
return wtc, passenger_data
662
663
664
def RandomSeed(x):
665
"""Initialize the random and numpy.random generators.
666
667
x: int seed
668
"""
669
random.seed(x)
670
numpy.random.seed(x)
671
672
673
def RunSimpleProcess(gap_times, lam=0.0333, num_passengers=15, plot=True):
674
"""Runs the basic analysis and generates figures.
675
676
gap_times: sequence of float
677
lam: arrival rate in passengers per second
678
num_passengers: int number of passengers on the platform
679
plot: boolean, whether to generate plots
680
681
Returns: WaitTimeCalculator, ElapsedTimeEstimator
682
"""
683
global UPPER_BOUND
684
UPPER_BOUND = 1200
685
686
cdf_z = thinkbayes.MakeCdfFromList(gap_times).Scale(1.0/60)
687
print 'CI z', cdf_z.CredibleInterval(90)
688
689
xs = MakeRange(low=10)
690
691
pdf_z = thinkbayes.EstimatedPdf(gap_times)
692
pmf_z = pdf_z.MakePmf(xs, name="z")
693
694
wtc = WaitTimeCalculator(pmf_z, inverse=False)
695
696
if plot:
697
wtc.PlotPmfs()
698
wtc.MakePlot()
699
700
ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
701
702
if plot:
703
ete.MakePlot()
704
705
return wtc, ete
706
707
708
def RunMixProcess(gap_times, lam=0.0333, num_passengers=15, plot=True):
709
"""Runs the analysis for unknown lambda.
710
711
gap_times: sequence of float
712
lam: arrival rate in passengers per second
713
num_passengers: int number of passengers on the platform
714
plot: boolean, whether to generate plots
715
716
Returns: WaitMixtureEstimator
717
"""
718
global UPPER_BOUND
719
UPPER_BOUND = 1200
720
721
wtc, _ete = RunSimpleProcess(gap_times, lam, num_passengers)
722
723
RandomSeed(20)
724
passenger_data = wtc.GenerateSamplePassengers(lam, n=5)
725
726
total_y = 0
727
total_k2 = 0
728
for k1, y, k2 in passenger_data:
729
print k1, y/60, k2
730
total_y += y/60
731
total_k2 += k2
732
print total_k2, total_y
733
print 'Average arrival rate', total_k2 / total_y
734
735
are = ArrivalRateEstimator(passenger_data)
736
737
if plot:
738
are.MakePlot()
739
740
wme = WaitMixtureEstimator(wtc, are, num_passengers)
741
742
if plot:
743
wme.MakePlot()
744
745
return wme
746
747
748
def RunLoop(gap_times, nums, lam=0.0333):
749
"""Runs the basic analysis for a range of num_passengers.
750
751
gap_times: sequence of float
752
nums: sequence of values for num_passengers
753
lam: arrival rate in passengers per second
754
755
Returns: WaitMixtureEstimator
756
"""
757
global UPPER_BOUND
758
UPPER_BOUND = 4000
759
760
thinkplot.Clf()
761
762
RandomSeed(18)
763
764
# resample gap_times
765
n = 220
766
cdf_z = thinkbayes.MakeCdfFromList(gap_times)
767
sample_z = cdf_z.Sample(n)
768
pmf_z = thinkbayes.MakePmfFromList(sample_z)
769
770
# compute the biased pmf and add some long delays
771
cdf_zp = BiasPmf(pmf_z).MakeCdf()
772
sample_zb = cdf_zp.Sample(n) + [1800, 2400, 3000]
773
774
# smooth the distribution of zb
775
pdf_zb = thinkbayes.EstimatedPdf(sample_zb)
776
xs = MakeRange(low=60)
777
pmf_zb = pdf_zb.MakePmf(xs)
778
779
# unbias the distribution of zb and make wtc
780
pmf_z = UnbiasPmf(pmf_zb)
781
wtc = WaitTimeCalculator(pmf_z)
782
783
probs = []
784
for num_passengers in nums:
785
ete = ElapsedTimeEstimator(wtc, lam, num_passengers)
786
787
# compute the posterior prob of waiting more than 15 minutes
788
cdf_y = ete.pmf_y.MakeCdf()
789
prob = 1 - cdf_y.Prob(900)
790
probs.append(prob)
791
792
# thinkplot.Cdf(ete.pmf_y.MakeCdf(name=str(num_passengers)))
793
794
thinkplot.Plot(nums, probs)
795
thinkplot.Save(root='redline5',
796
xlabel='Num passengers',
797
ylabel='P(y > 15 min)',
798
formats=FORMATS,
799
)
800
801
802
def main(script):
803
RunLoop(OBSERVED_GAP_TIMES, nums=[0, 5, 10, 15, 20, 25, 30, 35])
804
RunMixProcess(OBSERVED_GAP_TIMES)
805
806
807
if __name__ == '__main__':
808
main(*sys.argv)
809
810