Contact
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96168
License: OTHER
1
"""This module contains a code example related to
2
3
Think Python, 2nd Edition
4
by Allen Downey
5
http://thinkpython2.com
6
7
Copyright 2015 Allen Downey
8
9
License: http://creativecommons.org/licenses/by/4.0/
10
"""
11
12
from __future__ import print_function, division
13
14
import random
15
16
from bisect import bisect
17
18
from analyze_book1 import process_file
19
20
21
def random_word(hist):
    """Chooses a random word from a histogram.

    The probability of each word is proportional to its frequency.

    hist: map from word to frequency (frequencies are assumed to be
          non-negative integers, since they are summed into a cumulative
          list that is searched with bisect and sampled with randint)

    returns: one of the keys of hist

    raises: ValueError if hist is empty or its frequencies sum to zero
    """
    # TODO: This could be made faster by computing the cumulative
    # frequencies once and reusing them.

    words = []
    freqs = []
    total_freq = 0

    # make a list of words and a list of cumulative frequencies
    for word, freq in hist.items():
        total_freq += freq
        words.append(word)
        freqs.append(total_freq)

    # with no weight to distribute there is nothing to draw; fail with a
    # clear message instead of the opaque "empty range" error from randint
    if total_freq <= 0:
        raise ValueError('hist must contain at least one word with a '
                         'positive frequency')

    # choose a random value in [0, total_freq-1] and find its location in
    # the cumulative list: bisect returns the first position whose
    # cumulative frequency is greater than x, so a word with frequency f
    # is selected for exactly f of the total_freq possible values
    x = random.randint(0, total_freq-1)
    index = bisect(freqs, x)
    return words[index]
45
46
47
def main():
    """Prints 100 randomly chosen words from the file '158-0.txt'.

    The histogram comes from process_file (imported from analyze_book1);
    it is presumably a map from word to frequency, which is what
    random_word expects -- confirm against analyze_book1.
    """
    hist = process_file('158-0.txt', skip_header=True)

    print("\n\nHere are some random words from the book")

    # each word is followed by a space rather than a newline, so the
    # whole sample is printed on a single line
    sample_size = 100
    for _ in range(sample_size):
        word = random_word(hist)
        print(word, end=' ')
53
54
55
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
57
58
59