Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96169
License: OTHER
1
"""This module contains a code example related to

Think Python, 2nd Edition
by Allen Downey
http://thinkpython2.com

Copyright 2015 Allen Downey

License: http://creativecommons.org/licenses/by/4.0/
"""
11
12
from __future__ import print_function, division
13
14
import sys
15
import string
16
import random
17
18
# module-level state: process_word updates both names,
# random_text reads suffix_map
prefix = ()       # current tuple of words
suffix_map = {}   # map from prefixes to a list of suffixes
21
22
23
def process_file(filename, order=2):
    """Reads a file and performs Markov analysis.

    Skips the header with skip_gutenberg_header, then passes every
    whitespace-separated word to process_word until a line starting with
    '*** END OF THIS' or the end of the file is reached.

    filename: string
    order: integer number of words in the prefix

    returns: map from prefix to list of possible suffixes.
    """
    # the context manager closes the file even if processing raises
    with open(filename) as fp:
        skip_gutenberg_header(fp)

        for line in fp:
            if line.startswith('*** END OF THIS'):
                break

            for word in line.rstrip().split():
                process_word(word, order)

    # process_word fills the module-level map; hand it back as documented
    return suffix_map
40
41
42
def skip_gutenberg_header(fp):
    """Consumes lines from fp up to and including the end-of-header line.

    The header ends at the first line starting with '*** START OF THIS';
    if no such line exists, fp is read to exhaustion.

    fp: open file object
    """
    header_end = '*** START OF THIS'
    for text in fp:
        if text.startswith(header_end):
            # the marker line itself has been consumed; stop here
            return
50
51
52
def process_word(word, order=2):
    """Feeds one word into the Markov analysis.

    word: string
    order: integer

    Until the running prefix holds `order` words, the word is only
    accumulated; after that each word is recorded as a suffix of the
    current prefix and the prefix slides forward by one word.
    """
    global prefix

    # still warming up: grow the prefix and record nothing
    if len(prefix) < order:
        prefix = prefix + (word,)
        return

    # setdefault makes the suffix list the first time a prefix is seen
    suffix_map.setdefault(prefix, []).append(word)

    prefix = shift(prefix, word)
73
74
75
def random_text(n=100):
    """Generates random words from the analyzed text.

    Starts with a random prefix from the dictionary and prints each
    generated word followed by a space.

    n: number of words to generate

    Raises IndexError (from random.choice) if suffix_map is empty.
    """
    # build the candidate list once; it is reused for every restart
    prefixes = list(suffix_map)

    # choose a random prefix (not weighted by frequency)
    start = random.choice(prefixes)

    for _ in range(n):
        suffixes = suffix_map.get(start)
        if suffixes is None:
            # if the start isn't in map, we got to the end of the
            # original text, so restart from a fresh random prefix;
            # every candidate is a key, so this lookup cannot fail
            start = random.choice(prefixes)
            suffixes = suffix_map[start]

        # choose a random suffix
        word = random.choice(suffixes)
        print(word, end=' ')
        start = shift(start, word)
97
98
99
def shift(t, word):
    """Slides a tuple window forward by one word.

    The first element of t is dropped and word is appended at the end.

    t: tuple of strings
    word: string

    Returns: tuple of strings
    """
    remaining = t[1:]
    appended = (word,)
    return remaining + appended
108
109
110
def main(script, filename='158-0.txt', n=100, order=2):
    """Analyzes a text file and prints randomly generated words.

    script: string, name of the script (used in the usage message)
    filename: string, name of the file to analyze
    n: number of words to generate (integer or numeric string)
    order: number of words in the prefix (integer or numeric string)

    If n or order cannot be converted to an integer, a usage message is
    printed and nothing else is done.
    """
    try:
        n = int(n)
        order = int(order)
    except ValueError:
        # script is a string, so it has to be formatted with %s; using %d
        # here would raise TypeError instead of printing the usage message
        print('Usage: %s filename [# of words] [prefix length]' % script)
    else:
        process_file(filename, order)
        random_text(n)
        print()
120
121
122
if __name__ == '__main__':
    # run as a script: pass the script name and any command-line
    # arguments through to main positionally
    cli_args = sys.argv
    main(*cli_args)
124
125