Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 96169
License: OTHER
1
"""This module contains a code example related to
2
3
Think Python, 2nd Edition
4
by Allen Downey
5
http://thinkpython2.com
6
7
Copyright 2015 Allen Downey
8
9
License: http://creativecommons.org/licenses/by/4.0/
10
"""
11
12
from __future__ import print_function, division
13
14
15
import sys
16
import random
17
18
from markov import skip_gutenberg_header, shift
19
20
21
class Markov:
    """Encapsulates the statistical summary of a text."""

    def __init__(self):
        self.suffix_map = {}        # map from prefixes to a list of suffixes
        self.prefix = ()            # current tuple of words

    def process_file(self, filename, order=2):
        """Reads a file and performs Markov analysis.

        filename: string
        order: integer number of words in the prefix

        The results accumulate in self.suffix_map (map from prefix to
        list of possible suffixes); nothing is returned.
        """
        # the with statement closes the file even if processing raises
        with open(filename) as fp:
            skip_gutenberg_header(fp)

            for line in fp:
                if line.startswith('*** END OF THIS'):
                    break

                for word in line.rstrip().split():
                    self.process_word(word, order)

    def process_word(self, word, order=2):
        """Processes each word.

        word: string
        order: integer

        During the first few iterations, all we do is store up the words;
        after that we start adding entries to the dictionary.
        """
        if len(self.prefix) < order:
            self.prefix += (word,)
            return

        # setdefault makes an empty entry if there is none for this prefix
        self.suffix_map.setdefault(self.prefix, []).append(word)

        self.prefix = shift(self.prefix, word)

    def random_text(self, n=100):
        """Generates random words from the analyzed text.

        Starts with a random prefix from the dictionary and prints each
        generated word followed by a space.  If nothing has been
        analyzed yet, prints nothing.

        n: number of words to generate
        """
        prefixes = list(self.suffix_map)
        if not prefixes:
            # no prefixes were recorded, so there is nothing to choose from
            return

        # choose a random prefix (not weighted by frequency)
        start = random.choice(prefixes)

        remaining = n
        while remaining > 0:
            suffixes = self.suffix_map.get(start)
            if suffixes is None:
                # if the prefix isn't in map, we got to the end of the
                # original text, so we have to start again.  Restarting
                # inside the loop (rather than recursing) keeps the call
                # stack flat no matter how often this happens.
                start = random.choice(prefixes)
                continue

            # choose a random suffix
            word = random.choice(suffixes)
            print(word, end=' ')
            start = shift(start, word)
            remaining -= 1
89
90
91
def main(script, filename='158-0.txt', n=100, order=2):
    """Analyzes a text file and prints randomly generated text.

    script: string name of the running script, shown in the usage message
    filename: string name of the file to analyze
    n: number of words to generate (an int, or a string int() accepts)
    order: number of words in the prefix (an int, or a string int() accepts)

    If n or order cannot be converted to an integer, prints a usage
    message and does nothing else.
    """
    try:
        n = int(n)
        order = int(order)
    except ValueError:
        # script is a string, so it must be formatted with %s; using %d
        # here raises TypeError instead of showing the usage message
        print('Usage: %s filename [# of words] [prefix length]' % script)
    else:
        markov = Markov()
        markov.process_file(filename, order)
        markov.random_text(n)
101
102
103
if __name__ == '__main__':
    # Run as a script: sys.argv[0] is passed as `script`, and any further
    # command-line arguments fill main's optional parameters in order.
    cli_args = sys.argv
    main(*cli_args)
105
106
107