CoCalc -- markov2.py

📚 The CoCalc Library - books, templates and other resources
Project: 📚 The Library - Shared Public Version
Path: cocalc-examples / think-python-2ed / code / markov2.py
Views: ⁹⁶¹⁶⁹
License: OTHER
"""This module contains a code example related to

Think Python, 2nd Edition
by Allen Downey
http://thinkpython2.com

Copyright 2015 Allen Downey

License: http://creativecommons.org/licenses/by/4.0/
"""
11

12
from __future__ import print_function, division
13

14

15
import sys
16
import random
17

18
from markov import skip_gutenberg_header, shift
19

20

21
class Markov:
    """Encapsulates the statistical summary of a text.

    The summary is a map from each prefix (a tuple of consecutive words)
    to the list of words that were seen right after that prefix.
    """

    def __init__(self):
        self.suffix_map = {}        # map from prefixes to a list of suffixes
        self.prefix = ()            # current tuple of words

    def process_file(self, filename, order=2):
        """Reads a file and performs Markov analysis.

        The results accumulate in self.suffix_map; nothing is returned.

        filename: string
        order: integer number of words in the prefix
        """
        # the with statement closes the file even if the analysis raises
        with open(filename) as fp:
            # helper imported from markov; presumably it advances fp past
            # the Project Gutenberg header -- confirm against markov.py
            skip_gutenberg_header(fp)

            for line in fp:
                # stop at the end-of-text marker line
                if line.startswith('*** END OF THIS'):
                    break

                for word in line.rstrip().split():
                    self.process_word(word, order)

    def process_word(self, word, order=2):
        """Processes each word.

        word: string
        order: integer

        During the first few iterations, all we do is store up the words;
        after that we start adding entries to the dictionary.
        """
        if len(self.prefix) < order:
            # the prefix is not full yet, so there is nothing to record
            self.prefix += (word,)
            return

        # record word as a suffix of the current prefix, making a new
        # entry if this prefix has not been seen before
        self.suffix_map.setdefault(self.prefix, []).append(word)

        self.prefix = shift(self.prefix, word)

    def random_text(self, n=100):
        """Generates random words from the analyzed text.

        Starts with a random prefix from the dictionary and prints each
        generated word followed by a space.  If no text has been analyzed
        (the map is empty), nothing is printed.

        n: number of words to generate
        """
        if not self.suffix_map:
            # fewer than order+1 words were processed, so there is no
            # prefix to start from; random.choice would raise IndexError
            return

        prefixes = list(self.suffix_map)

        # choose a random prefix (not weighted by frequency)
        start = random.choice(prefixes)

        count = 0
        while count < n:
            suffixes = self.suffix_map.get(start)
            if suffixes is None:
                # if the prefix isn't in map, we got to the end of the
                # original text, so we have to start again.  Every key in
                # the map has at least one suffix, so the next pass always
                # produces a word.
                start = random.choice(prefixes)
                continue

            # choose a random suffix
            word = random.choice(suffixes)
            print(word, end=' ')
            start = shift(start, word)
            count += 1
89

90

91
def main(script, filename='158-0.txt', n=100, order=2):
    """Runs Markov analysis on a file and prints random text.

    Prints a usage message instead if n or order is not a valid integer.

    script: string name of the script (sys.argv[0])
    filename: string name of the text file to analyze
    n: number of words to generate (integer or numeric string)
    order: number of words in the prefix (integer or numeric string)
    """
    try:
        # command-line arguments arrive as strings
        n = int(n)
        order = int(order)
    except ValueError:
        # script is a string, so it must be formatted with %s; %d would
        # raise TypeError here and hide the usage message
        print('Usage: %s filename [# of words] [prefix length]' % script)
    else:
        markov = Markov()
        markov.process_file(filename, order)
        markov.random_text(n)
101

102

103
if __name__ == '__main__':
    # sys.argv supplies the script name followed by the optional
    # filename, number of words and prefix length
    main(*sys.argv)
105

106

107
Product

Resources

Company