-
Notifications
You must be signed in to change notification settings - Fork 0
/
markov_haiku.py
225 lines (193 loc) · 7.86 KB
/
markov_haiku.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
import sys
from collections import defaultdict
from string import punctuation
import logging
import random
import count_syllables
# Debug logging setup. logging.disable(logging.CRITICAL) suppresses every
# message at CRITICAL level and below, so the DEBUG output configured by
# basicConfig() stays silent until the disable call is commented out.
logging.disable(logging.CRITICAL) # comment-out to enable debugging messages
logging.basicConfig(level=logging.DEBUG, format='%(message)s')
def load_training_file(file):
    """Read the text file at *file* and return its whole contents as one string."""
    # The context manager closes the file even if read() raises.
    with open(file) as training_file:
        return training_file.read()
def prep_training(raw_haiku):
    """Split the raw training text into a flat list of words.

    Newlines are treated like any other whitespace, so line breaks in the
    training text do not survive into the corpus. Punctuation is left
    attached to the words.
    """
    # str.split() with no separator splits on any run of whitespace,
    # newlines included, and never yields empty strings.
    return raw_haiku.split()
def map_word_to_word(corpus):
    """Map every word in the corpus to the words that directly follow it.

    Args:
        corpus: List of word strings in training order.

    Returns:
        A ``defaultdict(list)`` keyed by word. Each value lists every word
        that immediately follows the key somewhere in ``corpus``, in order
        of appearance and with duplicates kept. A word that only occurs as
        the very last corpus entry gets no key.
    """
    dict_1to1 = defaultdict(list)
    for word, following_word in zip(corpus, corpus[1:]):
        dict_1to1[word].append(following_word)
    # Use .get() instead of indexing: subscripting a defaultdict inserts the
    # missing key, which would plant an empty 'sake' entry in the returned
    # map on every call (the argument is evaluated even when logging is off).
    logging.debug("map_word_to_word results for \"sake\" = %s\n",
                  dict_1to1.get('sake', []))
    return dict_1to1
def map_2_words_to_word(corpus):
    """Map every adjacent word pair in the corpus to the words that follow it.

    Args:
        corpus: List of word strings in training order.

    Returns:
        A ``defaultdict(list)`` keyed by ``"first second"`` (the two words
        joined by a single space). Each value lists every word that
        immediately follows that pair in ``corpus``, with duplicates kept.
        The final pair of the corpus has no follower and so adds no entry.
    """
    dict_2to1 = defaultdict(list)
    for first, second, following_word in zip(corpus, corpus[1:], corpus[2:]):
        dict_2to1[first + " " + second].append(following_word)
    # Use .get() instead of indexing: subscripting a defaultdict inserts the
    # missing key, which would plant an empty 'sake jug' entry in the
    # returned map on every call (even with logging disabled).
    logging.debug("map_2_words_to_word results for \"sake jug\" = %s\n",
                  dict_2to1.get('sake jug', []))
    return dict_2to1
def random_word(corpus):
    """Pick a random corpus word that has at most four syllables.

    Args:
        corpus: Non-empty list of word strings.

    Returns:
        A ``(word, num_syls)`` tuple, where ``num_syls`` is the value
        reported by ``count_syllables.count_syllable`` for ``word``.

    Raises:
        IndexError: If ``corpus`` is empty (raised by ``random.choice``).

    Note:
        The draw is repeated until a short-enough word comes up, so the
        corpus must contain at least one word of four syllables or fewer.
    """
    # A loop replaces the earlier recursive retry, which discarded the
    # recursive call's result and therefore returned None whenever the
    # first pick was too long.
    while True:
        word = random.choice(corpus)
        num_syls = count_syllables.count_syllable(word)
        if num_syls <= 4:
            logging.debug("random word & syllables = %s %s\n", word, num_syls)
            return (word, num_syls)
def word_after_single(prefix, suffix_map_1, line_syls, target_syls):
    """Return the words that may follow a single-word prefix on this line.

    Args:
        prefix: The word whose followers are looked up.
        suffix_map_1: Mapping of word -> list of following words.
        line_syls: Syllables already used on the current line.
        target_syls: Total syllables the line must not exceed.

    Returns:
        A list (duplicates preserved) of followers of ``prefix`` whose
        syllable count still fits within ``target_syls``; empty when the
        prefix is not in the map or nothing fits.
    """
    followers = suffix_map_1.get(prefix)
    if followers is None:
        accepted = []
    else:
        accepted = [
            follower for follower in followers
            if count_syllables.count_syllable(follower) + line_syls <= target_syls
        ]
    logging.debug("accepted words after \"%s\" = %s\n",
                  prefix, set(accepted))
    return accepted
def word_after_double(prefix, suffix_map_2, line_syls, target_syls):
    """Return the words that may follow a two-word prefix on this line.

    Args:
        prefix: Two words joined by a single space, e.g. ``"first second"``.
        suffix_map_2: Mapping of word pair -> list of following words.
        line_syls: Syllables already used on the current line.
        target_syls: Total syllables the line must not exceed.

    Returns:
        A list (duplicates preserved) of followers of ``prefix`` that still
        fit within ``target_syls``; empty when the pair is not in the map
        or nothing fits.
    """
    fitting = []
    # A missing pair yields None from .get(); treat it as "no followers".
    for follower in suffix_map_2.get(prefix) or ():
        if count_syllables.count_syllable(follower) + line_syls <= target_syls:
            fitting.append(follower)
    logging.debug("accepted words after \"%s\" = %s\n",
                  prefix, set(fitting))
    return fitting
def haiku_line(suffix_map_1, suffix_map_2, corpus, end_prev_line, target_syls):
    """Build one haiku line from the training corpus.

    Args:
        suffix_map_1: Mapping of word -> list of following words.
        suffix_map_2: Mapping of ``"first second"`` -> list of following words.
        corpus: List of training words.
        end_prev_line: The last two words of the previous line, or an empty
            list to start a new haiku (first line).
        target_syls: Exact number of syllables the line must contain.

    Returns:
        A ``(line_words, end_of_line)`` tuple. ``line_words`` is the list of
        words making up the new line; ``end_of_line`` is a new list holding
        the last two words used, to be passed as ``end_prev_line`` when
        building the next line. The ``end_prev_line`` argument is not
        modified.
    """
    line = '2/3'
    line_syls = 0
    current_line = []

    if not end_prev_line:  # build first line
        line = '1'
        # First word: seeded at random from the corpus.
        word, num_syls = random_word(corpus)
        current_line.append(word)
        line_syls += num_syls

        # Second word: a follower of the first word that still fits.
        next_word_choices = word_after_single(word, suffix_map_1,
                                              line_syls, target_syls)
        while not next_word_choices:
            # Fall back to followers of a random corpus word. The prefix must
            # be a bare word: random_word() returns a (word, syllables) tuple,
            # which is never a key of suffix_map_1 and would loop forever.
            prefix = random.choice(corpus)
            logging.debug("new random prefix = %s", prefix)
            next_word_choices = word_after_single(prefix, suffix_map_1,
                                                  line_syls, target_syls)
        word = random.choice(next_word_choices)
        num_syls = count_syllables.count_syllable(word)
        logging.debug("word & syllables = %s %s", word, num_syls)
        current_line.append(word)
        line_syls += num_syls

        if line_syls == target_syls:
            # Two words already fill the line.
            return current_line, current_line[-2:]
    else:  # build line 2 & 3
        # Seed with the previous line's ending; these two words only serve
        # as the Markov prefix and are stripped from the result below.
        current_line.extend(end_prev_line)

    while True:
        logging.debug("line = %s\n", line)
        prefix = current_line[-2] + " " + current_line[-1]
        next_word_choices = word_after_double(prefix, suffix_map_2,
                                              line_syls, target_syls)
        while not next_word_choices:
            # Dead end: borrow the followers of a random adjacent pair.
            index = random.randint(0, len(corpus) - 2)
            prefix = corpus[index] + " " + corpus[index + 1]
            next_word_choices = word_after_double(prefix, suffix_map_2,
                                                  line_syls, target_syls)
        word = random.choice(next_word_choices)
        num_syls = count_syllables.count_syllable(word)
        logging.debug("word & syllables = %s %s", word, num_syls)

        if line_syls + num_syls > target_syls:
            # Defensive: the choices are already filtered to fit.
            continue
        current_line.append(word)
        line_syls += num_syls
        if line_syls == target_syls:
            break

    # Report this line's real ending so the next line chains from it;
    # returning the unchanged input would lose the link between lines.
    end_of_line = current_line[-2:]

    if line == '1':
        final_line = current_line[:]
    else:
        # Drop the two seed words carried over from the previous line.
        final_line = current_line[2:]
    return final_line, end_of_line
def main():
    """Run the interactive menu: build a haiku or regenerate its line 2 or 3."""
    intro = (
        "\n\n"
        "A thousand monkeys at a thousand typewriters...\n"
        "or one computer...can sometimes produce a haiku.\n"
    )
    print(intro)

    # Train once up front; every menu action reuses the same maps.
    corpus = prep_training(load_training_file('train.txt'))
    suffix_map_1 = map_word_to_word(corpus)
    suffix_map_2 = map_2_words_to_word(corpus)

    menu = (
        "\n"
        "Japanese Haiku Generator\n"
        "0 - Quit\n"
        "1 - Generate a Haiku poem\n"
        "2 - Regenerate Line 2\n"
        "3 - Regenerate Line 3\n"
    )
    need_haiku_first = 'Please generate a full Haiku first (Option 1).'

    final = []
    choice = None
    while choice != "0":
        print(menu)
        choice = input('Choice? ')
        print()

        if choice == "0":
            print('Sayonara.')
            sys.exit()

        if choice == "1":
            # Fresh haiku: 5-7-5 syllables, each line seeded by the previous.
            line_one, end_prev_line1 = haiku_line(
                suffix_map_1, suffix_map_2, corpus, [], 5)
            line_two, end_prev_line2 = haiku_line(
                suffix_map_1, suffix_map_2, corpus, end_prev_line1, 7)
            line_three, _ = haiku_line(
                suffix_map_1, suffix_map_2, corpus, end_prev_line2, 5)
            final = [line_one, line_two, line_three]
        elif choice in ('2', '3'):
            if not final:
                print(need_haiku_first)
                continue
            if choice == '2':
                # Line 2 is rebuilt from the ending of line 1.
                final[1], end_prev_line2 = haiku_line(
                    suffix_map_1, suffix_map_2, corpus, end_prev_line1, 7)
            else:
                # Line 3 is rebuilt from the latest ending of line 2.
                final[2], _ = haiku_line(
                    suffix_map_1, suffix_map_2, corpus, end_prev_line2, 5)
        else:
            print('\nSorry. This is not a valid choice. Choose again.')
            continue

        # Display results: the haiku goes to stderr, the menu to stdout.
        print()
        for haiku_row in final:
            print(" ".join(haiku_row), file=sys.stderr)
# Start the interactive generator only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()