-
Notifications
You must be signed in to change notification settings - Fork 0
/
cmd.py
executable file
·100 lines (84 loc) · 3.3 KB
/
cmd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python3
'''A script to help Spanish learners practice word stress.
In Spanish the stress falls on the syllable containing the accented character;
there is at most one written accent in any word. The technical terms are:
Aguda (stress on the final syllable)
Grave/Llana (stress on the penultimate syllable)
Esdrújula (3rd to last)
Sobresdrújula (4th to last or earlier)
In the case of aguda/grave there isn't always a written accent, but the
position of the stress is governed by rules. In the case of
esdrújula/sobresdrújula words the stress is always accented.
'''
import sys
import re
import random
import itertools
# Word list to read: the first command-line argument when given, otherwise
# the relative path "tilde_words".
filename = "tilde_words"
if len(sys.argv) > 1:
    filename = sys.argv[1]


def read_stripped_line(fh):
    """Return the next line of *fh* with newline characters stripped.

    The result is an empty string at end of file and for a blank line.
    """
    return fh.readline().strip("\n")


# Array of all our Spanish words, one per line of the input file.
words = []
# The word list contains accented characters, so decode it explicitly rather
# than relying on the platform default; ``with`` closes the file even when a
# read fails part-way through.
with open(filename, mode="r", encoding="utf-8") as fh:
    # Two-argument iter() keeps calling the reader until it returns "",
    # i.e. at end of file or at the first blank line.
    words.extend(iter(lambda: read_stripped_line(fh), ""))
class Word:
    """A single Spanish word with helpers for stress/syllable practice.

    The text is stored unchanged in ``self.word``.
    """

    # Maps the lowercase acute-accented vowels to their plain forms.
    _DEACCENT_TABLE = str.maketrans('áéíóú', 'aeiou')

    # Strong (open) vowels always form their own syllable nucleus.
    _STRONG_VOWELS = frozenset('aeoáéó')
    # A weak vowel carrying a written accent breaks a would-be diphthong.
    _STRESSED_WEAK_VOWELS = frozenset('íú')
    _VOWELS = _STRONG_VOWELS | _STRESSED_WEAK_VOWELS | frozenset('iuü')

    # Consonant pairs that are never divided between two syllables.
    _INSEPARABLE_PAIRS = frozenset((
        'ch', 'll', 'rr', 'dr', 'tr',
        'bl', 'cl', 'fl', 'gl', 'pl',
        'br', 'cr', 'fr', 'gr', 'pr',
    ))

    def __init__(self, word):
        self.word = word

    def deaccent(self):
        '''Replace our accented character with a non-accented character.

        Only the lowercase vowels á, é, í, ó and ú are replaced; every other
        character is returned untouched.
        '''
        return self.word.translate(self._DEACCENT_TABLE)

    def syllabicate(self):
        """Split the word into its syllables, for example:

        superextraordinarísimamente -> su-pe-rex-tra-or-di-na-rí-si-ma-men-te
        electroencefalografistas -> e-lec-tro-en-ce-fa-lo-gra-fis-tas

        Returns a list of strings which, joined together, give back the
        original text (text containing spaces is first split on the spaces
        and each piece is divided on its own).

        Rules applied:
        1. Two adjacent vowels are separated when both are strong (a, e, o)
           or when one is strong and the other is an accented weak vowel
           (í, ú). Any other vowel sequence stays together.
        2. A single consonant between vowels starts the following syllable.
        3. In a longer consonant group the last consonant starts the
           following syllable, unless the last two form an inseparable pair
           (ch, ll, rr, or a consonant followed by l/r such as "tr" and
           "bl"), in which case both do.

        Anything that is not a vowel (including "y" and "h") is treated as a
        consonant. Rare exceptions, such as prefix boundaries, are not
        handled.
        Good explanation: http://i1stspanish.com/syllabication-silabeo-lesson-two
        """
        syllables = []
        for chunk in self.word.split(' '):
            syllables.extend(self._split_chunk(chunk))
        return syllables

    @classmethod
    def _is_vowel(cls, char):
        """Return True when *char* is a Spanish vowel (case-insensitive)."""
        return char.lower() in cls._VOWELS

    @classmethod
    def _is_hiatus(cls, first, second):
        """Return True when two adjacent vowels belong to different syllables."""
        first, second = first.lower(), second.lower()
        if first in cls._STRONG_VOWELS and second in cls._STRONG_VOWELS:
            return True
        return (
            (first in cls._STRESSED_WEAK_VOWELS
             and second in cls._STRONG_VOWELS)
            or (second in cls._STRESSED_WEAK_VOWELS
                and first in cls._STRONG_VOWELS)
        )

    @classmethod
    def _find_nuclei(cls, chunk):
        """Return (start, end) index pairs, one per syllable nucleus."""
        nuclei = []
        pos, end = 0, len(chunk)
        while pos < end:
            if not cls._is_vowel(chunk[pos]):
                pos += 1
                continue
            start = pos
            pos += 1
            # Extend over diphthongs/triphthongs; stop at a hiatus so the
            # next vowel opens a nucleus of its own.
            while (pos < end and cls._is_vowel(chunk[pos])
                   and not cls._is_hiatus(chunk[pos - 1], chunk[pos])):
                pos += 1
            nuclei.append((start, pos))
        return nuclei

    @classmethod
    def _split_chunk(cls, chunk):
        """Divide one space-free piece of text into syllables."""
        nuclei = cls._find_nuclei(chunk)
        if len(nuclei) < 2:
            # Nothing to divide: empty text, no vowels, or one syllable.
            return [chunk]
        cuts = [0]
        for (_, prev_end), (next_start, _) in zip(nuclei, nuclei[1:]):
            gap = next_start - prev_end  # consonants between the two nuclei
            if gap <= 1:
                cut = prev_end
            elif (chunk[next_start - 2:next_start].lower()
                    in cls._INSEPARABLE_PAIRS):
                cut = next_start - 2
            else:
                cut = next_start - 1
            cuts.append(cut)
        cuts.append(len(chunk))
        return [chunk[begin:stop] for begin, stop in zip(cuts, cuts[1:])]
#rx_áéíóú = re.compile(r'(.*)([áéíóú])(.*)')
# Print the syllables and the de-accented form of a random selection of up to
# 20 words. random.sample() raises ValueError when asked for more items than
# the list holds, so cap the sample size for short word lists.
words = random.sample(words, min(20, len(words)))
for text in words:
    word = Word(text)
    print(word.syllabicate(), word.deaccent())