Skip to content

Commit

Permalink
Adding files from the learntamil repo
Browse files Browse the repository at this point in the history
  • Loading branch information
Ashwin Balamohan committed Jan 9, 2013
1 parent 8253cdd commit f26e074
Show file tree
Hide file tree
Showing 19 changed files with 3,696 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
v<0.1>, <2012-01-09> -- Initial release.
186 changes: 186 additions & 0 deletions LICENSE.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions amuthaa/CHANGES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
v<0.1>, <2012-01-09> -- Initial release.
179 changes: 179 additions & 0 deletions amuthaa/Chol.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# -*- coding: utf-8 -*-
#!/usr/bin/python

# import Ezhuthu class
from Ezhuthu import Ezhuthu


# Add skeletal outline for functions. Removed redundant function. Modified import command

class Chol:
"""
Module to handle the processing of Tamil words
"""

def __init__(self, text=u''):
""" Constructor for Chol class. Takes a unicode string composed of Tamil characters as an optional input. """

self._text = u''
self._syllables = []
self._letters = []

# if text is entered, call the text property's setter (which generates the syllables and letters lists)
if text: self.text(text)


def __getitem__(self,index):
""" Read the letter at the given position in the letters list """

self._letters[index]


def __setitem__(self, index, value):
""" Modify a member at the given position in the letters list """

self._letters[index] = value


@property
def text(self):
return self._text

@text.setter
def text(self, text):

# if valid text is entered, calculate syllables and letters

if self.validate():
self.text = text

self._letters = self.split_letters(text)
#TODO: calculate syllables

# if invalid text is entered, initialize to empty string
else:
self.text = u''


@property
def syllables(self):
return self._syllables

@property
def letters(self):
return self._letters


def validate(self):
""" Checks whether the given word is valid """

# simple test: every element of the string has to be a valid Tamil character
for codepoint in self.text:
Ezhuthu.validate_letter(codepoint)


#TODO: implement method this more thoroughly
#TODO: check for pulli or combination_ending at the beginning of a word

return True


@staticmethod
def split_letters(word=u''):
""" Returns the graphemes (i.e. the Tamil characters) in a given word as a list """

# ensure that the word is a valid word
Chol.validate_word(word)

# list (which will be returned to user)
letters = []

# a tuple of all combination endings and of all அ combinations
combination_endings = Ezhuthu.get_combination_endings()
a_combinations = Ezhuthu.get_combination_column(u'அ').values()

# loop through
for codepoint in word:

# if codepoint is an அ combination, a vowel, aytham or a space, add it to the list
if codepoint in a_combinations or Ezhuthu.is_whitespace(codepoint) or Ezhuthu.is_vowel(codepoint) or Ezhuthu.is_aytham(codepoint):
letters.append(codepoint)

# if codepoint is a combination ending or a pulli ('்'), add it to the end of the previously-added codepoint
elif codepoint in combination_endings or codepoint==Ezhuthu.get_pulli():

# ensure that at least one character already exists
if len(letters) > 0:
letters[-1] = letters[-1] + codepoint

# otherwise raise an Error. validate_word() should catch this, however
else:
raise ValueError("Unknown error: The combination ending %s cannot be the first character of a word" %(codepoint))

# if codepoint was neither a vowel, aytham, a pulli or a combination ending, an unexpected error has occurred
else:
raise ValueError("Unknown error: The codepoint \'%s\' in word %s is neither a vowel, consonant, combination or aytham" %(codepoint, word))


#TODO: Write extensive test cases for this

return letters

@staticmethod
def print_letters(word = u'', delimiter="-"):
""" Prints all the graphemes in a word, separated by a delimiter (default: "-") """

for letter in Chol.get_letters(word):
print letter, delimiter,

# go to new line
print


@staticmethod
def split_syllables(letters = []):
""" Returns the syllables in a given word as a list """

## Generic algorithm:
## Each vowel and combination is its own syllable. Consonants and aytham get added to the end of the previous syllable

# ensure that the word is a valid word
Chol.validate_word(''.join(letters))

# initialize empty list
syllables = []

# loop through letters in the word
for letter in letters:

# if letter is a vowel or combination, it gets its own syllable
if Ezhuthu.is_combination(letter) or Ezhuthu.is_vowel(letter):
syllables.append(letter)

# if codepoint is a consonant or aytham, add it to the end of the previously-added codepoint
elif Ezhuthu.is_consonant(letter) or Ezhuthu.is_aytham(letter):

# ensure that at least one character already exists
if len(syllables) > 0:
syllables[-1] = syllables[-1] + letter

# if the first letter is a consonant (probably b/c it' s a loanword), add it to the beginning of the string
else:
syllables.append(letter)

# if codepoint was neither a vowel, aytham, a pulli or a combination ending, an unexpected error has occurred
else:
raise Exception("Unknown error: The letter \'%s\' in word %s is neither a vowel, consonant, combination or aytham" %(letter, ''.join(letters)))


#TODO: Write extensive test cases for this










10 changes: 10 additions & 0 deletions amuthaa/EzhuthUrai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
#!/usr/bin/python

class EzhuthUrai:
"""
A module to handle the processing of Tamil passages (from sentences to short stories)
"""

#TODO: Implement this
pass
Loading

0 comments on commit f26e074

Please sign in to comment.