Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
Added command line option --apostrophe
It is required for Ukrainian support because the apostrophe is a frequently used letter there.
  • Loading branch information
Alter-1 authored May 11, 2019
1 parent b069265 commit 76749e6
Showing 1 changed file with 16 additions and 5 deletions.
21 changes: 16 additions & 5 deletions langstats/mkpairmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,25 @@
maxrank = 64

def Usage():
print "Usage: mkchartoorder.py <charstats file> <text reference file>"
print "Usage: mkchartoorder.py [--apostrophe] <charstats file> <text reference file>"
sys.exit(1)

if len(sys.argv) != 3:
if len(sys.argv) < 3:
Usage()

charstats = sys.argv[1]
reftext = sys.argv[2]
if sys.argv[1] == "--apostrophe":
# Required for Ukrainian, where the apostrophe is a frequently used letter.
if len(sys.argv) != 4:
Usage()
apostrophe_code = 0x27
charstats = sys.argv[2]
reftext = sys.argv[3]
else:
if len(sys.argv) != 3:
Usage()
apostrophe_code = -1
charstats = sys.argv[1]
reftext = sys.argv[2]

# print "Charstats file:", charstats, "Ref text:", reftext

Expand All @@ -62,7 +73,7 @@ def Usage():
# Eliminate the common control/punctuation areas. Note that this is only
# the ascii control / punctuation because the winxxx encodings have
# lexical characters in the 80-a0 area
if bytevalue <= 0x40 or \
if (bytevalue <= 0x40 and bytevalue != apostrophe_code) or \
(bytevalue >= 0x5b and bytevalue <= 0x60) or \
(bytevalue >= 0x7b and bytevalue <= 0x7f):
continue
Expand Down

0 comments on commit 76749e6

Please sign in to comment.