Skip to content

Commit

Permalink
Fixreadwritefile improve some testing (#79)
Browse files Browse the repository at this point in the history
* fixed and improved a few tests...

---------

Co-authored-by: Doug Ransom <[email protected]>
  • Loading branch information
quintijn and dougransom authored Jun 21, 2024
1 parent 983cf48 commit e811846
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 10 deletions.
5 changes: 3 additions & 2 deletions src/natlinkcore/readwritefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,16 @@ class ReadWriteFile:
`encodings` and `encoding` can be overridden at creation of an instance.
`encodings` must then be a list of possible encodings
`encoding` is then
when `encoding` is a str, `encodings` is set to a list only containing this encoding
the default `encodings` are: `['ascii', 'utf-8', 'cp1252', 'latin-1']`
A file can be read via this class, and another string can be written back using the same encoding and BOM mark.
When the encoding is 'ascii' and non-ASCII characters are present at write time, care is taken to
encode the output with another encoding, most often (default) 'utf-8'.
When you need a 'utf-16le' encoding (for 'nsapps.ini' of Dragon),
pass `encodings = ['utf-16le']` when creating the instance.
"""
def __init__(self, encodings=None):
self.input_path = ''
Expand Down
2 changes: 1 addition & 1 deletion tests/mock_readwritefile/cp1252.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
cp1252 caf
cp1252 euro:
File renamed without changes.
68 changes: 61 additions & 7 deletions tests/test_readwritefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def test_accented_characters_write_file(tmp_path):
# newFile = join(testDir, 'output-accented.txt')
testDir = tmp_path / testFolderName
testDir.mkdir()
newFile = testDir/"outut-accented.txt"
newFile = testDir/"output-accented.txt"
text = 'caf\xe9'
rwfile = ReadWriteFile(encodings=['ascii']) # optional encoding
# this is with default errors='xmlcharrefreplace':
Expand Down Expand Up @@ -89,7 +89,7 @@ def test_other_encodings_write_file(tmp_path):
testDir = tmp_path / testFolderName
testDir.mkdir()

oldFile = mock_readwritefiledir/'latin1 accented.txt'
oldFile = mock_readwritefiledir/'latin1.txt'

rwfile = ReadWriteFile(encodings=['latin1']) # optional encoding
text = rwfile.readAnything(oldFile)
Expand Down Expand Up @@ -133,11 +133,32 @@ def test_nsapps_utf16(tmp_path):
assert encoding2 == 'utf-16le'

def test_latin1_cp1252_write_file(tmp_path):
"""have one latin-1 file and one that is specific cp1252 (euro sign)
Currently both return cp1252, as it is hard to distinguish them and cp1252 is more general
"""
testDir = tmp_path / testFolderName
testDir.mkdir()
_newFile = testDir/ 'latin1.txt'
_newFile = testDir/'cp1252.txt'
assert False, "QH TODO"
mock_files_list = os.listdir(mock_readwritefiledir)

assert 'latin1.txt' in mock_files_list
assert 'cp1252.txt' in mock_files_list

rwfilelatin1 = ReadWriteFile()
rwfilecp1252 = ReadWriteFile()
latin1_path = mock_readwritefiledir/'latin1.txt'
cp1252_path = mock_readwritefiledir/'cp1252.txt'

rwfilelatin1.readAnything(latin1_path)

assert rwfilelatin1.bom == ''
assert rwfilelatin1.encoding == 'cp1252'

rwfilecp1252.readAnything(cp1252_path)
assert rwfilecp1252.bom == ''
assert rwfilecp1252.encoding == 'cp1252'



# TODO (QH) to be done, these encodings do not take all characters,
# and need special attention.
Expand All @@ -151,11 +172,15 @@ def test_read_write_file(tmp_path):
assert len(mock_files_list) > 0

for F in mock_files_list:
encodings = None
if F.startswith('nsapps'):
encodings = ['utf-16le']
continue # utf16-le is not caught by the standard function, but needs its own encoding
if not F.startswith('output-'):
Fout = 'output-' + F
#read the file from the mock folder
F_path = mock_readwritefiledir / F
rwfile = ReadWriteFile()
rwfile = ReadWriteFile(encodings=encodings)
text = rwfile.readAnything(F_path)
trunk, _ext = splitext(F)
Fout = trunk + ".txt"
Expand All @@ -172,13 +197,42 @@ def test_read_write_file(tmp_path):
raise ValueError(f'old: "{F_path}", new: "{Fout_path}", differ at pos {i}: Old: "{o}", new: "{n}", partold (i:i+2): "{parto}", partnew: "{partn}"')

def test_acoustics_ini(tmp_path):
"""this is a utf-8 file with a bom mark. Try also writing back!
"""
testDir = tmp_path / testFolderName
testDir.mkdir()


F='acoustic.ini'
F_path = mock_readwritefiledir/F
rwfile = ReadWriteFile()
config_text = rwfile.readAnything(F_path)
Config = configparser.ConfigParser()
Config.read_string(config_text)
assert Config.get('Acoustics', '2 2') == '2_2'
assert Config.get('Acoustics', '2 2') == '2_2'

newFile1 = 'output1' + F
newPath1 = testDir/newFile1
rwfile.writeAnything(newPath1, config_text)

assert filecmp.cmp(F_path, newPath1)

rwfile2 = ReadWriteFile()
text2 = rwfile2.readAnything(newPath1)
bom2 = rwfile2.bom
encoding2 = rwfile2.encoding

tRaw = rwfile.rawText
tRaw2 = rwfile2.rawText

assert tRaw2 == tRaw
assert text2[0:5] == '[Base'
assert bom2 == [239, 187, 191]
assert encoding2 == 'utf-8'





@pytest.mark.parametrize("F", ['originalnatlink.ini', 'natlinkconfigured.ini'])
def test_config_ini(tmp_path,F):
Expand Down

0 comments on commit e811846

Please sign in to comment.