Fixreadwritefile improve some testing (#79)

* fixed and improved a few tests... --------- Co-authored-by: Doug Ransom <[email protected]>
dictation-toolbox · Jun 21, 2024 · e811846 · e811846
1 parent 983cf48
commit e811846
Show file tree

Hide file tree

Showing 4 changed files with 65 additions and 10 deletions.
diff --git a/src/natlinkcore/readwritefile.py b/src/natlinkcore/readwritefile.py
@@ -38,15 +38,16 @@ class ReadWriteFile:
     
     `encodings` and `encoding` can be overridden at creation of an instance.
     `encodings` must then be a list of possible encodings
-    `encoding` is then 
-    when `encoding` is a str, `encodings` is set to a list only containing this encoding
     
     the default `encodings` are: `['ascii', 'utf-8', 'cp1252',  'latin-1']`
     
     a file can be read via this class, and write back another string, using the same encoding and bom mark
     
     When the encoding is 'ascii' and at write time, non ascii characters are present, care is taken to
     encode the output to another encoding, most often (default) 'utf-8'.
+    
+    When you need a 'utf-16le' encoding (for 'nsapps.ini' of Dragon),
+    pass `encodings = ['utf-16le']` when creating the instance.
     """
     def __init__(self, encodings=None):
         self.input_path = ''

diff --git a/tests/mock_readwritefile/cp1252.txt b/tests/mock_readwritefile/cp1252.txt
@@ -1 +1 @@
-cp1252 café
+cp1252 euro: €
diff --git a/tests/mock_readwritefile/latin1 accented.txt → tests/mock_readwritefile/latin1.txt b/tests/mock_readwritefile/latin1 accented.txt → tests/mock_readwritefile/latin1.txt
diff --git a/tests/test_readwritefile.py b/tests/test_readwritefile.py
@@ -52,7 +52,7 @@ def test_accented_characters_write_file(tmp_path):
  #   newFile = join(testDir, 'output-accented.txt')
     testDir = tmp_path / testFolderName
     testDir.mkdir()
-    newFile = testDir/"outut-accented.txt"
+    newFile = testDir/"output-accented.txt"
     text = 'caf\xe9'
     rwfile = ReadWriteFile(encodings=['ascii'])  # optional encoding
     # this is with default errors='xmlcharrefreplace':
@@ -89,7 +89,7 @@ def test_other_encodings_write_file(tmp_path):
     testDir = tmp_path / testFolderName
     testDir.mkdir()
 
-    oldFile = mock_readwritefiledir/'latin1 accented.txt'
+    oldFile = mock_readwritefiledir/'latin1.txt'
 
     rwfile = ReadWriteFile(encodings=['latin1'])  # optional encoding
     text = rwfile.readAnything(oldFile)
@@ -133,11 +133,32 @@ def test_nsapps_utf16(tmp_path):
     assert encoding2 == 'utf-16le'
 
 def test_latin1_cp1252_write_file(tmp_path):
+    """have one latin-1 file and one that is specific cp1252 (euro sign)
+    
+    Currently both return cp1252, as it is hard to distinguish them and cp1252 is more general
+    """
     testDir = tmp_path / testFolderName
     testDir.mkdir()
-    _newFile = testDir/ 'latin1.txt'
-    _newFile = testDir/'cp1252.txt'
-    assert False, "QH TODO"
+    mock_files_list = os.listdir(mock_readwritefiledir)
+
+    assert 'latin1.txt' in mock_files_list
+    assert 'cp1252.txt' in mock_files_list
+
+    rwfilelatin1 = ReadWriteFile()
+    rwfilecp1252 = ReadWriteFile()
+    latin1_path = mock_readwritefiledir/'latin1.txt'
+    cp1252_path = mock_readwritefiledir/'cp1252.txt'
+
+    rwfilelatin1.readAnything(latin1_path)
+
+    assert rwfilelatin1.bom == ''
+    assert rwfilelatin1.encoding == 'cp1252'
+
+    rwfilecp1252.readAnything(cp1252_path)
+    assert rwfilecp1252.bom == ''
+    assert rwfilecp1252.encoding == 'cp1252'
+
+
 
     # TODO (QH) to be done, these encodings do not take all characters,
     # and need special attention.
@@ -151,11 +172,15 @@ def test_read_write_file(tmp_path):
     assert len(mock_files_list) > 0
 
     for F in mock_files_list:
+        encodings = None
+        if F.startswith('nsapps'):
+            encodings = ['utf-16le']
+            continue    # utf-16le is not caught by the standard function, but needs its own encoding
         if not F.startswith('output-'):
             Fout = 'output-' + F
             #read the file from the mock folder
             F_path =   mock_readwritefiledir / F
-            rwfile = ReadWriteFile()
+            rwfile = ReadWriteFile(encodings=encodings)
             text = rwfile.readAnything(F_path)
             trunk, _ext = splitext(F)
             Fout = trunk + ".txt"
@@ -172,13 +197,42 @@ def test_read_write_file(tmp_path):
                     raise ValueError(f'old: "{F_path}", new: "{Fout_path}", differ at pos {i}: Old: "{o}", new: "{n}", partold (i:i+2): "{parto}", partnew: "{partn}"')
 
 def test_acoustics_ini(tmp_path):
+    """this is a utf-8 file with a bom mark. Try also writing back!
+    """
+    testDir = tmp_path / testFolderName
+    testDir.mkdir()
+
+
     F='acoustic.ini'
     F_path = mock_readwritefiledir/F
     rwfile = ReadWriteFile()
     config_text = rwfile.readAnything(F_path)
     Config = configparser.ConfigParser()
     Config.read_string(config_text)
-    assert Config.get('Acoustics', '2 2') == '2_2' 
+    assert Config.get('Acoustics', '2 2') == '2_2'
+
+    newFile1 = 'output1' + F
+    newPath1 = testDir/newFile1
+    rwfile.writeAnything(newPath1, config_text)
+
+    assert filecmp.cmp(F_path, newPath1)
+
+    rwfile2 = ReadWriteFile() 
+    text2 = rwfile2.readAnything(newPath1)
+    bom2 = rwfile2.bom
+    encoding2 = rwfile2.encoding
+
+    tRaw = rwfile.rawText
+    tRaw2 = rwfile2.rawText
+
+    assert tRaw2 == tRaw
+    assert text2[0:5] == '[Base'
+    assert bom2 == [239, 187, 191]
+    assert encoding2 == 'utf-8'
+
+
+
+
 
 @pytest.mark.parametrize("F", ['originalnatlink.ini', 'natlinkconfigured.ini'])
 def test_config_ini(tmp_path,F):