From 6c114263e0a5f6c8412b238a1e49e7a584f21fc3 Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Sat, 26 Dec 2020 00:36:07 +0100 Subject: [PATCH 1/8] adding unicode read/write tests --- pyedflib/tests/test_highlevel.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pyedflib/tests/test_highlevel.py b/pyedflib/tests/test_highlevel.py index 224b24b..5ec3812 100644 --- a/pyedflib/tests/test_highlevel.py +++ b/pyedflib/tests/test_highlevel.py @@ -2,7 +2,7 @@ # Copyright (c) 2019 - 2020 Simon Kern # Copyright (c) 2015 Holger Nahrstaedt -import os, sys +import os, sys, shutil import numpy as np # from numpy.testing import (assert_raises, run_module_suite, # assert_equal, assert_allclose, assert_almost_equal) @@ -20,6 +20,7 @@ def setUpClass(cls): cls.edfplus_data_file = os.path.join(data_dir, 'tmp_test_file_plus.edf') cls.test_generator = os.path.join(data_dir, 'test_generator.edf') cls.test_accented = os.path.join(data_dir, "tmp_áä'üöß.edf") + cls.test_unicode = os.path.join(data_dir, "tmp_utf8-中文źąşㆆ운ʷᨄⅡəПр🤖.edf") cls.anonymized = os.path.join(data_dir, "tmp_anonymized.edf") cls.personalized = os.path.join(data_dir, "tmp_personalized.edf") cls.drop_from = os.path.join(data_dir, 'tmp_drop_from.edf') @@ -162,8 +163,16 @@ def test_read_write_accented(self): signals2, _, _ = highlevel.read_edf(self.test_accented) np.testing.assert_allclose(signals, signals2, atol=0.00002) - - + self.assertTrue(os.path.isfile(self.test_accented), 'File does not exist') + + def test_read_unicode(self): + signals = np.random.rand(3, 256*60) + success = highlevel.write_edf_quick(self.edfplus_data_file, signals, sfreq=256) + self.assertTrue(success) + shutil.copy(self.edfplus_data_file, self.test_unicode) + signals2, _, _ = highlevel.read_edf(self.test_unicode) + + def test_read_header(self): header = highlevel.read_edf_header(self.test_generator) From 4e55a544e87e7a14ec72d8bf836c7b5e0cdeff6b Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Sat, 26 Dec 2020 19:18:58 +0100 Subject: [PATCH 2/8] add workaround --- pyedflib/_extensions/_pyedflib.pyx | 37 +++++++++++++++++++++++++++++- pyedflib/tests/test_highlevel.py | 3 ++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx index 1142808..89788fd 100644 --- a/pyedflib/_extensions/_pyedflib.pyx +++ b/pyedflib/_extensions/_pyedflib.pyx @@ -19,6 +19,7 @@ __all__ = ['lib_version', 'CyEdfReader', 'set_patientcode', 'set_starttime_subse #from c_edf cimport * +import os cimport c_edf cimport cpython import numpy as np @@ -85,6 +86,25 @@ FILETYPE_BDF = EDFLIB_FILETYPE_BDF FILETYPE_BDFPLUS = EDFLIB_FILETYPE_BDFPLUS +def get_short_path_name(long_name): + """ + Gets the short path name of a given long path. + http://stackoverflow.com/a/23598461/200291 + """ + import ctypes + from ctypes import wintypes + _GetShortPathNameW = ctypes.windll.kernel32.GetShortPathNameW + _GetShortPathNameW.argtypes = [wintypes.LPCWSTR, wintypes.LPWSTR, wintypes.DWORD] + _GetShortPathNameW.restype = wintypes.DWORD + output_buf_size = 0 + while True: + output_buf = ctypes.create_unicode_buffer(output_buf_size) + needed = _GetShortPathNameW(long_name, output_buf, output_buf_size) + if output_buf_size >= needed: + return output_buf.value + else: + output_buf_size = needed + def lib_version(): return c_edf.edflib_version() @@ -113,7 +133,22 @@ cdef class CyEdfReader: EdfReader(file_name, annotations_mode, check_file_size) """ self.hdr.handle = -1 - self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size) + try: + self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size) + except OSError as e: + exists = os.path.isfile(file_name) + is_windows = os.name == 'nt' + if exists and is_windows: + # work-around to at least make Unicode files readable at all + file_name = get_short_path_name(file_name) + self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size) + elif exists: + raise OSError('File {} was found but cant be accessed. ' \ + 'Make sure it contains no special characters ' \ + 'or change your locale to use UTF8.'.format(file_name)) + else: + raise e + def __dealloc__(self): if self.hdr.handle >= 0: diff --git a/pyedflib/tests/test_highlevel.py b/pyedflib/tests/test_highlevel.py index 5ec3812..3c8661e 100644 --- a/pyedflib/tests/test_highlevel.py +++ b/pyedflib/tests/test_highlevel.py @@ -163,7 +163,8 @@ def test_read_write_accented(self): signals2, _, _ = highlevel.read_edf(self.test_accented) np.testing.assert_allclose(signals, signals2, atol=0.00002) - self.assertTrue(os.path.isfile(self.test_accented), 'File does not exist') + if os.name!='nt': + self.assertTrue(os.path.isfile(self.test_accented), 'File does not exist') def test_read_unicode(self): signals = np.random.rand(3, 256*60) From a099e03529abfeadea84a9a9df3c644b306e46ca Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Sat, 26 Dec 2020 19:32:00 +0100 Subject: [PATCH 3/8] ignore warnings --- pyedflib/_extensions/_pyedflib.pyx | 13 +++++++++++++ pyedflib/_extensions/c/edflib.c | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx index 89788fd..ce88e92 100644 --- a/pyedflib/_extensions/_pyedflib.pyx +++ b/pyedflib/_extensions/_pyedflib.pyx @@ -136,6 +136,10 @@ cdef class CyEdfReader: try: self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size) except OSError as e: + # if files contain Unicode on Windows, and the locale is set incorrectly + # there can be errors when creating the file. + # in this case, we can use a workaround and work on the file + # using short file names (DOS style) exists = os.path.isfile(file_name) is_windows = os.name == 'nt' if exists and is_windows: @@ -506,6 +510,15 @@ def set_physical_maximum(handle, edfsignal, phys_max): def open_file_writeonly(path, filetype, number_of_signals): """int edfopen_file_writeonly(char *path, int filetype, int number_of_signals)""" + if os.name=='nt': # on + try: # on Windows: Check if path contains Unicode + path.encode('ascii') + except: + # If so, use workaround to create file: + # create in Python, then pass the short file name to the C library + with open(path, 'wb'): + pass + path = get_short_path_name(path) py_byte_string = _ustring(path).encode('utf8','strict') cdef char* path_str = py_byte_string return c_edf.edfopen_file_writeonly(path_str, filetype, number_of_signals) diff --git a/pyedflib/_extensions/c/edflib.c b/pyedflib/_extensions/c/edflib.c index 24aa7eb..733121f 100644 --- a/pyedflib/_extensions/c/edflib.c +++ b/pyedflib/_extensions/c/edflib.c @@ -36,7 +36,8 @@ /* compile with options "-D_LARGEFILE64_SOURCE -D_LARGEFILE_SOURCE" */ - +#pragma warning( disable : 4996 ) // ignore unsafe strncpy +#pragma warning( disable : 4244 ) // ignore precision loss #include "edflib.h" From e733ffbfbb2dc2e1b646c28b0b699bc6d1ef78ea Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Sat, 26 Dec 2020 19:32:00 +0100 Subject: [PATCH 4/8] fix test failing and ignore warnings --- pyedflib/_extensions/_pyedflib.pyx | 25 ++++++++++++++++++++++++- pyedflib/_extensions/c/edflib.c | 3 ++- pyedflib/tests/test_highlevel.py | 5 +++-- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx index 89788fd..e8babe4 100644 --- a/pyedflib/_extensions/_pyedflib.pyx +++ b/pyedflib/_extensions/_pyedflib.pyx @@ -20,6 +20,7 @@ __all__ = ['lib_version', 'CyEdfReader', 'set_patientcode', 'set_starttime_subse #from c_edf cimport * import os +import warnings cimport c_edf cimport cpython import numpy as np @@ -86,6 +87,13 @@ FILETYPE_BDF = EDFLIB_FILETYPE_BDF FILETYPE_BDFPLUS = EDFLIB_FILETYPE_BDFPLUS +def contains_unicode(string): + try: + string.encode('ascii') + return False + except: + return True + def get_short_path_name(long_name): """ Gets the short path name of a given long path. @@ -136,12 +144,19 @@ cdef class CyEdfReader: try: self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size) except OSError as e: + # if files contain Unicode on Windows, and the locale is set incorrectly + # there can be errors when creating the file. + # in this case, we can use a workaround and work on the file + # using short file names (DOS style) exists = os.path.isfile(file_name) is_windows = os.name == 'nt' - if exists and is_windows: + if exists and is_windows and contains_unicode(file_name): # work-around to at least make Unicode files readable at all file_name = get_short_path_name(file_name) self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size) + warnings.warn('the filename {} contains Unicode, but Windows does not fully support this. ' \ + 'Please consider changing your locale to support UTF8. Attempting to ' + 'load file via workaround (https://github.com/holgern/pyedflib/pull/100) '.format(file_name)) elif exists: raise OSError('File {} was found but cant be accessed. ' \ 'Make sure it contains no special characters ' \ @@ -506,6 +521,14 @@ def set_physical_maximum(handle, edfsignal, phys_max): def open_file_writeonly(path, filetype, number_of_signals): """int edfopen_file_writeonly(char *path, int filetype, int number_of_signals)""" + + if os.name=='nt' and contains_unicode(path): + # Check if we're on Windows and the file path contains Unicode. + # If so, use workaround to create file: In Python, create the file, + # then look up and pass the short file name to the C library + with open(path, 'wb'): pass + path = get_short_path_name(path) + py_byte_string = _ustring(path).encode('utf8','strict') cdef char* path_str = py_byte_string return c_edf.edfopen_file_writeonly(path_str, filetype, number_of_signals) diff --git a/pyedflib/_extensions/c/edflib.c b/pyedflib/_extensions/c/edflib.c index 24aa7eb..733121f 100644 --- a/pyedflib/_extensions/c/edflib.c +++ b/pyedflib/_extensions/c/edflib.c @@ -36,7 +36,8 @@ /* compile with options "-D_LARGEFILE64_SOURCE -D_LARGEFILE_SOURCE" */ - +#pragma warning( disable : 4996 ) // ignore unsafe strncpy +#pragma warning( disable : 4244 ) // ignore precision loss #include "edflib.h" diff --git a/pyedflib/tests/test_highlevel.py b/pyedflib/tests/test_highlevel.py index 3c8661e..1e2b69e 100644 --- a/pyedflib/tests/test_highlevel.py +++ b/pyedflib/tests/test_highlevel.py @@ -163,8 +163,8 @@ def test_read_write_accented(self): signals2, _, _ = highlevel.read_edf(self.test_accented) np.testing.assert_allclose(signals, signals2, atol=0.00002) - if os.name!='nt': - self.assertTrue(os.path.isfile(self.test_accented), 'File does not exist') + # if os.name!='nt': + self.assertTrue(os.path.isfile(self.test_accented), 'File does not exist') def test_read_unicode(self): signals = np.random.rand(3, 256*60) @@ -172,6 +172,7 @@ def test_read_unicode(self): self.assertTrue(success) shutil.copy(self.edfplus_data_file, self.test_unicode) signals2, _, _ = highlevel.read_edf(self.test_unicode) + self.assertTrue(os.path.isfile(self.test_unicode), 'File does not exist') def test_read_header(self): From 60b2027a39f98cc13b2a097b93733be48302aed9 Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Sat, 26 Dec 2020 20:20:13 +0100 Subject: [PATCH 5/8] change position of warning --- pyedflib/_extensions/_pyedflib.pyx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx index e8babe4..44c3282 100644 --- a/pyedflib/_extensions/_pyedflib.pyx +++ b/pyedflib/_extensions/_pyedflib.pyx @@ -152,11 +152,11 @@ cdef class CyEdfReader: is_windows = os.name == 'nt' if exists and is_windows and contains_unicode(file_name): # work-around to at least make Unicode files readable at all - file_name = get_short_path_name(file_name) - self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size) warnings.warn('the filename {} contains Unicode, but Windows does not fully support this. ' \ 'Please consider changing your locale to support UTF8. Attempting to ' 'load file via workaround (https://github.com/holgern/pyedflib/pull/100) '.format(file_name)) + file_name = get_short_path_name(file_name) + self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size) elif exists: raise OSError('File {} was found but cant be accessed. ' \ 'Make sure it contains no special characters ' \ @@ -522,10 +522,12 @@ def set_physical_maximum(handle, edfsignal, phys_max): def open_file_writeonly(path, filetype, number_of_signals): """int edfopen_file_writeonly(char *path, int filetype, int number_of_signals)""" - if os.name=='nt' and contains_unicode(path): + if os.name=='nt' and contains_unicode(path) and locale.getlocale()[1].lower()!='utf8': # Check if we're on Windows and the file path contains Unicode. # If so, use workaround to create file: In Python, create the file, # then look up and pass the short file name to the C library + warnings.warn('Attempting to write Unicode file {} on Windows. ' \ + 'Consider chaning your locale to UTF8.'.format(path)) with open(path, 'wb'): pass path = get_short_path_name(path) From ecb8da9d7263170ff7768ddbfb94c14a7e48458b Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Sat, 26 Dec 2020 20:22:48 +0100 Subject: [PATCH 6/8] forgot to add import --- pyedflib/_extensions/_pyedflib.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx index 44c3282..03104b3 100644 --- a/pyedflib/_extensions/_pyedflib.pyx +++ b/pyedflib/_extensions/_pyedflib.pyx @@ -19,6 +19,7 @@ __all__ = ['lib_version', 'CyEdfReader', 'set_patientcode', 'set_starttime_subse #from c_edf cimport * +import locale import os import warnings cimport c_edf From 7cfc12b106d044621c872b40715d2110d40dab19 Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Sat, 26 Dec 2020 21:03:50 +0100 Subject: [PATCH 7/8] more aliases of unicode --- pyedflib/_extensions/_pyedflib.pyx | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx index 03104b3..ecb0410 100644 --- a/pyedflib/_extensions/_pyedflib.pyx +++ b/pyedflib/_extensions/_pyedflib.pyx @@ -523,14 +523,22 @@ def set_physical_maximum(handle, edfsignal, phys_max): def open_file_writeonly(path, filetype, number_of_signals): """int edfopen_file_writeonly(char *path, int filetype, int number_of_signals)""" - if os.name=='nt' and contains_unicode(path) and locale.getlocale()[1].lower()!='utf8': + if os.name=='nt' and contains_unicode(path): + default_enc = locale.getdefaultlocale()[1] + if default_enc is None: + default_enc = '' + else: + default_enc = default_enc.lower() + using_unicode = 'utf' in default_enc or 'unicode' in default_enc or \ + '10646' in default_enc or default_enc=='cp65001' # Check if we're on Windows and the file path contains Unicode. # If so, use workaround to create file: In Python, create the file, # then look up and pass the short file name to the C library - warnings.warn('Attempting to write Unicode file {} on Windows. ' \ - 'Consider chaning your locale to UTF8.'.format(path)) - with open(path, 'wb'): pass - path = get_short_path_name(path) + if using_unicode: + warnings.warn('Attempting to write Unicode file {} on Windows. ' \ + 'Consider chaning your locale to UTF8.'.format(path)) + with open(path, 'wb'): pass + path = get_short_path_name(path) py_byte_string = _ustring(path).encode('utf8','strict') cdef char* path_str = py_byte_string From 49e724c0c26f5a5a5d18c6dc6703f4a507a55dc8 Mon Sep 17 00:00:00 2001 From: skjerns <14980558+skjerns@users.noreply.github.com> Date: Sat, 26 Dec 2020 21:06:27 +0100 Subject: [PATCH 8/8] typo --- pyedflib/_extensions/_pyedflib.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyedflib/_extensions/_pyedflib.pyx b/pyedflib/_extensions/_pyedflib.pyx index ecb0410..9c81a04 100644 --- a/pyedflib/_extensions/_pyedflib.pyx +++ b/pyedflib/_extensions/_pyedflib.pyx @@ -534,9 +534,9 @@ def open_file_writeonly(path, filetype, number_of_signals): # Check if we're on Windows and the file path contains Unicode. # If so, use workaround to create file: In Python, create the file, # then look up and pass the short file name to the C library - if using_unicode: + if not using_unicode: warnings.warn('Attempting to write Unicode file {} on Windows. ' \ - 'Consider chaning your locale to UTF8.'.format(path)) + 'Consider changing your locale to UTF8.'.format(path)) with open(path, 'wb'): pass path = get_short_path_name(path)