Skip to content

Commit

Permalink
Merge pull request #100 from holgern/fix_unicodefiles
Browse files Browse the repository at this point in the history
adding unicode read/write tests + workaround windows unicode files
  • Loading branch information
skjerns authored Dec 27, 2020
2 parents 4e17f62 + 49e724c commit 73cab5e
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 5 deletions.
71 changes: 70 additions & 1 deletion pyedflib/_extensions/_pyedflib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ __all__ = ['lib_version', 'CyEdfReader', 'set_patientcode', 'set_starttime_subse


#from c_edf cimport *
import locale
import os
import warnings
cimport c_edf
cimport cpython
import numpy as np
Expand Down Expand Up @@ -85,6 +88,32 @@ FILETYPE_BDF = EDFLIB_FILETYPE_BDF
FILETYPE_BDFPLUS = EDFLIB_FILETYPE_BDFPLUS


def contains_unicode(string):
try:
string.encode('ascii')
return False
except:
return True

def get_short_path_name(long_name):
"""
Gets the short path name of a given long path.
http://stackoverflow.com/a/23598461/200291
"""
import ctypes
from ctypes import wintypes
_GetShortPathNameW = ctypes.windll.kernel32.GetShortPathNameW
_GetShortPathNameW.argtypes = [wintypes.LPCWSTR, wintypes.LPWSTR, wintypes.DWORD]
_GetShortPathNameW.restype = wintypes.DWORD
output_buf_size = 0
while True:
output_buf = ctypes.create_unicode_buffer(output_buf_size)
needed = _GetShortPathNameW(long_name, output_buf, output_buf_size)
if output_buf_size >= needed:
return output_buf.value
else:
output_buf_size = needed

def lib_version():
return c_edf.edflib_version()

Expand Down Expand Up @@ -113,7 +142,29 @@ cdef class CyEdfReader:
EdfReader(file_name, annotations_mode, check_file_size)
"""
self.hdr.handle = -1
self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size)
try:
self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size)
except OSError as e:
# if files contain Unicode on Windows, and the locale is set incorrectly
# there can be errors when creating the file.
# in this case, we can use a workaround and work on the file
# using short file names (DOS style)
exists = os.path.isfile(file_name)
is_windows = os.name == 'nt'
if exists and is_windows and contains_unicode(file_name):
# work-around to at least make Unicode files readable at all
warnings.warn('the filename {} contains Unicode, but Windows does not fully support this. ' \
'Please consider changing your locale to support UTF8. Attempting to '
'load file via workaround (https://github.com/holgern/pyedflib/pull/100) '.format(file_name))
file_name = get_short_path_name(file_name)
self.open(file_name, mode='r', annotations_mode=annotations_mode, check_file_size=check_file_size)
elif exists:
raise OSError('File {} was found but cant be accessed. ' \
'Make sure it contains no special characters ' \
'or change your locale to use UTF8.'.format(file_name))
else:
raise e


def __dealloc__(self):
if self.hdr.handle >= 0:
Expand Down Expand Up @@ -471,6 +522,24 @@ def set_physical_maximum(handle, edfsignal, phys_max):

def open_file_writeonly(path, filetype, number_of_signals):
"""int edfopen_file_writeonly(char *path, int filetype, int number_of_signals)"""

if os.name=='nt' and contains_unicode(path):
default_enc = locale.getdefaultlocale()[1]
if default_enc is None:
default_enc = ''
else:
default_enc = default_enc.lower()
using_unicode = 'utf' in default_enc or 'unicode' in default_enc or \
'10646' in default_enc or default_enc=='cp65001'
# Check if we're on Windows and the file path contains Unicode.
# If so, use workaround to create file: In Python, create the file,
# then look up and pass the short file name to the C library
if not using_unicode:
warnings.warn('Attempting to write Unicode file {} on Windows. ' \
'Consider changing your locale to UTF8.'.format(path))
with open(path, 'wb'): pass
path = get_short_path_name(path)

py_byte_string = _ustring(path).encode('utf8','strict')
cdef char* path_str = py_byte_string
return c_edf.edfopen_file_writeonly(path_str, filetype, number_of_signals)
Expand Down
3 changes: 2 additions & 1 deletion pyedflib/_extensions/c/edflib.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@

/* compile with options "-D_LARGEFILE64_SOURCE -D_LARGEFILE_SOURCE" */


#pragma warning( disable : 4996 ) // ignore unsafe strncpy
#pragma warning( disable : 4244 ) // ignore precision loss

#include "edflib.h"

Expand Down
17 changes: 14 additions & 3 deletions pyedflib/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Copyright (c) 2019 - 2020 Simon Kern
# Copyright (c) 2015 Holger Nahrstaedt

import os, sys
import os, sys, shutil
import numpy as np
# from numpy.testing import (assert_raises, run_module_suite,
# assert_equal, assert_allclose, assert_almost_equal)
Expand All @@ -20,6 +20,7 @@ def setUpClass(cls):
cls.edfplus_data_file = os.path.join(data_dir, 'tmp_test_file_plus.edf')
cls.test_generator = os.path.join(data_dir, 'test_generator.edf')
cls.test_accented = os.path.join(data_dir, "tmp_áä'üöß.edf")
cls.test_unicode = os.path.join(data_dir, "tmp_utf8-中文źąşㆆ운ʷᨄⅡəПр🤖.edf")
cls.anonymized = os.path.join(data_dir, "tmp_anonymized.edf")
cls.personalized = os.path.join(data_dir, "tmp_personalized.edf")
cls.drop_from = os.path.join(data_dir, 'tmp_drop_from.edf')
Expand Down Expand Up @@ -162,8 +163,18 @@ def test_read_write_accented(self):
signals2, _, _ = highlevel.read_edf(self.test_accented)

np.testing.assert_allclose(signals, signals2, atol=0.00002)


# if os.name!='nt':
self.assertTrue(os.path.isfile(self.test_accented), 'File does not exist')

def test_read_unicode(self):
signals = np.random.rand(3, 256*60)
success = highlevel.write_edf_quick(self.edfplus_data_file, signals, sfreq=256)
self.assertTrue(success)
shutil.copy(self.edfplus_data_file, self.test_unicode)
signals2, _, _ = highlevel.read_edf(self.test_unicode)
self.assertTrue(os.path.isfile(self.test_unicode), 'File does not exist')


def test_read_header(self):

header = highlevel.read_edf_header(self.test_generator)
Expand Down

0 comments on commit 73cab5e

Please sign in to comment.