diff --git a/mdfreader/mdf.py b/mdfreader/mdf.py
index dadb529..b7463e7 100644
--- a/mdfreader/mdf.py
+++ b/mdfreader/mdf.py
@@ -91,7 +91,7 @@ class MdfSkeleton(dict):

     def __init__(self, file_name=None, channel_list=None, convert_after_read=True,
                  filter_channel_names=False, no_data_loading=False,
-                 compression=False, convert_tables=False, metadata=2):
+                 compression=False, convert_tables=False, metadata=2, source_list=None):
         """ mdf_skeleton class constructor.

         Parameters
@@ -123,6 +123,10 @@ def __init__(self, file_name=None, channel_list=None, convert_after_read=True,
         convert_tables : bool, optional, default False
             flag to convert or not only conversions with tables.
             These conversions types take generally long time and memory.
+
+        source_list : list, optional, default None
+            list of source messages used to identify which device sent
+            each message.
         """
         self.masterChannelList = OrderedDict()
         # flag to control multiprocessing, default deactivate,
@@ -152,7 +156,8 @@ def __init__(self, file_name=None, channel_list=None, convert_after_read=True,
                       filter_channel_names=filter_channel_names,
                       no_data_loading=no_data_loading,
                       compression=compression,
-                      metadata=metadata)
+                      metadata=metadata,
+                      source_list=source_list)

     def add_channel(self, channel_name, data, master_channel, master_type=1,
                     unit='', description='', conversion=None, info=None, compression=False,
                     identifier=None):
diff --git a/mdfreader/mdf4reader.py b/mdfreader/mdf4reader.py
index 5b22e99..64dd541 100644
--- a/mdfreader/mdf4reader.py
+++ b/mdfreader/mdf4reader.py
@@ -35,14 +35,17 @@ from numpy.lib.recfunctions import rename_fields
 from numpy.ma import MaskedArray
 from warnings import simplefilter, warn
 from .mdfinfo4 import Info4, IDBlock, HDBlock, DGBlock, \
     CGBlock, CNBlock, FHBlock, CommentBlock, _load_header, DLBlock, \
     DZBlock, HLBlock, CCBlock, DTBlock, CABlock, DVBlock, LDBlock
 from .mdf import MdfSkeleton, _open_mdf, invalidChannel, dataField, \
     conversionField, idField, invalidPosField, CompressedData
 from .channel import Channel4
+
 try:
     from dataRead import sorted_data_read, unsorted_data_read4, sd_data_read
+    dataRead_available = True
 except ImportError:
     warn('dataRead cannot be imported, compile it with Cython', ImportWarning)
+    dataRead_available = False

@@ -87,11 +90,11 @@ def _data_block(record, info, parent_block, channel_set=None, n_records=None, so
         n_records = record.numberOfRecords
     if parent_block['id'] in (b'##DT', b'##DV', b'##RD', '##DT', '##RD', '##DV'):  # normal data block
         if sorted_flag:
-            if channel_set is None and not record.hiddenBytes and\
+            if channel_set is None and not record.hiddenBytes and \
                     record.byte_aligned:
                 # No channel list and length of records corresponds to C datatypes
                 # for debugging purpose
                 # print(n_records, record.numpyDataRecordFormat, record.dataRecordName)
-                if info['DG'][record.dataGroup]['unique_channel_in_DG']and parent_block['id'] in (b'##DV', '##DV'):
+                if info['DG'][record.dataGroup]['unique_channel_in_DG'] and parent_block['id'] in (b'##DV', '##DV'):
                     return frombuffer(parent_block['data'],
                                       dtype={'names': record.dataRecordName,
                                              'formats': record.numpyDataRecordFormat})
                 else:
@@ -175,11 +178,12 @@ def _read_unsorted(record, info, parent_block, record_id_size):
                 VLSD_flag[record_id] = True
                 VLSD[record[record_id]['record'].VLSD_CG[record_id]['channelName']] = []
                 VLSD_CG_name[record_id] = record[record_id]['record'].VLSD_CG[record_id]['channelName']
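+                # keep the data type of the VLSD channel so its variable length
+                # values can be decoded once all records have been gathered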
-                VLSD_CG_signal_data_type[record_id] = record[record_id]['record'].VLSD_CG[record_id]['channel'].signal_data_type(info)
+                VLSD_CG_signal_data_type[record_id] = record[record_id]['record'].VLSD_CG[record_id][
+                    'channel'].signal_data_type(info)
             else:
                 VLSD_flag[record_id] = False
                 for Channel in record[record_id]['record'].values():
-                    #if not Channel.VLSD_CG_Flag:
+                    # if not Channel.VLSD_CG_Flag:
                     buf[Channel.name] = empty((record[record_id]['record'].numberOfRecords,),
                                               dtype='V{}'.format(Channel.nBytes_aligned))
                     numpy_format[Channel.name] = Channel.data_format(info)
@@ -281,7 +285,7 @@ def _read_sd_block(signal_data_type, sd_block, sd_block_length, n_records, point
         output = empty((n_records,), dtype="V{:d}".format(max_len))
         for index, position in enumerate(pointer):
             position = int(position + 4)
-            output[index] = bytearray(sd_block[position:int(position + VLSDLen[index])]).rjust(max_len, b'\x00') 
+            output[index] = bytearray(sd_block[position:int(position + VLSDLen[index])]).rjust(max_len, b'\x00')
         return output
     else:
         warn('VLSD channel is empty')
@@ -383,7 +387,8 @@ def read(self, channel_set, info, filename):
                     if temp is not None:
                         # change channel name by appending offset
                         self[recordID]['data'] = rename_fields(self[recordID]['data'],
-                                                               {record[cn].name: '{}_offset'.format(record[cn].name)})
+                                                               {record[cn].name: '{}_offset'.format(
+                                                                   record[cn].name)})
                         self[recordID]['VLSD'][record[cn].name] = temp
         else:  # unsorted DataGroup
             self.type = 'unsorted'
@@ -436,7 +441,7 @@ def load(self, record, info, name_list=None, sorted_flag=True, vlsd=None):
         while temps['next']:  # reads pointers to all data blocks (DT, RD, SD, DZ)
             temp = defaultdict()
             temp.update(_load_header(self.fid, temps['next']))
-            (temps['next'], ) = structunpack('<Q', self.fid.read(8))
+            (temps['next'],) = structunpack('<Q', self.fid.read(8))
@@ ... @@
                 embedded_in_prev_chan = channel_pos_bit_beg >= 8 * prev_chan_byte_offset \
-                    and channel_pos_bit_end <= 8 * (prev_chan_byte_offset + prev_chan_n_bytes)
+                    and channel_pos_bit_end <= 8 * (
+                        prev_chan_byte_offset + prev_chan_n_bytes)
                 if embedding_channel is not None:
                     embedding_channel_includes_curr_chan = \
                         channel_pos_bit_end <= embedding_channel.pos_byte_end(info) * 8
@@ -1024,7 +1031,8 @@ def read_record_buf(self, buf, info, channel_set=None):
         for Channel in self.values():  # list of channel classes from channelSet
             if Channel.name in channel_set and not Channel.VLSD_CG_Flag:
                 temp[Channel.name] = \
-                    Channel.c_format_structure(info).unpack(buf[Channel.pos_byte_beg(info):Channel.pos_byte_end(info)])[0]
+                    Channel.c_format_structure(info).unpack(buf[Channel.pos_byte_beg(info):Channel.pos_byte_end(info)])[
+                        0]
         return temp  # returns dictionary of channel with its corresponding values

     def initialise_recarray(self, info, channel_set, n_records, dtype=None, channels_indexes=None):
@@ -1152,6 +1160,7 @@ def signed_int(temp, extension):
                     temp[i].extend(extension_inv)
                     temp[i].append(sign_bit)
             return temp
+
         if n_records is None:
             n_records = self.numberOfRecords
         if dtype is None:
@@ -1169,7 +1178,7 @@ def signed_int(temp, extension):
                 n_bytes_estimated = self[chan].nBytes_aligned
                 if not self[chan].type in (1, 2):
                     temp = [bit_array[self[chan].pos_bit_beg + record_bit_size * i:
-                                       self[chan].pos_bit_end(info) + record_bit_size * i]
+                                      self[chan].pos_bit_end(info) + record_bit_size * i]
                             for i in range(n_records)]
                     n_bytes = len(temp[0].tobytes())
                     if not n_bytes == n_bytes_estimated and \
@@ -1181,7 +1190,7 @@ def signed_int(temp, extension):
                                 temp[i].extend(byte)
                         else:  # signed integer (two's complement), keep sign bit and extend with bytes
                             temp = signed_int(temp, byte)
-                    n_trail_bits = n_bytes_estimated*8 - self[chan].bit_count(info)
+                    n_trail_bits = n_bytes_estimated * 8 - self[chan].bit_count(info)
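+                    # e.g. a 12 bit signed channel stored in 2 aligned bytes
+                    # leaves 4 trailing bits that must be sign extended to keep
+                    # negative values correct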
                     if signal_data_type in (2, 3) and \
                             n_bytes == n_bytes_estimated and \
                             n_trail_bits > 0:  # C type byte length but signed integer
@@ -1194,7 +1203,7 @@ def signed_int(temp, extension):
                     if 's' not in self[chan].c_format(info):
                         c_structure = self[chan].c_format_structure(info)
                         if ('>' in self[chan].data_format(info) and byteorder == 'little') or \
-                            (byteorder == 'big' and '<' in self[chan].data_format(info)):
+                                (byteorder == 'big' and '<' in self[chan].data_format(info)):
                             temp = [c_structure.unpack(temp[i].tobytes())[0]
                                     for i in range(n_records)]
                             temp = asarray(temp).byteswap().newbyteorder()
@@ -1213,7 +1222,6 @@ def signed_int(temp, extension):


 class Mdf4(MdfSkeleton):
-
     """ mdf file reader class from version 4.0 to 4.1.1

     Attributes
@@ -1256,7 +1264,7 @@ class Mdf4(MdfSkeleton):
     """

     def read4(self, file_name=None, info=None, multi_processed=False, channel_list=None, convert_after_read=True,
-              filter_channel_names=False, compression=False, metadata=2):
+              filter_channel_names=False, compression=False, metadata=2, source_list=None):
         """ Reads mdf 4.x file data and stores it in dict

         Parameters
@@ -1294,6 +1302,10 @@ def read4(self, file_name=None, info=None, multi_processed=False, channel_list=N
                 1: used for noDataLoading
                 0: all metadata reading, including Source Information,
                     Attachment, etc..
+
+        source_list : list, optional, default None
+            list of source messages used to identify which device sent
+            each message.
         """
         self.multiProc = multi_processed

@@ -1312,13 +1324,30 @@ def read4(self, file_name=None, info=None, multi_processed=False, channel_list=N
         else:
             channel_set_file = None

         # Read information block from file
-        if info is None:
-            if self.info is None:
-                info = Info4(self.fileName, None,
-                             filter_channel_names=filter_channel_names, minimal=minimal)
-            else:
-                info = self.info
+        # If a source list is provided, the information blocks read from the
+        # file are organised by source message so that each signal can be
+        # matched to its own source message (the device the signal comes from).
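+        # e.g. source_list=['CAN1.EngineData'] (hypothetical source name) keeps
+        # only the channel groups whose acquisition source matches and appends
+        # the matched source message to the channel names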
+        multi_sources = source_list is not None
+        if info is None:
+            if self.info is None:
+                info = Info4(self.fileName, None,
+                             filter_channel_names=filter_channel_names, minimal=minimal,
+                             multi_sources=multi_sources)
+            else:
+                info = self.info

         if info.fid is None or info.fid.closed:
             info.fid = open(self.fileName, 'rb')

@@ -1341,11 +1370,13 @@ def read4(self, file_name=None, info=None, multi_processed=False, channel_list=N
         if 1 << 1 & info['HD']['hd_time_flags']:  # timezone and daylight applicable
             ttime = info['HD']['hd_tz_offset_min'] * 60 + info['HD']['hd_dst_offset_min'] * 60
+
         def returnField(obj, field):
             try:
                 return obj[field]
             except KeyError:
                 return ''
+
         if 'Comment' in info['HD']:
             Comment = info['HD']['Comment']
             author = returnField(Comment, 'author')
@@ -1363,14 +1394,41 @@ def returnField(obj, field):

         if self._noDataLoading and channel_list is not None:
             data_groups = [self[channel][idField][0][0] for channel in channel_list]

+        if source_list is not None:
+            # keep only the first occurrence of each source message
+            keep_first_source = list(dict.fromkeys(source_list))
+            own_source = ''
+
         for dataGroup in data_groups:
             channel_set = channel_set_file
             if not info['DG'][dataGroup]['dg_data'] == 0 and \
                     (channel_set is None or len(channel_set & info['ChannelNamesByDG'][dataGroup]) > 0):
                 # there is data block and channel in
+                if source_list is not None:
+                    # skip the group if it contains nothing but the master (time) channel
+                    group_channels = channel_set & info['ChannelNamesByDG'][dataGroup] \
+                        if channel_set is not None else info['ChannelNamesByDG'][dataGroup]
+                    if len(group_channels) < 2 and 't' in group_channels:
+                        continue
+                    # own source is the name of the source message
+                    if len(info['CG'][dataGroup][0]['acq_source']['source_path']) == 0:
+                        own_source = info['CG'][dataGroup][0]['acq_name']['Comment']
+                    else:
+                        own_source = info['CG'][dataGroup][0]['acq_source']['source_path']['Comment'] \
+                            + '.' + info['CG'][dataGroup][0]['acq_name']['Comment']
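+                    # own_source is built as 'source path.acquisition name',
+                    # e.g. 'CAN1.EngineData' (actual names depend on the file)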
+                    # keep the group only if one of the requested sources matches
+                    if not any(source in own_source for source in keep_first_source):
+                        continue
+
                 if minimal > 1 and not self._noDataLoading:
                     # load CG, CN and CC block info
                     info.read_cg_blocks(info.fid, dataGroup, channel_set, minimal=minimal)
-                data_existing_in_data_group =False
+                data_existing_in_data_group = False
                 for dg in info['CG'][dataGroup]:
                     if info['CG'][dataGroup][dg]['cg_cycle_count']:
                         data_existing_in_data_group = True  # data existing
@@ -1388,7 +1446,7 @@ def returnField(obj, field):
                         record_id = info['CG'][dataGroup][channelGroup]['cg_record_id']
                         if temp.master is not None \
                                 and buf[record_id]['record'].channelNames:
-                            if channel_set is not None and not self._noDataLoading\
+                            if channel_set is not None and not self._noDataLoading \
                                     and temp.master not in channel_set:
                                 channel_set.add(temp.master)  # adds master channel in channelSet if missing
                         if channel_set is not None and buf[record_id]['record'].CANOpen:
@@ -1416,7 +1474,8 @@ def returnField(obj, field):
                     master_channel = buf[record_id]['record'].master

                     if self._noDataLoading and channel_list is not None:
-                        channels = [buf[record_id]['record'][self[channel][idField][0][2]] for channel in channel_list]
+                        channels = [buf[record_id]['record'][self[channel][idField][0][2]] for channel in
+                                    channel_list]
                     else:
                         channels = list(buf[record_id]['record'].values())
                     for chan in channels:  # for each channel class
@@ -1426,7 +1485,8 @@ def returnField(obj, field):
                             # in case record is used for several channels
                             if channel_set is None and not buf[record_id]['record'].hiddenBytes \
                                     and buf[record_id]['record'].byte_aligned:
-                                record_name = buf[record_id]['record'].recordToChannelMatching[chan.name]
+                                record_name = buf[record_id]['record'].recordToChannelMatching[
+                                    chan.name]
                             else:
                                 record_name = chan.name
                             try:  # data in channel group
@@ -1442,9 +1502,9 @@ def returnField(obj, field):
                             bit_count = chan.bit_count(info)
                             if buf[record_id]['record'].byte_aligned \
                                     and not buf[record_id]['record'].hiddenBytes and \
-                                    channel_set is None and\
+                                    channel_set is None and \
                                     0 < bit_count < 64 and bit_count not in (8, 16, 32) \
-                                    and temp is not None\
+                                    and temp is not None \
                                     and temp.dtype.kind not in ('S', 'U'):
                                 # if channel data do not use complete bytes and Ctypes
                                 signal_data_type = chan.signal_data_type(info)
@@ -1457,7 +1517,8 @@ def returnField(obj, field):
                                 if signal_data_type in (2, 3):
                                     # signed integer, moving bit sign of two's complement
                                     sign_bit_mask = (1 << (bit_count - 1))
-                                    sign_extend = ((1 << (temp.itemsize * 8 - bit_count)) - 1) << bit_count
+                                    sign_extend = ((1 << (
+                                        temp.itemsize * 8 - bit_count)) - 1) << bit_count
                                     sign_bit = bitwise_and(temp, sign_bit_mask)
                                     for number, sign in enumerate(sign_bit):
                                         # negative value, sign extend
@@ -1492,6 +1553,12 @@ def returnField(obj, field):
                                 temp = temp2

                             # channel creation
+                            # if the channel is not a time channel and a source
+                            # was matched, append the source message name
+                            if source_list is not None:
+                                if 't_' not in chan.name and len(own_source) > 0:
+                                    chan.name = chan.name + '_' + own_source
+
                             self.add_channel(chan.name, temp, master_channel,
                                              master_type=chan.channel_sync_type(info),
                                              unit=chan.unit(info),
                                              description=chan.desc(info),
@@ -1516,7 +1583,7 @@ def returnField(obj, field):
                                 if not info['DG'][dataGroup]['unique_channel_in_DG']:
                                     invalid_data = frombuffer(invalid_data.tobytes(),
                                                               dtype='u1').reshape(len(invalid_data),
-                                                                                   invalid_data.dtype.itemsize)
+                                                                                  invalid_data.dtype.itemsize)
                                     self.add_channel(chan.name, invalid_data, master_channel,
                                                      master_type=0,
                                                      unit='', description='',
                                                      info=None, compression=compression,
                                                      identifier=None)
@@ -1531,6 +1598,7 @@ def returnField(obj, field):

             if minimal > 1:
                 # clean CN, CC and CG info to free memory
                 info.clean_dg_info(dataGroup)
+
         info.fid.close()  # close file

         if convert_after_read and not compression:
@@ -1857,7 +1925,7 @@ def _write4_non_column(self, fid, pointer, compression=False):
                     last_channel = n_channel
                     data_ndim = data.ndim - 1
                     if not data_ndim:
-                        data_list = data_list + (data, )
+                        data_list = data_list + (data,)
                         record_byte_offset += byte_count
                     else:  # data contains arrays
                         data_dim_size = data.shape
diff --git a/mdfreader/mdfinfo4.py b/mdfreader/mdfinfo4.py
index bb6d31c..be46546 100644
--- a/mdfreader/mdfinfo4.py
+++ b/mdfreader/mdfinfo4.py
@@ -1727,7 +1727,7 @@ def write(self, fid, data):

 class Info4(dict):

-    __slots__ = ['fileName', 'fid', 'filterChannelNames', 'zipfile']
+    __slots__ = ['fileName', 'fid', 'filterChannelNames', 'zipfile', 'multi_sources']
-    """ information block parser fo MDF file version 4.x
+    """ information block parser for MDF file version 4.x

     Attributes
@@ -1738,6 +1738,10 @@ class Info4(dict):
         file identifier
     zipfile
         flag to indicate the mdf4 is packaged in a zip
+
+    multi_sources : bool
+        if True, channels are identified by their source message so that
+        signals with the same name from different sources can be read

     Notes
     --------
@@ -1755,7 +1759,7 @@ class Info4(dict):
     Channel conversion information - mdfinfo['CC'][dataGroup][channelGroup][channel]"""

-    def __init__(self, file_name=None, fid=None, filter_channel_names=False, minimal=0):
+    def __init__(self, file_name=None, fid=None, filter_channel_names=False, minimal=0, multi_sources=False):
         """ info4 class constructor

         Parameters
@@ -1771,6 +1775,10 @@ def __init__(self, file_name=None, fid=None, filter_channel_names=False, minimal
             1 will load DG, CG, CN and CC (for noDataLoading)
             2 will load only DG (for normal reading)

+        multi_sources : bool, optional, default False
+            if True, reads signals with the same name coming from different
+            source messages
+
         Notes
         ---------
         Either fileName or fid can be used as argument"""
@@ -1793,7 +1801,7 @@ def __init__(self, file_name=None, fid=None, filter_channel_names=False, minimal
             # Open file
             (self.fid, self.fileName, self.zipfile) = _open_mdf(self.fileName)
         if self.fileName is not None and fid is None:
-            self.read_info(self.fid, minimal)
+            self.read_info(self.fid, minimal, multi_sources)
             # Close the file
             self.fid.close()
             if self.zipfile:  # temporary uncompressed file, to be removed
@@ -1802,7 +1810,7 @@ def __init__(self, file_name=None, fid=None, filter_channel_names=False, minimal
             # called by mdfreader.mdfinfo
             self.read_info(fid, minimal)

-    def read_info(self, fid, minimal):
+    def read_info(self, fid, minimal, multi_sources=False):
         """ read all file blocks except data

         Parameters
@@ -1811,6 +1819,8 @@ def read_info(self, fid, minimal):
             file identifier
         minimal: flag
             to activate minimum content reading for raw data fetching
+        multi_sources: flag
+            to read signals with the same name coming from different source messages
         """
         # reads IDBlock
         self['ID'].update(IDBlock(fid))
@@ -1841,9 +1851,9 @@ def read_info(self, fid, minimal):
             self['EV'] = self.read_ev_block(fid, self['HD']['hd_ev_first'])

         # reads Data Group Blocks and recursively the other related blocks
-        self.read_dg_block(fid, False, minimal)
+        self.read_dg_block(fid, False, minimal, multi_sources)

-    def read_dg_block(self, fid, channel_name_list=False, minimal=0):
+    def read_dg_block(self, fid, channel_name_list=False, minimal=0, multi_sources=False):
         """reads Data Group Blocks

         Parameters
         ----------------
@@ -1852,8 +1862,10 @@ def read_dg_block(self, fid, channel_name_list=False, minimal=0):
         fid : float
             file identifier
         channel_name_list : bool
             Flag to reads only channel blocks for listChannels4 method
-        minimal: falg
+        minimal: flag
             to activate minimum content reading for raw data fetching
+        multi_sources: flag
+            to read signals with the same name coming from different source messages
         """
         self['ChannelNamesByDG'] = {}
         if self['HD']['hd_dg_first']:
@@ -1863,7 +1875,7 @@ def read_dg_block(self, fid, channel_name_list=False, minimal=0):
             self['ChannelNamesByDG'][dg] = set()
             if minimal < 2:
                 # reads Channel Group blocks
-                self.read_cg_blocks(fid, dg, channel_name_list, minimal)
+                self.read_cg_blocks(fid, dg, channel_name_list, minimal, multi_sources)
             while self['DG'][dg]['dg_dg_next']:
                 dg += 1
                 self['DG'][dg] = {}
@@ -1871,9 +1883,9 @@ def read_dg_block(self, fid, channel_name_list=False, minimal=0):
                 self['ChannelNamesByDG'][dg] = set()
                 if minimal < 2:
                     # reads Channel Group blocks
-                    self.read_cg_blocks(fid, dg, channel_name_list, minimal)
+                    self.read_cg_blocks(fid, dg, channel_name_list, minimal, multi_sources)

-    def read_cg_blocks(self, fid, dg, channel_name_list=False, minimal=0):
+    def read_cg_blocks(self, fid, dg, channel_name_list=False, minimal=0, multi_sources=False):
         """reads Channel Group blocks linked to same Data Block dg

         Parameters
@@ -1886,6 +1898,8 @@ def read_cg_blocks(self, fid, dg, channel_name_list=False, minimal=0):
             Flag to reads only channel blocks for listChannels4 method
-        minimal: falg
+        minimal: flag
             to activate minimum content reading for raw data fetching
+        multi_sources: flag
+            to read signals with the same name coming from different source messages
         """
         if self['DG'][dg]['dg_cg_first']:
             cg = 0
@@ -1898,7 +1912,7 @@ def read_cg_blocks(self, fid, dg, channel_name_list=False, minimal=0):
             vlsd_cg_block = []
             vlsd_cg_block = self.read_cg_block(fid, dg, cg, self['DG'][dg]['dg_cg_first'],
-                                               vlsd_cg_block, channel_name_list=False, minimal=0)
+                                               vlsd_cg_block, channel_name_list=False, minimal=0,
+                                               multi_sources=multi_sources)

             if self['CN'][dg][cg] and self['CG'][dg][cg]['unique_channel_in_CG'] and \
                     not self['CG'][dg][cg]['cg_cg_next']:
@@ -1913,7 +1927,7 @@ def read_cg_blocks(self, fid, dg, channel_name_list=False, minimal=0):
                 self['CN'][dg][cg] = dict()
                 self['CC'][dg][cg] = dict()
                 vlsd_cg_block = self.read_cg_block(fid, dg, cg, self['CG'][dg][cg - 1]['cg_cg_next'],
-                                                   vlsd_cg_block, channel_name_list=False, minimal=0)
+                                                   vlsd_cg_block, channel_name_list=False, minimal=0,
+                                                   multi_sources=multi_sources)

             if vlsd_cg_block and 'VLSD_CG' not in self:
                 # VLSD CG Block exiting
                 self['VLSD_CG'] = {}
@@ -1929,7 +1943,7 @@ def read_cg_blocks(self, fid, dg, channel_name_list=False, minimal=0):
                             self['VLSD_CG'][self['CG'][dg][VLSDcg]['cg_record_id']] = {'cg_cn': (cg, cn)}
                             break

-    def read_cg_block(self, fid, dg, cg, pointer, vlsd_cg_block, channel_name_list=False, minimal=0):
+    def read_cg_block(self, fid, dg, cg, pointer, vlsd_cg_block, channel_name_list=False, minimal=0,
+                      multi_sources=False):
         """reads one Channel Group block

         Parameters
@@ -1942,8 +1956,10 @@ def read_cg_block(self, fid, dg, cg, pointer, vlsd_cg_block, channel_name_list=F
             channel group number
         channel_name_list : bool
             Flag to reads only channel blocks for listChannels4 method
-        minimal: falg
+        minimal: flag
             to activate minimum content reading for raw data fetching
+        multi_sources: flag
+            to read signals with the same name coming from different source messages

         Returns
         -----------
@@ -1964,7 +1980,7 @@ def read_cg_block(self, fid, dg, cg, pointer, vlsd_cg_block, channel_name_list=F
         if not self['CG'][dg][cg]['cg_flags'] & 0b1:  # if not a VLSD channel group
             # reads Channel Block
-            vlsd = self.read_cn_blocks(fid, dg, cg, channel_name_list, minimal)
+            vlsd = self.read_cn_blocks(fid, dg, cg, channel_name_list, minimal, multi_sources)
             if vlsd:
                 # VLSD needs to rename and append records but with python 2.x impossible,
                 # convert name to compatible python identifier
@@ -1976,7 +1992,7 @@ def read_cg_block(self, fid, dg, cg, pointer, vlsd_cg_block, channel_name_list=F

         return vlsd_cg_block

-    def read_cn_blocks(self, fid, dg, cg, channel_name_list=False, minimal=0):
+    def read_cn_blocks(self, fid, dg, cg, channel_name_list=False, minimal=0, multi_sources=False):
         """reads Channel blocks link to CG Block

         Parameters
@@ -1991,6 +2007,8 @@ def read_cn_blocks(self, fid, dg, cg, channel_name_list=False, minimal=0):
             Flag to reads only channel blocks for listChannels4 method
         minimal: flag
             to activate minimum content reading for raw data fetching
+        multi_sources: flag
+            to read signals with the same name coming from different source messages

         Returns
         -----------
@@ -2000,7 +2018,7 @@ def read_cn_blocks(self, fid, dg, cg, channel_name_list=False, minimal=0):
         vlsd = False
         mlsd_channels = []
         cn, mlsd_channels, vlsd = self.read_cn_block(fid, self['CG'][dg][cg]['cg_cn_first'],
-                                                     dg, cg, mlsd_channels, vlsd, minimal, channel_name_list)
+                                                     dg, cg, mlsd_channels, vlsd, minimal, channel_name_list,
+                                                     multi_sources)
         if not self['CN'][dg][cg][cn]['cn_cn_next']:
             # only one channel in CGBlock
             self['CG'][dg][cg]['unique_channel_in_CG'] = True
         else:
@@ -2008,7 +2026,7 @@ def read_cn_blocks(self, fid, dg, cg, channel_name_list=False, minimal=0):
             while self['CN'][dg][cg][cn]['cn_cn_next']:
                 cn, mlsd_channels, vlsd = self.read_cn_block(fid, self['CN'][dg][cg][cn]['cn_cn_next'],
-                                                             dg, cg, mlsd_channels, vlsd, minimal, channel_name_list)
+                                                             dg, cg, mlsd_channels, vlsd, minimal, channel_name_list,
+                                                             multi_sources)

         if mlsd_channels:
             if 'MLSD' not in self:
@@ -2023,7 +2041,7 @@ def read_cn_blocks(self, fid, dg, cg, channel_name_list=False, minimal=0):
                     break
         return vlsd

-    def read_cn_block(self, fid, pointer, dg, cg, mlsd_channels, vlsd, minimal, channel_name_list):
+    def read_cn_block(self, fid, pointer, dg, cg, mlsd_channels, vlsd, minimal, channel_name_list,
+                      multi_sources=False):
         """reads single Channel block

         Parameters
@@ -2042,6 +2060,8 @@ def read_cn_block(self, fid, pointer, dg, cg, mlsd_channels, vlsd, minimal, chan
             to activate minimum content reading for raw data fetching
         channel_name_list : bool
             Flag to reads only channel blocks for listChannels4 method
+        multi_sources: flag
+            to read signals with the same name coming from different source messages

         Returns
         -----------
@@ -2074,7 +2094,7 @@ def read_cn_block(self, fid, pointer, dg, cg, mlsd_channels, vlsd, minimal, chan
             self['CN'][dg][cg][cn]['orig_name'] = self['CN'][dg][cg][cn]['name']
             # check if already existing channel name
             self['CN'][dg][cg][cn]['name'] = \
-                self._unique_channel_name(fid, self['CN'][dg][cg][cn]['name'], dg, cg, cn)
+                self._unique_channel_name(fid, self['CN'][dg][cg][cn]['name'], dg, cg, cn,
+                                          multi_sources=multi_sources)
             if self.filterChannelNames:
                 # filters channels modules
                 self['CN'][dg][cg][cn]['name'] = self['CN'][dg][cg][cn]['name'].split('.')[-1]
@@ -2259,7 +2279,49 @@ def list_channels4(self, file_name=None, fid=None):
             fid.close()
         return channel_name_list

-    def _unique_channel_name(self, fid, name, dg, cg, cn):
+    def list_channels_sources4(self, file_name=None, fid=None):
+        """ Read MDF file blocks and extract the list of channel names with
+        their source messages
+
+        Parameters
+        ----------------
+        file_name : str
+            file name
+        fid
+            file identifier
+
+        Returns
+        -----------
+        list of [channel name, source message] pairs contained in file
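+
+        Examples
+        -----------
+        illustrative only, channel and source names depend on the file
+
+        >>> info = Info4()
+        >>> info.list_channels_sources4('file.mf4')
+        [['EngSpeed', 'CAN1.EngineData'], ['t', 'CAN1.EngineData']]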
+        """
+        if file_name is not None:
+            self.fileName = file_name
+        if fid is None and file_name is not None:
+            # Open file
+            (fid, file_name, zipfile) = _open_mdf(self.fileName)
+        channel_name_sources_list = []
+        # reads Header HDBlock
+        self['HD'].update(HDBlock(fid))
+
+        # reads Data Group, channel groups and channel Blocks
+        # recursively but not the other metadata block
+        self.read_dg_block(fid, True)
+
+        for dg in self['DG']:
+            for cg in self['CG'][dg]:
+                for cn in self['CN'][dg][cg]:
+                    original_name = self['CN'][dg][cg][cn]['orig_name']
+                    if len(self['CG'][dg][cg]['acq_source']['source_path']) == 0:
+                        original_source = self['CG'][dg][cg]['acq_name']['Comment']
+                    else:
+                        original_source = self['CG'][dg][cg]['acq_source']['source_path']['Comment'] \
+                            + '.' + self['CG'][dg][cg]['acq_name']['Comment']
+                    channel_name_sources_list.append([original_name, original_source])
+
+        # Close the file
+        fid.close()
+        return channel_name_sources_list
+
+    def _unique_channel_name(self, fid, name, dg, cg, cn, multi_sources=False):
         """ generate unique channel name

         Parameters
@@ -2279,6 +2341,9 @@ def _unique_channel_name(self, fid, name, dg, cg, cn):
         cn : int
-            channel number number
+            channel number

+        multi_sources: flag
+            to read signals with the same name coming from different source messages
+
         Returns
         -----------
         channel name made unique
@@ -2294,7 +2359,10 @@ def _unique_channel_name(self, fid, name, dg, cg, cn):
                     source_name = cn
             else:
                 source_name = cn
-            name = u'{0}_{1}_{2}_{3}'.format(name, dg, cg, source_name)
+            if not multi_sources:
+                # when filtering by source, the channel name is kept; the source
+                # message is appended to it later in read4
+                name = u'{0}_{1}_{2}_{3}'.format(name, dg, cg, source_name)
         elif name in self['allChannelList']:  # for sorted data
             if self['CN'][dg][cg][cn]['cn_si_source']:
                 temp = SIBlock()
@@ -2305,7 +2373,10 @@ def _unique_channel_name(self, fid, name, dg, cg, cn):
                     source_name = dg
             else:
                 source_name = dg
-            name = u'{0}_{1}_{2}'.format(name, dg, source_name)
+            if not multi_sources or name == 't':
+                name = u'{0}_{1}_{2}'.format(name, dg, source_name)
         self['ChannelNamesByDG'][dg].add(name)
         self['allChannelList'].add(name)
@@ -2319,7 +2390,7 @@ def _unique_channel_name(self, fid, name, dg, cg, cn):
                 self['CN'][dg][cg][cn]['masterCG'] = self['CG'][dg][cg]['cg_cg_master']
             else:
                 try:
-                    self['masters'][self['CG'][dg][cg]['pointer']]['channels'].add(name) 
+                    self['masters'][self['CG'][dg][cg]['pointer']]['channels'].add(name)
                 except KeyError:
                     self['masters'][self['CG'][dg][cg]['pointer']] = dict()
                     self['masters'][self['CG'][dg][cg]['pointer']]['channels'] = set()
diff --git a/mdfreader/mdfreader.py b/mdfreader/mdfreader.py
index 673240a..4b1acd6 100644
--- a/mdfreader/mdfreader.py
+++ b/mdfreader/mdfreader.py
@@ -81,11 +81,12 @@ def clean_name(name):
             if c in allowed_str:
                 buf += c
         return buf
+
     channel_name = clean_name(channel_name)
     # limit the variable length at 63 character, Matlab limitation
     # if you use long names including modules names separated by a '.'
     # you can use filter_channel_names=True parameter
# you can use filter_channel_names=True parameter - channel_name= channel_name[:63] + channel_name = channel_name[:63] return channel_name @@ -235,6 +236,38 @@ def list_channels(self, file_name=None): remove(self.fileName) return name_list + def list_channels_sources(self, file_name=None): + + """ Read MDF file blocks and returns a list of contained channels and + source message information + + + Parameters + ---------------- + file_name : string + file name + + Returns + ----------- + nameList : list of string + list of channel names with source message information + """ + + if self.fileName is None or file_name is not None: + self.fileName = file_name + # Open file + (self.fid, self.fileName, zipfile) = _open_mdf(self.fileName) + # read Identifier block + self.fid.seek(28) + mdf_version_number = unpack('= 400: # up to version 4.x not compatible with version 3.x + channel_name_sources_list = Info4() + name_sources_list = channel_name_sources_list.list_channels_sources4(self.fileName, self.fid) + if zipfile: # not from mdfreader.read() + remove(self.fileName) + return name_sources_list + def _generate_dummy_mdf(self, channel_list=None): """ Parse MDF file structure and create a dummy mdf object structure @@ -342,7 +375,7 @@ class Mdf(Mdf4, Mdf3): """ def read(self, file_name=None, multi_processed=False, channel_list=None, convert_after_read=True, - filter_channel_names=False, no_data_loading=False, compression=False, metadata=2): + filter_channel_names=False, no_data_loading=False, compression=False, metadata=2, source_list=None): """ reads mdf file version 3.x and 4.x Parameters @@ -382,6 +415,10 @@ def read(self, file_name=None, multi_processed=False, channel_list=None, convert 1: used for noDataLoading. 0: all metadata reading, including Source Information, Attachment, etc.. + source_list: list, optional, default = None + list containing the source messages to identify what device send the + different message. 
+
         Notes
         --------
         If you keep convertAfterRead to true, you can set attribute mdf.multiProc to activate channel conversion
@@ -417,7 +454,7 @@ def read(self, file_name=None, multi_processed=False, channel_list=None, convert
         else:  # MDF version 4.x
             if not no_data_loading:
                 self.read4(self.fileName, None, multi_processed, channel_list,
-                           convert_after_read, filter_channel_names, compression, metadata)
+                           convert_after_read, filter_channel_names, compression, metadata, source_list)
             else:  # populate minimum mdf structure
                 self._noDataLoading = True
                 self.info = Info4(None, fid=self.fid,
@@ -934,7 +971,7 @@ def set_attribute(f, name, value):
             # create variable
             cleaned_name = clean_name(name)
             if len(self.masterChannelList) == 1:  # mdf resampled
-                var[name] = f.createVariable(cleaned_name, data_type, (list(self.masterChannelList.keys())[0], ))
+                var[name] = f.createVariable(cleaned_name, data_type, (list(self.masterChannelList.keys())[0],))
             else:  # not resampled
                 var[name] = f.createVariable(cleaned_name, data_type, (self.get_channel_master(name),))
             # Create attributes
@@ -995,6 +1032,7 @@ def set_attribute(obj, name, value):
                 pass
         else:
             pass
+
         if sampling is not None:
             self.resample(sampling)
         if file_name is None:
@@ -1037,7 +1075,7 @@ def set_attribute(obj, name, value):
                     and master_name is not None:
                 group_name = master_name
             else:
-                group_name = masterField+str(n_groups)
+                group_name = masterField + str(n_groups)
             groups[group_name] = n_groups
             grp[n_groups] = file_group.create_group(group_name)
             set_attribute(grp[n_groups], masterField, master_name)
@@ -1131,7 +1169,7 @@ def export_to_matlab(self, file_name=None):
             elif channel_name is not None:
                 warn(u'Could not export {}, name is not compatible with Matlab'.format(channel))
         try:
-            savemat(file_name, temp, format='7.3', long_field_names=True, oned_as='column', 
+            savemat(file_name, temp, format='7.3', long_field_names=True, oned_as='column',
                     structured_numpy_ndarray_as_struct=True)
         except:
             savemat(file_name, temp, long_field_names=True, format='5')
@@ -1304,7 +1342,7 @@ def concat_mdf(self, mdf_class):
         for master_channel_name in second_class_masters:
             type = mdf_class.get_channel_master_type(master_channel_name)
             data = mdf_class.get_channel_data(master_channel_name)
-            if type not in second_masters: 
+            if type not in second_masters:
                 second_masters[type] = {}
             second_masters[type]['max'] = data[-1]
             # second_masters[type]['sampling'] = mean(diff(data))  # sampling
@@ -1491,7 +1529,8 @@ def convert_to_pandas(self, sampling=None):
             self[group + '_group'] = self.return_pandas_dataframe(group)
             # clean rest of self from data and time channel information
             [self[channel].pop(dataField) for channel in self.masterChannelList[group]]
-            [self[channel].pop(masterField) for channel in self.masterChannelList[group] if masterField in self[channel]]
+            [self[channel].pop(masterField) for channel in self.masterChannelList[group] if
+             masterField in self[channel]]
         self.masterGroups = []  # save time groups name in list
         [self.masterGroups.append(group + '_group') for group in self.masterChannelList]
         self.masterChannelList = {}