diff --git a/nmrglue/fileio/bruker.py b/nmrglue/fileio/bruker.py index e8e166a3..179da4f1 100644 --- a/nmrglue/fileio/bruker.py +++ b/nmrglue/fileio/bruker.py @@ -1904,7 +1904,12 @@ def read_jcamp(filename): with open(filename, 'r') as f: while True: # loop until end of file is found - line = f.readline().rstrip() # read a line + try: + line = f.readline().rstrip() # read a line + except Exception as e: + warn("Unable read line, leave it as a comment") + line = "$$" + if line == '': # end of file found break diff --git a/nmrglue/fileio/jcampdx.py b/nmrglue/fileio/jcampdx.py index bec0a058..920274db 100644 --- a/nmrglue/fileio/jcampdx.py +++ b/nmrglue/fileio/jcampdx.py @@ -37,14 +37,14 @@ def _getkey(keystr): .replace("-", "").replace("_", "").replace("/", "")) -def _readrawdic(filename): +def _readrawdic(filename, read_err=None): ''' Reads JCAMP-DX file to key-value dictionary, from which actual data is separated later. ''' dic = {"_comments": []} # create empty dictionary - filein = open(filename, 'r') + filein = open(filename, 'r', errors=read_err) currentkey = None currentvaluestrings = [] @@ -151,6 +151,8 @@ def _detect_format(dataline): firstvalue_re = re.compile( "(\s)*([+-]?\d+\.?\d*|[+-]?\.\d+)([eE][+-]?\d+)?(\s)*") + xy_re = re.compile('^[0-9\.]+,[ ]?[0-9\.]+') + index = firstvalue_re.match(dataline).end() if index is None: return -1 @@ -165,6 +167,10 @@ def _detect_format(dataline): return 1 if firstchar in _SQZ_DIGITS: return 1 + + if re.search(xy_re, dataline): + return 2 + return 0 @@ -347,10 +353,35 @@ def _parse_pseudo(datalines): return data +def _parse_xy_xy(datalines): + pts = [] + len_group_data = 0 + for dataline in datalines: + if not dataline: + continue + xy_re = re.compile('[^ ][0-9\.]+, [0-9\.]+') + group_data = re.findall(xy_re, dataline) + len_group_data = len(group_data) + if len_group_data == 0: + xy_re = re.compile('[^ ][0-9\.]+,[0-9\.]+;') + group_data = re.findall(xy_re, dataline) + + for data in group_data: + clean_data = data.replace(', ', ',') + clean_data = clean_data.replace(';', '') + x, y = clean_data.split(',') + pts.append([float(x), float(y)]) + return [pts] + + def _parse_data(datastring): ''' Creates numpy array from datalines ''' + probe_data = datastring[80:320] + if ',' in probe_data and not('.' in probe_data): # fix comma as decimal points + datastring = datastring.replace(',', '.') + datalines = datastring.split("\n") headerline = datalines[0] @@ -364,6 +395,11 @@ def _parse_data(datastring): data = _parse_pseudo(datalines) elif mode == 0: data = _parse_affn_pac(datalines) + elif mode == 2: + if headerline == '(X++(Y..Y))': + data = _parse_affn_pac(datalines) + else: + data = _parse_xy_xy(datalines) else: return None if data is None: @@ -417,7 +453,7 @@ def find_yfactors(dic): return (factor_r, factor_i) -def _getdataarray(dic): +def _getdataarray(dic, show_all_data=False): ''' Main function for data array parsing, input is the raw dictionary from _readrawdic @@ -447,19 +483,23 @@ def _getdataarray(dic): idatalist.append(data) else: rdatalist.append(data) - if len(rdatalist) > 1: - warn("NTUPLES: multiple real arrays, returning first one only") - if len(idatalist) > 1: - warn("NTUPLES: multiple imaginary arrays, \ - returning first one only") - if rdatalist: - if idatalist: - data = [rdatalist[0], idatalist[0]] - else: - data = rdatalist[0] + + if show_all_data: + data = { 'real': rdatalist, 'imaginary': idatalist } else: - if idatalist: - data = [None, idatalist[0]] + if len(rdatalist) > 1: + warn("NTUPLES: multiple real arrays, returning first one only") + if len(idatalist) > 1: + warn("NTUPLES: multiple imaginary arrays, \ + returning first one only") + if rdatalist: + if idatalist: + data = [rdatalist[0], idatalist[0]] + else: + data = rdatalist[0] + else: + if idatalist: + data = [None, idatalist[0]] if data is None: # XYDATA try: @@ -472,11 +512,27 @@ def _getdataarray(dic): except KeyError: warn("XYDATA not found ") + if data is None: # PEAK TABLE + try: + valuelist = dic["PEAKTABLE"] + if len(valuelist) == 1: + data, datatype = _parse_data(valuelist[0]) + else: + warn("Multiple PEAKTABLE arrays in JCAMP-DX file, \ + returning first one only") + except KeyError: + warn("PEAKTABLE not found ") + # apply YFACTOR to data if available if is_ntuples: yfactor_r, yfactor_i = find_yfactors(dic) if yfactor_r is None or yfactor_r is None: warn("NTUPLES: YFACTORs not applied, parsing failed") + elif show_all_data: + for i, _ in enumerate(data['real']): + data['real'][i] = data['real'][i] * yfactor_r + for i, _ in enumerate(data['imaginary']): + data['imaginary'][i] = data['imaginary'][i] * yfactor_i else: data[0] = data[0] * yfactor_r data[1] = data[1] * yfactor_i @@ -492,7 +548,7 @@ def _getdataarray(dic): return data -def read(filename): +def read(filename, show_all_data=False, read_err=None): """ Read JCAMP-DX file @@ -515,13 +571,14 @@ def read(filename): # first read everything (including data array) to "raw" dictionary, # in which data values are read as raw strings including whitespace # and newlines - dic = _readrawdic(filename) + dic = _readrawdic(filename, read_err) # find and parse NMR data array from raw dic - data = _getdataarray(dic) + data = _getdataarray(dic, show_all_data) # remove data tables from dic try: + dic['XYDATA_OLD'] = dic["XYDATA"] del dic["XYDATA"] except KeyError: pass