Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor _parse_xy_xy to standardize output as 3D arrays #226

Closed
wants to merge 13 commits into from
7 changes: 6 additions & 1 deletion nmrglue/fileio/bruker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1904,7 +1904,12 @@ def read_jcamp(filename):
with open(filename, 'r') as f:
while True: # loop until end of file is found

line = f.readline().rstrip() # read a line
try:
line = f.readline().rstrip() # read a line
except Exception as e:
warn("Unable read line, leave it as a comment")
line = "$$"

if line == '': # end of file found
break

Expand Down
93 changes: 75 additions & 18 deletions nmrglue/fileio/jcampdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ def _getkey(keystr):
.replace("-", "").replace("_", "").replace("/", ""))


def _readrawdic(filename):
def _readrawdic(filename, read_err=None):
'''
Reads JCAMP-DX file to key-value dictionary, from which
actual data is separated later.
'''

dic = {"_comments": []} # create empty dictionary
filein = open(filename, 'r')
filein = open(filename, 'r', errors=read_err)

currentkey = None
currentvaluestrings = []
Expand Down Expand Up @@ -151,6 +151,8 @@ def _detect_format(dataline):
firstvalue_re = re.compile(
"(\s)*([+-]?\d+\.?\d*|[+-]?\.\d+)([eE][+-]?\d+)?(\s)*")

xy_re = re.compile('^[0-9\.]+,[ ]?[0-9\.]+')

index = firstvalue_re.match(dataline).end()
if index is None:
return -1
Expand All @@ -165,6 +167,10 @@ def _detect_format(dataline):
return 1
if firstchar in _SQZ_DIGITS:
return 1

if re.search(xy_re, dataline):
return 2

return 0


Expand Down Expand Up @@ -347,10 +353,35 @@ def _parse_pseudo(datalines):
return data


def _parse_xy_xy(datalines):
pts = []
len_group_data = 0
for dataline in datalines:
if not dataline:
continue
xy_re = re.compile('[^ ][0-9\.]+, [0-9\.]+')
group_data = re.findall(xy_re, dataline)
len_group_data = len(group_data)
if len_group_data == 0:
xy_re = re.compile('[^ ][0-9\.]+,[0-9\.]+;')
group_data = re.findall(xy_re, dataline)

for data in group_data:
clean_data = data.replace(', ', ',')
clean_data = clean_data.replace(';', '')
x, y = clean_data.split(',')
pts.append([float(x), float(y)])
return [pts]


def _parse_data(datastring):
'''
Creates numpy array from datalines
'''
probe_data = datastring[80:320]
if ',' in probe_data and not('.' in probe_data): # fix comma as decimal points
datastring = datastring.replace(',', '.')

datalines = datastring.split("\n")
headerline = datalines[0]

Expand All @@ -364,6 +395,11 @@ def _parse_data(datastring):
data = _parse_pseudo(datalines)
elif mode == 0:
data = _parse_affn_pac(datalines)
elif mode == 2:
if headerline == '(X++(Y..Y))':
data = _parse_affn_pac(datalines)
else:
data = _parse_xy_xy(datalines)
else:
return None
if data is None:
Expand Down Expand Up @@ -417,7 +453,7 @@ def find_yfactors(dic):
return (factor_r, factor_i)


def _getdataarray(dic):
def _getdataarray(dic, show_all_data=False):
'''
Main function for data array parsing, input is the
raw dictionary from _readrawdic
Expand Down Expand Up @@ -447,19 +483,23 @@ def _getdataarray(dic):
idatalist.append(data)
else:
rdatalist.append(data)
if len(rdatalist) > 1:
warn("NTUPLES: multiple real arrays, returning first one only")
if len(idatalist) > 1:
warn("NTUPLES: multiple imaginary arrays, \
returning first one only")
if rdatalist:
if idatalist:
data = [rdatalist[0], idatalist[0]]
else:
data = rdatalist[0]

if show_all_data:
data = { 'real': rdatalist, 'imaginary': idatalist }
else:
if idatalist:
data = [None, idatalist[0]]
if len(rdatalist) > 1:
warn("NTUPLES: multiple real arrays, returning first one only")
if len(idatalist) > 1:
warn("NTUPLES: multiple imaginary arrays, \
returning first one only")
if rdatalist:
if idatalist:
data = [rdatalist[0], idatalist[0]]
else:
data = rdatalist[0]
else:
if idatalist:
data = [None, idatalist[0]]

if data is None: # XYDATA
try:
Expand All @@ -472,11 +512,27 @@ def _getdataarray(dic):
except KeyError:
warn("XYDATA not found ")

if data is None: # PEAK TABLE
try:
valuelist = dic["PEAKTABLE"]
if len(valuelist) == 1:
data, datatype = _parse_data(valuelist[0])
else:
warn("Multiple PEAKTABLE arrays in JCAMP-DX file, \
returning first one only")
except KeyError:
warn("PEAKTABLE not found ")

# apply YFACTOR to data if available
if is_ntuples:
yfactor_r, yfactor_i = find_yfactors(dic)
if yfactor_r is None or yfactor_r is None:
warn("NTUPLES: YFACTORs not applied, parsing failed")
elif show_all_data:
for i, _ in enumerate(data['real']):
data['real'][i] = data['real'][i] * yfactor_r
for i, _ in enumerate(data['imaginary']):
data['imaginary'][i] = data['imaginary'][i] * yfactor_i
else:
data[0] = data[0] * yfactor_r
data[1] = data[1] * yfactor_i
Expand All @@ -492,7 +548,7 @@ def _getdataarray(dic):
return data


def read(filename):
def read(filename, show_all_data=False, read_err=None):
"""
Read JCAMP-DX file

Expand All @@ -515,13 +571,14 @@ def read(filename):
# first read everything (including data array) to "raw" dictionary,
# in which data values are read as raw strings including whitespace
# and newlines
dic = _readrawdic(filename)
dic = _readrawdic(filename, read_err)

# find and parse NMR data array from raw dic
data = _getdataarray(dic)
data = _getdataarray(dic, show_all_data)

# remove data tables from dic
try:
dic['XYDATA_OLD'] = dic["XYDATA"]
del dic["XYDATA"]
except KeyError:
pass
Expand Down