Skip to content

Commit

Permalink
bugfixes; closes #16
Browse files Browse the repository at this point in the history
  • Loading branch information
xrotwang committed Nov 26, 2023
1 parent 130465f commit c6642cb
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 1 deletion.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changes

## [Unreleased]

- Fixed bug whereby charstate labels with illegal tokens were incorrectly read.
- Fixed bug whereby the last charstate label was never detected as duplicate.


## [v1.9.1] - 2023-11-25

- Fixed support for writing comments.
Expand Down
5 changes: 5 additions & 0 deletions src/commonnexus/blocks/characters.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,10 +549,15 @@ def __init__(self, tokens, nexus=None):
if isinstance(w, Token) and w.text == '/':
in_states = True
continue
if name:
raise ValueError(
'Illegal token in charstatelabel: "{}{}"'.format(name, w))
name = w
except StopIteration:
break
if num:
if name and name in names:
duplicate_charlabel(name, 'CHARSTATELABELS', nexus)
self.characters.append(types.SimpleNamespace(number=num, name=name, states=states))
elif comma: # There was a comma, but no new label.
warnings.warn('Trailing comma in CHARSTATELABELS command')
Expand Down
2 changes: 1 addition & 1 deletion src/commonnexus/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
The following punctuation marks have special properties: [ ] do not break a
word; + and - are allowed as state symbols, but none of the rest are allowed; - is
considered punctuation except were it is the minus sign in a negative number.
considered punctuation except where it is the minus sign in a negative number.
"""
import enum
import itertools
Expand Down
53 changes: 53 additions & 0 deletions tests/test_blocks_characters.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,56 @@ def test_Data_with_mixed_charlabels(nexus):
nex = nexus(DATA="DIMENSIONS NCHAR=2; CHARSTATELABELS 1 x, 2 ; MATRIX t1 1 1;")
m = nex.characters.get_matrix()
assert '2' in m['t1'], 'unspecified character label'


def test_illegal_charstatelabel():
    """Character labels containing illegal tokens must be rejected.

    The label ``burn(tr.)_3`` contains parentheses. Reading the
    CHARSTATELABELS command is expected to raise a ``ValueError`` whose
    message quotes the offending text, and validating the whole Nexus object
    is expected to fail with a ``ValueError`` as well.
    """
    nex = Nexus("""\
#NEXUS
BEGIN DATA;
DIMENSIONS NTAX=3 NCHAR=3;
FORMAT DATATYPE=STANDARD MISSING=? GAP=- SYMBOLS="01";
CHARSTATELABELS
1 hand_1,
2 burn(tr.)_3,
3 claw(nail)_3
;
MATRIX
A 001
B 000
C 000
;
END;""")
    # ``match`` is searched against the message of the raised exception
    # itself, so the check cannot be skipped (as an assert placed after the
    # raising statement inside the ``with`` block would be) and does not
    # depend on how pytest renders its ExceptionInfo wrapper.
    with pytest.raises(ValueError, match=r'burn\('):
        _ = nex.DATA.CHARSTATELABELS

    with pytest.raises(ValueError):
        nex.validate()


def test_duplicate_charstatelabels():
    """A duplicate label on the last character must be detected.

    Characters 2 and 3 share the label ``_3``. In the default configuration
    reading CHARSTATELABELS is expected to emit exactly one warning; with
    ``nex.cfg.strict`` enabled it is expected to raise a ``ValueError``.
    """
    nex = Nexus("""\
#NEXUS
BEGIN DATA;
DIMENSIONS NTAX=3 NCHAR=3;
FORMAT DATATYPE=STANDARD MISSING=? GAP=- SYMBOLS="01";
CHARSTATELABELS
1 hand_1,
2 _3,
3 _3
;
MATRIX
A 001
B 000
C 000
;
END;""")
    with warnings.catch_warnings(record=True) as w:
        # Make sure the warning is recorded regardless of the filters that
        # are active when the test suite runs (e.g. ``-W ignore``/``-W error``).
        warnings.simplefilter('always')
        _ = nex.DATA.CHARSTATELABELS
    assert len(w) == 1, 'Expected 1 warning, got %r' % w

    nex.cfg.strict = True
    with pytest.raises(ValueError):
        _ = nex.DATA.CHARSTATELABELS

0 comments on commit c6642cb

Please sign in to comment.