Skip to content

Commit

Permalink
Get Entity sequence from Residues if possible
Browse files Browse the repository at this point in the history
If we did not provide the primary sequence in
the Chain object, try to determine it by
enumerating all child Residues. This may not work
(e.g. if we have one or more Fragments), in which
case, throw an exception.
  • Loading branch information
benmwebb committed Sep 15, 2023
1 parent cb5803b commit 35fead7
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 3 deletions.
24 changes: 22 additions & 2 deletions modules/mmcif/pyext/src/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,30 @@ def __init__(self, system):
IMP.atom.RNA: ihm.RNAAlphabet,
IMP.atom.DNA: _CustomDNAAlphabet}

def add(self, chain):
# Build an entity sequence from per-residue information, for use when the
# chain itself has no declared sequence.
#
# `seq_from_res` is unpacked as a pair (seq_id_begin, seq). `seq` is a
# sequence (tuple or list) with one entry per residue position; an entry of
# None marks a position for which no residue type is known. `chain` is used
# only to label the error messages, and `self` is not used.
#
# Returns `seq` converted to a tuple.
# Raises ValueError if `seq` is empty, or if any entry of `seq` is None.
def _get_sequence_from_residues(self, chain, seq_from_res):
seq_id_begin, seq = seq_from_res
# TODO: handle seq_id_begin != 1. At present the offset is used only to
# number the missing positions in the error message below; it does not
# affect the returned sequence.
if not seq:
# Nothing to fall back on: no declared sequence and no residues either
raise ValueError("Chain %s has no sequence and no residues"
% chain)
# Positions with no residue type, reported as index-in-seq + seq_id_begin
missing_seq = [ind + seq_id_begin
for (ind, res) in enumerate(seq) if res is None]
if missing_seq:
# A partial sequence is rejected rather than silently filled in
raise ValueError(
"Chain %s has no declared sequence; tried to determine the "
"sequence from Residues, but the following residue indices "
"have no residue type (perhaps covered only by Fragments): %s"
% (chain, str(missing_seq)))
# Always hand back a tuple, whether a list or a tuple was passed in
return tuple(seq)

def add(self, chain, seq_from_res=None):
sequence = chain.get_sequence()
if sequence == '':
raise ValueError("Chain %s has no sequence" % chain)
if seq_from_res is not None:
sequence = self._get_sequence_from_residues(chain,
seq_from_res)
else:
raise ValueError("Chain %s has no sequence" % chain)
else:
# Map one-letter codes to ihm.ChemComp
alphabet = self._alphabet_map[chain.get_chain_type()]()
Expand Down
2 changes: 1 addition & 1 deletion modules/mmcif/pyext/src/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def _add_state(self, state):
self._state_by_name[state.name] = state

# Obtain an entity for `chain` from self._entities.add(), then pass the chain
# and that entity to self._components.add() and return its result.
def _add_chain(self, chain, seq_from_res):
# NOTE(review): this listing is a diff rendered without +/- markers. The
# two `entity = ...` lines below are presumably the removed and the added
# version of a single statement (the file header reports 1 addition and
# 1 deletion) - confirm against the repository before relying on either.
entity = self._entities.add(chain)
entity = self._entities.add(chain, seq_from_res)
component = self._components.add(chain, entity)
return component

Expand Down
11 changes: 11 additions & 0 deletions modules/mmcif/test/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ def test_entity_mapper_add(self):
# Cannot add chains with no sequence
chain5 = MockChain("E", sequence='')
self.assertRaises(ValueError, e.add, chain5)
# Chain with no declared sequence, but we have from-residue sequence
alpha = ihm.LPeptideAlphabet()
e.add(chain5, seq_from_res=(1, (alpha['C'], alpha['G'])))
# List should work as well as tuple
e.add(chain5, seq_from_res=(1, [alpha['C'], alpha['G']]))
# Will not work if from-residue sequence is also empty
self.assertRaises(ValueError, e.add, chain5, seq_from_res=(1, ()))
# Also no good if the from-residue sequence has gaps
self.assertRaises(ValueError, e.add, chain5,
seq_from_res=(1, (alpha['C'], None,
alpha['G'], None)))

def test_entity_naming(self):
"""Test naming of Entities"""
Expand Down

0 comments on commit 35fead7

Please sign in to comment.