From 80bc90ddcfb7ae2e1ee2d5c6e0597807db870f5e Mon Sep 17 00:00:00 2001
From: Ben Webb
Date: Fri, 15 Sep 2023 14:30:45 -0700
Subject: [PATCH] Use chain sequence offset if provided

Get the sequence offset either from the Chain object or from the
sequence determined from Residues and pass that through to
python-ihm's AsymUnit class. (Currently, we only support offset==0.)
---
 modules/mmcif/pyext/src/data.py | 19 ++++++++++++-------
 modules/mmcif/pyext/src/util.py |  4 ++--
 modules/mmcif/test/test_data.py | 29 +++++++++++++++++++++--------
 3 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/modules/mmcif/pyext/src/data.py b/modules/mmcif/pyext/src/data.py
index a4014ddced..e1eb748757 100644
--- a/modules/mmcif/pyext/src/data.py
+++ b/modules/mmcif/pyext/src/data.py
@@ -95,7 +95,6 @@ def __init__(self, system):
 
     def _get_sequence_from_residues(self, chain, seq_from_res):
         seq_id_begin, seq = seq_from_res
-        # todo: handle seq_id_begin != 1
         if not seq:
             raise ValueError("Chain %s has no sequence and no residues"
                              % chain)
@@ -107,14 +106,15 @@ def _get_sequence_from_residues(self, chain, seq_from_res):
                 "sequence from Residues, but the following residue indices "
                 "have no residue type (perhaps covered only by Fragments): %s"
                 % (chain, str(missing_seq)))
-        return tuple(seq)
+        return seq_id_begin - 1, tuple(seq)
 
     def add(self, chain, seq_from_res=None):
         sequence = chain.get_sequence()
+        offset = chain.get_sequence_offset()
         if sequence == '':
             if seq_from_res is not None:
-                sequence = self._get_sequence_from_residues(chain,
-                                                            seq_from_res)
+                offset, sequence = self._get_sequence_from_residues(
+                    chain, seq_from_res)
             else:
                 raise ValueError("Chain %s has no sequence" % chain)
         else:
@@ -127,7 +127,7 @@ def add(self, chain, seq_from_res=None):
             self._entities.append(entity)
             self._sequence_dict[sequence] = entity
         self[chain] = self._sequence_dict[sequence]
-        return self[chain]
+        return self[chain], offset
 
     def get_all(self):
         """Yield all entities"""
@@ -189,7 +189,7 @@ def _handle_chain(self, chain):
             name = map_key = chain.name
         return modeled, asym_id, map_key, name
 
-    def add(self, chain, entity):
+    def add(self, chain, entity, offset):
         """Add a chain (either an IMP Chain object for a modeled component,
            or a NonModeledChain object for a non-modeled component)"""
         modeled, asym_id, map_key, name = self._handle_chain(chain)
@@ -203,8 +203,13 @@ def add(self, chain, entity):
                 entity.description = \
                     component.name.split("@")[0].split(".")[0]
             self._all_components.append(component)
+            if offset != 0:
+                raise ValueError(
+                    "Non-zero chain sequence offsets are not "
+                    "currently handled")
             if modeled:
-                asym = ihm.AsymUnit(entity, name, id=asym_id)
+                asym = ihm.AsymUnit(entity, name, id=asym_id,
+                                    auth_seq_id_map=offset)
                 self.system.asym_units.append(asym)
                 component.asym_unit = asym
                 self._all_modeled_components.append(component)
diff --git a/modules/mmcif/pyext/src/util.py b/modules/mmcif/pyext/src/util.py
index 51a3129ccf..af83e17020 100644
--- a/modules/mmcif/pyext/src/util.py
+++ b/modules/mmcif/pyext/src/util.py
@@ -269,8 +269,8 @@ def _add_state(self, state):
         self._state_by_name[state.name] = state
 
     def _add_chain(self, chain, seq_from_res):
-        entity = self._entities.add(chain, seq_from_res)
-        component = self._components.add(chain, entity)
+        entity, offset = self._entities.add(chain, seq_from_res)
+        component = self._components.add(chain, entity, offset)
         return component
 
     def add_rmf(self, filename, name=None, frame=0, states=None,
diff --git a/modules/mmcif/test/test_data.py b/modules/mmcif/test/test_data.py
index 4ce6a43040..817e1e637b 100644
--- a/modules/mmcif/test/test_data.py
+++ b/modules/mmcif/test/test_data.py
@@ -12,12 +12,16 @@ def add_attrs(r):
 
 
 class MockChain(object):
-    def __init__(self, name, sequence='', chain_type=IMP.atom.Protein):
+    def __init__(self, name, sequence='', chain_type=IMP.atom.Protein,
+                 offset=0):
         self.name = name
         self.sequence = sequence
         self.chain_type = chain_type
+        self.offset = offset
     def get_sequence(self):
         return self.sequence
+    def get_sequence_offset(self):
+        return self.offset
     def get_chain_type(self):
         return self.chain_type
 
@@ -102,9 +106,9 @@ def test_entity_naming(self):
         chain1 = MockChain("A.1@12")
         chain2 = MockChain("A.2@12")
         chain3 = MockChain(None)
-        comp1 = cm.add(chain1, entity1)
-        comp2 = cm.add(chain2, entity1)
-        comp3 = cm.add(chain3, entity2)
+        comp1 = cm.add(chain1, entity1, 0)
+        comp2 = cm.add(chain2, entity1, 0)
+        comp3 = cm.add(chain3, entity2, 0)
         self.assertEqual(chain1.name, "A.1@12")
         self.assertEqual(chain2.name, "A.2@12")
         self.assertIsNone(chain3.name)
@@ -118,10 +122,19 @@ def test_component_mapper_same_id_chain(self):
         entity2 = ihm.Entity("DEF")
         chain1 = MockChain("A")
         chain2 = MockChain("A")
-        comp1 = cm.add(chain1, entity1)
+        comp1 = cm.add(chain1, entity1, 0)
         self.assertEqual(cm[chain1], comp1)
         # Cannot add two chains with the same ID but different sequences
-        self.assertRaises(ValueError, cm.add, chain2, entity2)
+        self.assertRaises(ValueError, cm.add, chain2, entity2, 0)
+
+    def test_component_mapper_non_zero_offset(self):
+        """Test ComponentMapper given non-zero sequence offset"""
+        system = ihm.System()
+        cm = IMP.mmcif.data._ComponentMapper(system)
+        entity1 = ihm.Entity("ANC")
+        chain1 = MockChain("A")
+        # Non-zero offsets are not currently handled
+        self.assertRaises(ValueError, cm.add, chain1, entity1, 100)
 
     def test_component_mapper_get_all(self):
         """Test ComponentMapper get_all()"""
@@ -131,8 +144,8 @@ def test_component_mapper_get_all(self):
         entity2 = ihm.Entity("DEF")
         chain1 = MockChain("A")
         chain2 = MockChain("B")
-        comp1 = cm.add(chain1, entity1)
-        comp2 = cm.add(chain2, entity2)
+        comp1 = cm.add(chain1, entity1, 0)
+        comp2 = cm.add(chain2, entity2, 0)
         allc = cm.get_all()
         self.assertEqual(allc, [comp1, comp2])
         self.assertEqual(cm.get_all_modeled(), [])