Skip to content

Commit

Permalink
Squashed 'modules/pmi/' changes from a4bb2ff865..4a4cd0df72
Browse files Browse the repository at this point in the history
4a4cd0df72 Add method to add UniProt reference

git-subtree-dir: modules/pmi
git-subtree-split: 4a4cd0df72eea7a7bc9b478d5f6a82e12b4b89bd
  • Loading branch information
benmwebb committed Dec 5, 2024
1 parent fc21af9 commit ab2c9a6
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 11 deletions.
39 changes: 33 additions & 6 deletions modules/pmi/pyext/src/mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import ihm.representation
import ihm.geometry
import ihm.cross_linkers
import ihm.reference


def _assign_id(obj, seen_objs, obj_by_id):
Expand Down Expand Up @@ -1082,7 +1083,7 @@ def _get_alphabet(self, alphabet):
else:
raise TypeError("Don't know how to handle %s" % alphabet)

def add(self, component_name, sequence, offset, alphabet):
def add(self, component_name, sequence, offset, alphabet, uniprot):
def entity_seq(sequence):
# Map X to UNK
if 'X' in sequence:
Expand All @@ -1095,7 +1096,8 @@ def entity_seq(sequence):
d = component_name.split("@")[0].split(".")[0]
entity = Entity(entity_seq(sequence), description=d,
pmi_offset=offset,
alphabet=self._get_alphabet(alphabet))
alphabet=self._get_alphabet(alphabet),
uniprot=uniprot)
self.system.entities.append(entity)
self._sequence_dict[sequence] = entity
self[component_name] = self._sequence_dict[sequence]
Expand Down Expand Up @@ -1198,11 +1200,18 @@ class Entity(ihm.Entity):
removed). The actual offset (which is the integer to be added to the
IHM numbering to get PMI numbering, or equivalently the number of
not-represented N-terminal residues in the PMI sequence) is
available in the `pmi_offset` member."""
def __init__(self, sequence, pmi_offset, *args, **keys):
available in the `pmi_offset` member.
If a UniProt accession was provided for the sequence (either when
State.create_molecule() was called, or in the FASTA alignment file
header) then that is available in the `uniprot` member, and can be
added to the IHM system with the add_uniprot_reference method.
"""
def __init__(self, sequence, pmi_offset, uniprot, *args, **keys):
    """Set up the entity, recording its PMI offset and UniProt accession.

    `pmi_offset` and `uniprot` are stored as members of the same name;
    `sequence` and any remaining positional or keyword arguments are
    passed through unchanged to the parent class constructor.
    """
    # UniProt accession supplied for this sequence, stored as given
    self.uniprot = uniprot
    # Offset between PMI numbering and IHM; <pmi_#> = <ihm_#> + pmi_offset
    # (pmi_offset is also the number of N-terminal gaps in the FASTA file)
    self.pmi_offset = pmi_offset
    super().__init__(sequence, *args, **keys)

def pmi_residue(self, res_id):
Expand All @@ -1214,6 +1223,24 @@ def pmi_range(self, res_id_begin, res_id_end):
off = self.pmi_offset
return self(res_id_begin - off, res_id_end - off)

def add_uniprot_reference(self):
    """Add UniProt accession (if available) to the IHM system.

    If a UniProt accession was provided for the sequence (either when
    State.create_molecule() was called, or in the FASTA alignment file
    header), it is looked up at the UniProt web site (this requires
    network access) to get full information. The resulting reference
    object is appended to this entity's `references` and returned.
    If the IMP and UniProt sequences are not identical, the returned
    object may need to be modified by specifying an alignment and/or
    single-point mutations.

    If no accession is available, nothing is added and None is returned.
    """
    accession = self.uniprot
    # Guard clause: no (or empty) accession means there is nothing to add
    if not accession:
        return None
    print('Adding UniProt accession %s reference for entity %s'
          % (accession, self.description))
    uniprot_ref = ihm.reference.UniProtSequence.from_accession(accession)
    self.references.append(uniprot_ref)
    return uniprot_ref


class AsymUnit(ihm.AsymUnit):
"""A single asymmetric unit in the system. This roughly corresponds to
Expand Down Expand Up @@ -1397,7 +1424,7 @@ def create_component(self, state, name, modeled, asym_name=None):
self.all_modeled_components.append(name)

def add_component_sequence(self, state, name, seq, asym_name=None,
alphabet=None):
alphabet=None, uniprot=None):
if asym_name is None:
asym_name = name

Expand All @@ -1409,7 +1436,7 @@ def add_component_sequence(self, state, name, seq, asym_name=None,
# Offset is always zero to start with; this may be modified
# later in finalize_build() if any non-modeled N-terminal
# residues are removed
self.entities.add(name, seq, 0, alphabet)
self.entities.add(name, seq, 0, alphabet, uniprot)
if asym_name in self.asym_units:
if self.asym_units[asym_name] is None:
# Set up a new asymmetric unit for this component
Expand Down
3 changes: 2 additions & 1 deletion modules/pmi/pyext/src/topology/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,7 +809,8 @@ def _build_protocol_output(self):
asym_name=self._name_with_copy)
po.add_component_sequence(state, name, self.sequence,
asym_name=self._name_with_copy,
alphabet=self.alphabet)
alphabet=self.alphabet,
uniprot=self.uniprot)

def _finalize_build(self):
# For clones, pass the representation of the original molecule
Expand Down
30 changes: 26 additions & 4 deletions modules/pmi/test/test_mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,21 +315,43 @@ def test_cif_entities(self):
"""Test _EntityMapper class"""
system = ihm.System()
c = IMP.pmi.mmcif._EntityMapper(system)
c.add('foo', 'MELS', 0, alphabet=None)
c.add('bar', 'SELM', 0, alphabet=IMP.pmi.alphabets.amino_acid)
c.add('foo_2', 'MELS', 0, alphabet=None)
c.add('foo', 'MELS', 0, alphabet=None, uniprot=None)
c.add('bar', 'SELM', 0, alphabet=IMP.pmi.alphabets.amino_acid,
uniprot='baracc')
c.add('foo_2', 'MELS', 0, alphabet=None, uniprot=None)
self.assertRaises(TypeError, c.add, 'baz', 'MELSXX', 0,
alphabet='garbage')
alphabet='garbage', uniprot=None)
self.assertEqual(len(system.entities), 2)
self.assertIs(c['foo'], c['foo_2'])
self.assertIsNot(c['foo'], c['bar'])
a = system.entities
self.assertEqual(len(a), 2)
self.assertEqual(a[0].description, 'foo')
self.assertIsNone(a[0].uniprot)
self.assertEqual(''.join(x.code for x in a[0].sequence), 'MELS')
self.assertEqual(a[1].description, 'bar')
self.assertEqual(a[1].uniprot, 'baracc')
self.assertEqual(''.join(x.code for x in a[1].sequence), 'SELM')

def test_entity_add_uniprot_reference(self):
    """Test Entity.add_uniprot_reference()"""
    system = ihm.System()
    c = IMP.pmi.mmcif._EntityMapper(system)
    c.add('foo', 'MELS', 0, alphabet=None, uniprot=None)
    c.add('bar', 'SELM', 0, alphabet=None, uniprot='baracc')
    # Mock out UniProtSequence.from_accession, so that the test does
    # not need network access
    orig = ihm.reference.UniProtSequence.from_accession

    def mock_from_acc(acc):
        return "mock+" + acc

    try:
        ihm.reference.UniProtSequence.from_accession = mock_from_acc
        # Entity without an accession: no lookup, nothing added
        ref = c['foo'].add_uniprot_reference()
        self.assertIsNone(ref)
        self.assertEqual(list(c['foo'].references), [])
        # Entity with an accession: the looked-up reference is returned
        # and also attached to the entity itself
        ref = c['bar'].add_uniprot_reference()
        self.assertEqual(ref, 'mock+baracc')
        self.assertEqual(list(c['bar'].references), ['mock+baracc'])
    finally:
        # Always restore the real implementation for other tests
        ihm.reference.UniProtSequence.from_accession = orig

def test_all_datasets_all_group(self):
"""Test AllDatasets.get_all_group()"""
s = ihm.System()
Expand Down

0 comments on commit ab2c9a6

Please sign in to comment.