diff --git a/modules/core/dependency/python-ihm/.github/workflows/codeql-analysis.yml b/modules/core/dependency/python-ihm/.github/workflows/codeql-analysis.yml index e245e2d3da..82140be134 100644 --- a/modules/core/dependency/python-ihm/.github/workflows/codeql-analysis.yml +++ b/modules/core/dependency/python-ihm/.github/workflows/codeql-analysis.yml @@ -39,11 +39,11 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -55,4 +55,4 @@ jobs: python setup.py build_ext --inplace -t build - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v2 diff --git a/modules/core/dependency/python-ihm/.github/workflows/linter.yml b/modules/core/dependency/python-ihm/.github/workflows/linter.yml index d9ba1c3fdd..aebfd1a8de 100644 --- a/modules/core/dependency/python-ihm/.github/workflows/linter.yml +++ b/modules/core/dependency/python-ihm/.github/workflows/linter.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Code - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Lint Code Base uses: docker://github/super-linter:v2.1.0 env: diff --git a/modules/core/dependency/python-ihm/ChangeLog.rst b/modules/core/dependency/python-ihm/ChangeLog.rst index a3d905934e..6c26532603 100644 --- a/modules/core/dependency/python-ihm/ChangeLog.rst +++ b/modules/core/dependency/python-ihm/ChangeLog.rst @@ -1,3 +1,33 @@ +1.1 - 2024-05-09 +================ + - :class:`ihm.System` now allows for one or more official database IDs to + be associated with the system using the new :class:`ihm.Database` class. + This maps to the mmCIF ``_database_2`` category (#135). 
+ - :class:`ihm.location.FileLocation` now allows for an optional file format + to be specified (#139). + - The ``util/make-mmcif.py`` script is now included in the installed package, + so can be run if desired with ``python3 -m ihm.util.make_mmcif`` (#134). + - Bugfix: allow for file sizes in input mmCIF files to be floating point + values (#138). + - Bugfix: the 'Other' content type is now handled correctly when reading + information about external files from an mmCIF file (#139). + +1.0 - 2024-02-13 +================ + - Support for multi-state schemes (such as kinetic rates and relaxation + times for conversions between states) was added; + see :mod:`ihm.multi_state_scheme`. + - Residue numbering in non-polymer, water, and branched entities should + now be better handled, no longer requiring the various scheme tables + to precede ``atom_site``. If you subclass :class:`ihm.model.Model`, atoms + may need to be renumbered; see :meth:`ihm.model.Model.add_atom` (#130). + - Original author-provided numbering can now be provided for waters, + using the ``orig_auth_seq_id_map`` argument to :class:`ihm.WaterAsymUnit`. + - The make-mmcif.py utility script now has basic functionality for + combining multiple input files into one, relabeling chain IDs if necessary. + - An :class:`ihm.Entity` containing just a single sugar is now classified + as a nonpolymer, not branched. 
+ 0.43 - 2023-12-08 ================= - Branched and polymeric empty entities are now distinguished diff --git a/modules/core/dependency/python-ihm/LICENSE b/modules/core/dependency/python-ihm/LICENSE index 7a52608174..317b1f479f 100644 --- a/modules/core/dependency/python-ihm/LICENSE +++ b/modules/core/dependency/python-ihm/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2018-2023 IHM Working Group +Copyright (c) 2018-2024 IHM Working Group Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/modules/core/dependency/python-ihm/MANIFEST.in b/modules/core/dependency/python-ihm/MANIFEST.in index b9081bb5ca..4ec906d30d 100644 --- a/modules/core/dependency/python-ihm/MANIFEST.in +++ b/modules/core/dependency/python-ihm/MANIFEST.in @@ -4,4 +4,4 @@ include examples/* include util/make-mmcif.py include src/ihm_format.h include src/ihm_format.i -include src/ihm_format_wrap_0.43.c +include src/ihm_format_wrap_1.1.c diff --git a/modules/core/dependency/python-ihm/README.md b/modules/core/dependency/python-ihm/README.md index f3be699e7b..e0c8485ddd 100644 --- a/modules/core/dependency/python-ihm/README.md +++ b/modules/core/dependency/python-ihm/README.md @@ -6,9 +6,9 @@ [![Windows Build Status](https://ci.appveyor.com/api/projects/status/5o28oe477ii8ur4h?svg=true)](https://ci.appveyor.com/project/benmwebb/python-ihm) [![codecov](https://codecov.io/gh/ihmwg/python-ihm/branch/main/graph/badge.svg)](https://codecov.io/gh/ihmwg/python-ihm) -This is a Python package to assist in handling [mmCIF](http://mmcif.wwpdb.org/) +This is a Python package to assist in handling [mmCIF](https://mmcif.wwpdb.org/) and [BinaryCIF](https://github.com/dsehnal/BinaryCIF) files compliant with the -[integrative/hybrid modeling (IHM)](http://mmcif.wwpdb.org/dictionaries/mmcif_ihm.dic/Index/) +[integrative/hybrid modeling 
(IHM)](https://mmcif.wwpdb.org/dictionaries/mmcif_ihm_ext.dic/Index/) extension. It works with Python 2.7 or Python 3. To handle non-integrative theoretical models (for example, homology models), @@ -42,6 +42,14 @@ On a Fedora or RedHat Enterprise Linux box, install with dnf copr enable salilab/salilab; dnf install python3-ihm ``` +On an Ubuntu LTS box, install from +[our PPA](https://launchpad.net/~salilab/+archive/ubuntu/ppa) with + +``` +apt install software-properties-common; add-apt-repository ppa:salilab/ppa; +apt install python3-ihm +``` + Alternatively, install with pip: ``` diff --git a/modules/core/dependency/python-ihm/docs/conf.py b/modules/core/dependency/python-ihm/docs/conf.py index 866dd4f104..e15de8a007 100644 --- a/modules/core/dependency/python-ihm/docs/conf.py +++ b/modules/core/dependency/python-ihm/docs/conf.py @@ -156,3 +156,6 @@ author, 'Python-IHM', 'One line description of project.', 'Miscellaneous'), ] + +# Warn about broken links to classes, etc. +nitpicky = True diff --git a/modules/core/dependency/python-ihm/docs/design.rst b/modules/core/dependency/python-ihm/docs/design.rst index c3062c08f4..9080405c13 100644 --- a/modules/core/dependency/python-ihm/docs/design.rst +++ b/modules/core/dependency/python-ihm/docs/design.rst @@ -27,7 +27,7 @@ Types rather than enums Where the underlying IHM mmCIF dictionary uses an enumeration, generally this corresponds to separate sibling classes in this package. For example, two datasets which differ only in their ``data_type`` -`in the dictionary `_ +`in the dictionary `_ (such as a electron microscopy density map and small angle scattering data) are represented with two classes in this package: :class:`ihm.dataset.EMDensityDataset` and :class:`ihm.dataset.SASDataset`. @@ -42,7 +42,7 @@ This is naturally represented in Python as a hierarchy of classes, with members pointing to other objects as appropriate. IDs are not used to look up other objects, and are only used internally to populate the tables. 
For example, to group multiple models together, the dictionary assigns all of -the models the same `model_group_id `_ +the models the same `model_group id `_ while in the Python package the :class:`ihm.model.Model` objects are placed into a :class:`ihm.model.ModelGroup` object, which acts like a simple Python list. @@ -71,7 +71,7 @@ mmCIF backend ============= The classes in this package roughly correspond to -`categories `_ +`categories `_ in the underlying IHM mmCIF dictionary. This allows for simple output of mmCIF formatted files, but also allows for the potential future support for other file formats that support the dictionary or a subset of it, such diff --git a/modules/core/dependency/python-ihm/docs/introduction.rst b/modules/core/dependency/python-ihm/docs/introduction.rst index 7e4d83493d..7da9b2c50b 100644 --- a/modules/core/dependency/python-ihm/docs/introduction.rst +++ b/modules/core/dependency/python-ihm/docs/introduction.rst @@ -20,11 +20,11 @@ application with a set of Python objects. This includes Once created, this set of Python objects can be written to an mmCIF file that is compliant with the -`IHM extension `_ -to the `PDBx/mmCIF dictionary `_, +`IHMCIF extension `_ +to the `PDBx/mmCIF dictionary `_, suitable for deposition in the `PDB-Dev repository `_. The files are best viewed -in a viewer that supports IHM mmCIF, such as +in a viewer that supports IHMCIF, such as `UCSF ChimeraX `_, although they may be partially viewable in regular PDBx mmCIF viewers (likely only the atomic coordinates will be visible). diff --git a/modules/core/dependency/python-ihm/docs/main.rst b/modules/core/dependency/python-ihm/docs/main.rst index 800f7c40e1..99dab30146 100644 --- a/modules/core/dependency/python-ihm/docs/main.rst +++ b/modules/core/dependency/python-ihm/docs/main.rst @@ -12,6 +12,9 @@ The :mod:`ihm` Python module .. autoclass:: System :members: +.. autoclass:: Database + :members: + .. 
autoclass:: Software :members: diff --git a/modules/core/dependency/python-ihm/docs/usage.rst b/modules/core/dependency/python-ihm/docs/usage.rst index fc76b2213b..1778891d27 100644 --- a/modules/core/dependency/python-ihm/docs/usage.rst +++ b/modules/core/dependency/python-ihm/docs/usage.rst @@ -118,7 +118,11 @@ of the data used in modeling: - *Internal numbering*. Residues are always numbered sequentially starting at 1 in an :class:`~ihm.Entity`. All references to residues or residue ranges in - the library use this numbering. + the library use this numbering. For polymers, this internal numbering matches + the ``seq_id`` used in the mmCIF dictionary, while for branched entities, + this matches ``num`` in the dictionary. (For other types of entities + (non-polymers, waters) ``seq_id`` is not used in mmCIF, + but the residues are still numbered sequentially from 1 in this library.) - *Author-provided numbering*. If a different numbering scheme is used by the authors, for example to correspond to the numbering of the original sequence that is modeled, this can be given as an author-provided numbering for diff --git a/modules/core/dependency/python-ihm/ihm/__init__.py b/modules/core/dependency/python-ihm/ihm/__init__.py index 16bc1dc549..6ac0b4ba1a 100644 --- a/modules/core/dependency/python-ihm/ihm/__init__.py +++ b/modules/core/dependency/python-ihm/ihm/__init__.py @@ -20,7 +20,7 @@ import json from . import util -__version__ = '0.43' +__version__ = '1.1' class __UnknownValue(object): @@ -74,14 +74,22 @@ class System(object): :param str id: Unique identifier for this system in the mmCIF file. :param str model_details: Detailed description of the system, like an abstract. + :param databases: If this system is part of one or more official + databases (e.g. PDB, PDB-Dev, SwissModel), details of + the database identifiers. 
+ :type databases: sequence of :class:`Database` """ structure_determination_methodology = "integrative" - def __init__(self, title=None, id='model', model_details=None): + def __init__(self, title=None, id='model', model_details=None, + databases=[]): self.id = id self.title = title self.model_details = model_details + self.databases = [] + self.databases.extend(databases) + self._database_status = {} #: List of plain text comments. These will be added to the top of #: the mmCIF file. @@ -93,7 +101,7 @@ def __init__(self, title=None, id='model', model_details=None): #: List of all authors of this system, as a list of strings (last name #: followed by initials, e.g. "Smith, A.J."). When writing out a file, #: if this list is empty, the set of all citation authors (see - #: :attr:`Citation.authors`) is used instead. + #: :class:`Citation`) is used instead. self.authors = [] #: List of all grants that supported this work. See :class:`Grant`. @@ -663,6 +671,24 @@ def _check_after_write(self): "can be grouped." % g) +class Database(object): + """Information about a System that is part of an official database. + + If a :class:`System` is part of one or more official databases + (e.g. PDB, PDB-Dev, SwissModel), this class contains details of the + database identifiers. It should be passed to the :class:`System` + constructor. + + :param str id: Abbreviated name of the database (e.g. PDB). + :param str code: Identifier from the database (e.g. 1abc). + :param str doi: Digital Object Identifier of the database entry. + :param str accession: Extended accession code of the database entry. + """ + def __init__(self, id, code, doi=None, accession=None): + self.id, self.code = id, code + self.doi, self.accession = doi, accession + + class Software(object): """Software used as part of the modeling protocol. 
@@ -681,7 +707,7 @@ class Software(object): passed to :class:`ihm.startmodel.StartingModel`, :class:`ihm.protocol.Step`, :class:`ihm.analysis.Step`, or - :class:`ihm.restraint.PredictedContactResstraint` objects. + :class:`ihm.restraint.PredictedContactRestraint` objects. """ def __init__(self, name, classification, description, location, type='program', version=None, citation=None): @@ -902,7 +928,7 @@ def __str__(self): def __get_weight(self): # Calculate weight from formula - if self.formula is None: + if self.formula in (None, unknown): return spl = self.formula.split() # Remove formal charge if present @@ -1250,7 +1276,7 @@ def __init__(self, seq_id, entity=None, asym=None): self.seq_id = seq_id def atom(self, atom_id): - """Get a :class:`Atom` in this residue with the given name.""" + """Get a :class:`~ihm.Atom` in this residue with the given name.""" return Atom(residue=self, id=atom_id) def _get_auth_seq_id(self): @@ -1380,11 +1406,11 @@ def get_chem_comp(s): #: String descriptors of branched chemical structure. #: These generally only make sense for oligosaccharide entities, - #: and should be a list of :class:`BranchDescriptor` objects. + #: and should be a list of :class:`~ihm.BranchDescriptor` objects. self.branch_descriptors = [] #: Any links between components in a branched entity. - #: This is a list of :class:`BranchLink` objects. + #: This is a list of :class:`~ihm.BranchLink` objects. self.branch_links = [] def __str__(self): @@ -1402,7 +1428,7 @@ def is_polymeric(self): def is_branched(self): """Return True iff this entity is branched (generally an oligosaccharide)""" - return ((len(self.sequence) > 0 + return ((len(self.sequence) > 1 and isinstance(self.sequence[0], SaccharideChemComp)) or (len(self.sequence) == 0 and self._hint_branched)) @@ -1491,7 +1517,7 @@ class AsymUnit(object): was modeled. Note that this class should not be used to describe crystal waters; - for that, see :class:`WaterAsymUnit`. 
+ for that, see :class:`ihm.WaterAsymUnit`. :param entity: The unique sequence of this asymmetric unit. :type entity: :class:`Entity` @@ -1523,12 +1549,13 @@ class AsymUnit(object): numbering. This differs from `auth_seq_id_map` as the original numbering need not follow any defined scheme, while `auth_seq_id_map` must follow certain PDB-defined rules. This - can either be a mapping type (dict, list, tuple) in which case + can be any mapping type (dict, list, tuple) in which case ``orig_auth_seq_id = orig_auth_seq_id_map[seq_id]``. If the mapping is None (the default), or a given `seq_id` cannot be found in the mapping, ``orig_auth_seq_id = auth_seq_id``. This mapping is only used in the various `scheme` tables, such as ``pdbx_poly_seq_scheme``. + See :attr:`System.asym_units`. """ @@ -1545,6 +1572,11 @@ def __init__(self, entity, details=None, auth_seq_id_map=0, id=None, self.id = id self._strand_id = strand_id + #: For branched entities read from files, mapping from provisional + #: to final internal numbering (`seq_id`), or None if no mapping is + #: necessary. See :meth:`ihm.model.Model.add_atom`. 
+ self.num_map = None + def _get_auth_seq_id_ins_code(self, seq_id): if isinstance(self.auth_seq_id_map, int): return seq_id + self.auth_seq_id_map, None @@ -1604,13 +1636,14 @@ class WaterAsymUnit(AsymUnit): """ def __init__(self, entity, number, details=None, auth_seq_id_map=0, - id=None, strand_id=None): + id=None, strand_id=None, orig_auth_seq_id_map=None): if entity.type != 'water': raise TypeError( "WaterAsymUnit can only be used for water entities") super(WaterAsymUnit, self).__init__( entity, details=details, auth_seq_id_map=auth_seq_id_map, - id=id, strand_id=strand_id) + id=id, strand_id=strand_id, + orig_auth_seq_id_map=orig_auth_seq_id_map) self.number = number self._water_sequence = [entity.sequence[0]] * number diff --git a/modules/core/dependency/python-ihm/ihm/analysis.py b/modules/core/dependency/python-ihm/ihm/analysis.py index 2ec151ea83..e1fa71bd0e 100644 --- a/modules/core/dependency/python-ihm/ihm/analysis.py +++ b/modules/core/dependency/python-ihm/ihm/analysis.py @@ -8,9 +8,10 @@ class Step(object): """A single step in an :class:`Analysis`. - Normally one of the base classes is used; see :class:`FilterStep`, - :class:`ClusterStep`, :class:`RescoreStep`, :class:`ValidationStep`, - :class:`OtherStep`, and :class:`EmptyStep`. + Normally one of the more specific derived classes is used; + see :class:`FilterStep`, :class:`ClusterStep`, :class:`RescoreStep`, + :class:`ValidationStep`, and :class:`EmptyStep`, although this base + class can be used for a generic 'other' step. 
:param str feature: feature energy/score;RMSD;dRMSD;other :param int num_models_begin: The number of models at the beginning diff --git a/modules/core/dependency/python-ihm/ihm/dumper.py b/modules/core/dependency/python-ihm/ihm/dumper.py index a3f297c122..dc3d9f2918 100644 --- a/modules/core/dependency/python-ihm/ihm/dumper.py +++ b/modules/core/dependency/python-ihm/ihm/dumper.py @@ -89,13 +89,13 @@ def dump(self, system, writer): class _AuditConformDumper(Dumper): URL = ("https://raw.githubusercontent.com/" + - "ihmwg/IHM-dictionary/%s/ihm-extension.dic") + "ihmwg/IHMCIF/%s/dist/mmcif_ihm.dic") def dump(self, system, writer): with writer.category("_audit_conform") as lp: # Update to match the version of the IHM dictionary we support: - lp.write(dict_name="ihm-extension.dic", dict_version="1.24", - dict_location=self.URL % "9be59e1") + lp.write(dict_name="mmcif_ihm.dic", dict_version="1.25", + dict_location=self.URL % "460a278") class _StructDumper(Dumper): @@ -219,6 +219,24 @@ def dump(self, system, writer): grant_number=grant.grant_number, ordinal=n + 1) +class _DatabaseDumper(Dumper): + def dump(self, system, writer): + with writer.loop("_database_2", + ["database_id", "database_code", + "pdbx_database_accession", "pdbx_DOI"]) as lp: + for d in system.databases: + lp.write(database_id=d.id, database_code=d.code, + pdbx_DOI=d.doi, + pdbx_database_accession=d.accession) + + +class _DatabaseStatusDumper(Dumper): + def dump(self, system, writer): + with writer.category("_pdbx_database_status") as lp: + # Pass through all data items from a Python dict + lp.write(**system._database_status) + + class _ChemCompDumper(Dumper): def dump(self, system, writer): comps = frozenset(comp for e in system.entities for comp in e.sequence) @@ -745,7 +763,7 @@ class _BranchSchemeDumper(Dumper): def dump(self, system, writer): with writer.loop("_pdbx_branch_scheme", ["asym_id", "entity_id", "mon_id", "num", - "pdb_seq_num", "auth_seq_num", + "pdb_seq_num", "pdb_ins_code", 
"auth_seq_num", "auth_mon_id", "pdb_mon_id", "pdb_asym_id"]) as lp: for asym in system.asym_units: entity = asym.entity @@ -758,7 +776,7 @@ def dump(self, system, writer): lp.write(asym_id=asym._id, pdb_asym_id=asym.strand_id, entity_id=entity._id, num=num + 1, - pdb_seq_num=pdb_seq_num, + pdb_seq_num=pdb_seq_num, pdb_ins_code=ins, auth_seq_num=auth_seq_num, mon_id=comp.id, auth_mon_id=comp.id, pdb_mon_id=comp.id) @@ -979,7 +997,7 @@ def dump_repos(self, writer): def dump_refs(self, writer): with writer.loop("_ihm_external_files", ["id", "reference_id", "file_path", "content_type", - "file_size_bytes", "details"]) as lp: + "file_format", "file_size_bytes", "details"]) as lp: for r in self._ref_by_id: repo = r.repo or self._local_files if r.path is None: @@ -988,6 +1006,7 @@ def dump_refs(self, writer): file_path = self._posix_path(repo._get_full_path(r.path)) lp.write(id=r._id, reference_id=repo._id, file_path=file_path, content_type=r.content_type, + file_format=r.file_format, file_size_bytes=r.file_size, details=r.details) # On Windows systems, convert native paths to POSIX-like (/-separated) @@ -3576,8 +3595,8 @@ class IHMVariant(Variant): """Used to select typical PDBx/IHM file output. 
See :func:`write`.""" _dumpers = [ _EntryDumper, # must be first - _CollectionDumper, - _StructDumper, _CommentDumper, _AuditConformDumper, _CitationDumper, + _CollectionDumper, _StructDumper, _CommentDumper, _AuditConformDumper, + _DatabaseDumper, _DatabaseStatusDumper, _CitationDumper, _SoftwareDumper, _AuditAuthorDumper, _GrantDumper, _ChemCompDumper, _ChemDescriptorDumper, _EntityDumper, _EntitySrcGenDumper, _EntitySrcNatDumper, _EntitySrcSynDumper, _StructRefDumper, diff --git a/modules/core/dependency/python-ihm/ihm/flr.py b/modules/core/dependency/python-ihm/ihm/flr.py index caa5923e8b..136fa79589 100644 --- a/modules/core/dependency/python-ihm/ihm/flr.py +++ b/modules/core/dependency/python-ihm/ihm/flr.py @@ -203,7 +203,7 @@ class EntityAssembly(object): """The assembly of the entities that are in the system. :param entity: The entity to add. - :type entity: :class:`Entity` + :type entity: :class:`ihm.Entity` :param num_copies: The number of copies for the entity in the assembly. """ @@ -382,7 +382,7 @@ class FRETAnalysis(object): :type lifetime_fit_model: :class:`LifetimeFitModel` :param ref_measurement_group: The group of reference measurements in case of lifetime-based analyses. - :type ref_measurement_group: :class:`LifetimeRefMeasurementGroup` + :type ref_measurement_group: :class:`RefMeasurementGroup` :param str method_name: The method used for the analysis. :param float chi_square_reduced: The chi-square reduced as a quality measure for the fit. 
diff --git a/modules/core/dependency/python-ihm/ihm/format.py b/modules/core/dependency/python-ihm/ihm/format.py index 0955a4e450..80655ca7d2 100644 --- a/modules/core/dependency/python-ihm/ihm/format.py +++ b/modules/core/dependency/python-ihm/ihm/format.py @@ -12,6 +12,7 @@ import sys import textwrap import operator +import ihm # getargspec is deprecated in Python 3, but getfullargspec has a very # similar interface try: @@ -189,7 +190,7 @@ def loop(self, category, keys): def write_comment(self, comment): """Write a simple comment to the CIF file. The comment will be wrapped if necessary for readability. - See :meth:`set_line_wrap`.""" + See :meth:`_set_line_wrap`.""" if self._line_wrap: for line in textwrap.wrap(comment, 78): self.fh.write('# ' + line + '\n') @@ -251,20 +252,34 @@ class _ValueToken(_Token): class _OmittedValueToken(_ValueToken): """A value that is deliberately omitted (the '.' string in mmCIF)""" - pass + def as_mmcif(self): + return "." class _UnknownValueToken(_ValueToken): """A value that is unknown (the '?' string in mmCIF)""" - pass + def as_mmcif(self): + return "?" 
class _TextValueToken(_ValueToken): """The value of a variable in mmCIF as a piece of text""" - __slots__ = ['txt'] + __slots__ = ['txt', 'quote'] - def __init__(self, txt): + def __init__(self, txt, quote): self.txt = txt + self.quote = quote + + def as_mmcif(self): + if '\n' in self.txt or self.quote == ';': + suffix = ";\n" if self.txt.endswith('\n') else "\n;\n" + return ";" + self.txt + suffix + elif self.quote == "'": + return "'" + self.txt + "'" + elif self.quote == '"' or ' ' in self.txt: + return '"' + self.txt + '"' + else: + return self.txt class _VariableToken(_Token): @@ -281,14 +296,65 @@ def __init__(self, val, linenum): "(%s) on line %d" % (val, linenum)) +class _PreservingVariableToken(_VariableToken): + """A variable name that preserves the original case of the keyword""" + + __slots__ = ['category', 'keyword', 'orig_keyword'] + + def __init__(self, val, linenum): + super(_PreservingVariableToken, self).__init__(val, linenum) + _, _, self.orig_keyword = val.partition('.') + + def as_mmcif(self): + if self.orig_keyword and self.orig_keyword.lower() == self.keyword: + return self.category + '.' + self.orig_keyword + else: + return self.category + '.' 
+ self.keyword + + +class _CommentToken(_Token): + """A comment in mmCIF without the leading '#'""" + __slots__ = ['txt'] + + def __init__(self, txt): + self.txt = txt + + def as_mmcif(self): + return "#" + self.txt + + +class _WhitespaceToken(_Token): + """Space between other mmCIF tokens""" + __slots__ = ['txt'] + + def __init__(self, txt): + self.txt = txt + + def as_mmcif(self): + return self.txt + + +class _EndOfLineToken(_Token): + """End of a line in an mmCIF file""" + def as_mmcif(self): + return "\n" + + class _DataToken(_Token): """A data_* keyword in mmCIF, denoting a new data block""" - pass + __slots__ = ['txt'] + + def __init__(self, txt): + self.txt = txt + + def as_mmcif(self): + return 'data_' + self.txt class _LoopToken(_Token): """A loop_ keyword in mmCIF, denoting the start of a loop construct""" - pass + def as_mmcif(self): + return "loop_" class _SaveToken(_Token): @@ -314,48 +380,8 @@ def python_to_cif(field): for x in getargspec(h.__call__)[0][1:]] -class CifReader(_Reader): - """Class to read an mmCIF file and extract some or all of its data. - - Use :meth:`read_file` to actually read the file. - - :param file fh: Open handle to the mmCIF file - :param dict category_handler: A dict to handle data - extracted from the file. Keys are category names - (e.g. "_entry") and values are objects that have a `__call__` - method and `not_in_file`, `omitted`, and `unknown` attributes. - The names of the arguments to this `__call__` method - are mmCIF keywords that are extracted from the file (for the - keywords tr_vector[N] and rot_matrix[N][M] simply omit the [ - and ] characters, since these are not valid for Python - identifiers). The object will be called with the data from - the file as a set of strings, or `not_in_file`, `omitted` or - `unkonwn` for any keyword that is not present in the file, - the mmCIF omitted value (.), or mmCIF unknown value (?) - respectively. 
(mmCIF keywords are case insensitive, so this - class always treats them as lowercase regardless of the - file contents.) - :param unknown_category_handler: A callable (or `None`) that is called - for each category in the file that isn't handled; it is given - two arguments: the name of the category, and the line in the - file at which the category was encountered (if known, otherwise - None). - :param unknown_keyword_handler: A callable (or `None`) that is called - for each keyword in the file that isn't handled (within a - category that is handled); it is given three arguments: - the names of the category and keyword, and the line in the - file at which the keyword was encountered (if known, - otherwise None). - """ - def __init__(self, fh, category_handler, unknown_category_handler=None, - unknown_keyword_handler=None): - if _format is not None: - c_file = _format.ihm_file_new_from_python(fh) - self._c_format = _format.ihm_reader_new(c_file) - self.category_handler = category_handler - self.unknown_category_handler = unknown_category_handler - self.unknown_keyword_handler = unknown_keyword_handler - self._category_data = {} +class _CifTokenizer(object): + def __init__(self, fh): self.fh = fh self._tokens = [] self._token_index = 0 @@ -374,10 +400,6 @@ def _read_line(self): else: return line - def __del__(self): - if hasattr(self, '_c_format'): - _format.ihm_reader_free(self._c_format) - def _read_multiline_token(self, first_line, ignore_multiline): """Read a semicolon-delimited (multiline) token""" lines = [first_line[1:]] # Skip initial semicolon @@ -392,7 +414,7 @@ def _read_multiline_token(self, first_line, ignore_multiline): elif nextline.startswith(';'): # Strip last newline lines[-1] = lines[-1].rstrip('\r\n') - self._tokens = [_TextValueToken("".join(lines))] + self._tokens = [_TextValueToken("".join(lines), ';')] return elif not ignore_multiline: lines.append(nextline) @@ -414,15 +436,19 @@ def _handle_quoted_token(self, line, strlen, start_pos, 
quote_type): elif end == strlen - 1 or line[end + 1] in _WHITESPACE: # A quoted string is always a literal string, even if it is # "?" or ".", not an unknown/omitted value - self._tokens.append(_TextValueToken(line[start_pos + 1:end])) + self._tokens.append(_TextValueToken(line[start_pos + 1:end], + quote)) return end + 1 # Step past the closing quote + def _skip_initial_whitespace(self, line, strlen, start_pos): + while start_pos < strlen and line[start_pos] in _WHITESPACE: + start_pos += 1 + return start_pos + def _extract_line_token(self, line, strlen, start_pos): """Extract the next token from the given line starting at start_pos, populating self._tokens. The new start_pos is returned.""" - # Skip initial whitespace - while start_pos < strlen and line[start_pos] in _WHITESPACE: - start_pos += 1 + start_pos = self._skip_initial_whitespace(line, strlen, start_pos) if start_pos >= strlen: return strlen if line[start_pos] == '"': @@ -431,6 +457,7 @@ def _extract_line_token(self, line, strlen, start_pos): return self._handle_quoted_token(line, strlen, start_pos, "Single") elif line[start_pos] == "#": # Comment - discard the rest of the line + self._handle_comment(line, start_pos) return strlen else: # Find end of token (whitespace or end of line) @@ -441,11 +468,11 @@ def _extract_line_token(self, line, strlen, start_pos): if val == 'loop_': tok = _LoopToken() elif val.startswith('data_'): - tok = _DataToken() + tok = _DataToken(val[5:]) elif val.startswith('save_'): tok = _SaveToken() elif val.startswith('_'): - tok = _VariableToken(val, self._linenum) + tok = self._handle_variable_token(val, self._linenum) elif val == '.': tok = _OmittedValueToken() elif val == '?': @@ -454,14 +481,22 @@ def _extract_line_token(self, line, strlen, start_pos): # Note that we do no special processing for other reserved # words (global_, save_, stop_). But the probability of # them occurring where we expect a value is pretty small. 
- tok = _TextValueToken(val) # don't alter case of values + tok = _TextValueToken(val, None) # don't alter case of values self._tokens.append(tok) return end_pos + def _handle_variable_token(self, val, linenum): + return _VariableToken(val, linenum) + + def _handle_comment(self, line, start_pos): + """Potentially handle a comment that spans line[start_pos:].""" + pass + def _tokenize(self, line): """Break up a line into tokens, populating self._tokens""" self._tokens = [] if line.startswith('#'): + self._handle_comment(line, 0) return # Skip comment lines start_pos = 0 strlen = len(line) @@ -494,6 +529,321 @@ def _get_token(self, ignore_multiline=False): self._token_index += 1 return self._tokens[self._token_index - 1] + +class _PreservingCifTokenizer(_CifTokenizer): + """A tokenizer subclass which preserves comments, case and whitespace""" + + def _tokenize(self, line): + _CifTokenizer._tokenize(self, line) + self._tokens.append(_EndOfLineToken()) + + def _handle_comment(self, line, start_pos): + self._tokens.append(_CommentToken(line[start_pos + 1:])) + + def _handle_variable_token(self, val, linenum): + return _PreservingVariableToken(val, linenum) + + def _skip_initial_whitespace(self, line, strlen, start_pos): + end_pos = start_pos + while end_pos < strlen and line[end_pos] in _WHITESPACE: + end_pos += 1 + if end_pos > start_pos: + self._tokens.append(_WhitespaceToken(line[start_pos:end_pos])) + return end_pos + + +class _CategoryTokenGroup(object): + """A group of tokens which set a single data item""" + def __init__(self, vartoken, valtoken): + self.vartoken, self.valtoken = vartoken, valtoken + + def __str__(self): + return ("<_CategoryTokenGroup(%s, %s)>" + % (self.vartoken.as_mmcif(), self.valtoken.token.as_mmcif())) + + def as_mmcif(self): + return self.vartoken.as_mmcif() + self.valtoken.as_mmcif() + "\n" + + def __set_value(self, val): + self.valtoken.value = val + + category = property(lambda self: self.vartoken.category) + keyword = property(lambda 
self: self.vartoken.keyword) + value = property(lambda self: self.valtoken.value, __set_value) + + +class _LoopHeaderTokenGroup(object): + """A group of tokens that form the start of a loop_ construct""" + def __init__(self, looptoken, category, keywords, end_spacers): + self._loop, self.category = looptoken, category + self.keywords = keywords + self.end_spacers = end_spacers + + def keyword_index(self, keyword): + """Get the zero-based index of the given keyword, or ValueError""" + return [k.token.keyword for k in self.keywords].index(keyword) + + def __str__(self): + return ("<_LoopHeaderTokenGroup(%s, %s)>" + % (self.category, + str([k.token.keyword for k in self.keywords]))) + + def as_mmcif(self): + all_tokens = [self._loop] + self.keywords + self.end_spacers + return "".join(x.as_mmcif() for x in all_tokens) + + +class _LoopRowTokenGroup(object): + """A group of tokens that represent one row in a loop_ construct""" + def __init__(self, items): + self.items = items + + def as_mmcif(self): + return "".join(x.as_mmcif() for x in self.items) + + +class _SpacedToken(object): + """A token with zero or more leading whitespace or newline tokens""" + def __init__(self, spacers, token): + self.spacers, self.token = spacers, token + + def as_mmcif(self): + return ("".join(x.as_mmcif() for x in self.spacers) + + self.token.as_mmcif()) + + def __get_value(self): + if isinstance(self.token, _OmittedValueToken): + return None + elif isinstance(self.token, _UnknownValueToken): + return ihm.unknown + else: + return self.token.txt + + def __set_value(self, val): + if val is None: + self.token = _OmittedValueToken() + elif val is ihm.unknown: + self.token = _UnknownValueToken() + elif isinstance(self.token, _TextValueToken): + self.token.txt = val + else: + self.token = _TextValueToken(val, quote=None) + + value = property(__get_value, __set_value) + + +class _ChangeValueFilter(object): + def __init__(self, target, old, new): + ts = target.split('.') + if len(ts) == 1 or not 
ts[0]: + self.category = None + else: + self.category = ts[0] + self.keyword = ts[-1] + self.old, self.new = old, new + + def filter_category(self, tok): + if ((self.category is None or tok.category == self.category) + and tok.keyword == self.keyword and tok.value == self.old): + tok.value = self.new + return tok + + def get_loop_filter(self, tok): + if self.category is None or tok.category == self.category: + try: + keyword_index = tok.keyword_index(self.keyword) + except ValueError: + return + + def loop_filter(t): + if t.items[keyword_index].value == self.old: + t.items[keyword_index].value = self.new + return t + return loop_filter + + +class _PreservingCifReader(_PreservingCifTokenizer): + """Read an mmCIF file and break it into tokens""" + def __init__(self, fh): + super(_PreservingCifReader, self).__init__(fh) + + def read_file(self, filters=None): + """Read the file and yield tokens and/or token groups""" + if filters is None: + return self._read_file_internal() + else: + return self._read_file_with_filters(filters) + + def _read_file_with_filters(self, filters): + loop_filters = None + for tok in self._read_file_internal(): + if isinstance(tok, _CategoryTokenGroup): + tok = self._filter_category(tok, filters) + elif isinstance(tok, ihm.format._LoopHeaderTokenGroup): + loop_filters = [f.get_loop_filter(tok) for f in filters] + loop_filters = [f for f in loop_filters if f is not None] + elif (isinstance(tok, ihm.format._LoopRowTokenGroup) + and loop_filters): + tok = self._filter_loop(tok, loop_filters) + if tok is not None: + yield tok + + def _filter_category(self, tok, filters): + for f in filters: + tok = f.filter_category(tok) + if tok is None: + return + return tok + + def _filter_loop(self, tok, filters): + for f in filters: + tok = f(tok) + if tok is None: + return + return tok + + def _read_file_internal(self): + while True: + token = self._get_token() + if token is None: + break + if isinstance(token, _VariableToken): + yield 
self._read_value(token) + elif isinstance(token, _LoopToken): + for tok in self._read_loop(token): + yield tok + # Did we hit the end of the file? + if self._token_index < 0: + break + else: + yield token + + def _get_spaced_token(self): + """Get the next token plus any number of leading space/EOL tokens""" + spacers = [] + while True: + token = self._get_token() + if isinstance(token, (_EndOfLineToken, _WhitespaceToken)): + spacers.append(token) + else: + return _SpacedToken(spacers, token) + + def _read_value(self, vartoken): + """Read a line that sets a single value, e.g. "_entry.id 1YTI""" + spval = self._get_spaced_token() + if not isinstance(spval.token, _ValueToken): + raise CifParserError( + "No valid value found for %s.%s on line %d" + % (vartoken.category, vartoken.keyword, self._linenum)) + eoltok = self._get_token() + if not isinstance(eoltok, _EndOfLineToken): + raise CifParserError( + "No end of line after %s.%s on line %d" + % (vartoken.category, vartoken.keyword, self._linenum)) + return _CategoryTokenGroup(vartoken, spval) + + def _read_loop(self, looptoken): + """Handle a loop_ construct""" + header = self._read_loop_header(looptoken) + yield header + for line in self._read_loop_data(header.keywords): + yield line + + def _read_loop_header(self, looptoken): + """Read the set of keywords for a loop_ construct""" + category = None + keywords = [] + while True: + spt = self._get_spaced_token() + if isinstance(spt.token, _VariableToken): + if category is None: + category = spt.token.category + elif category != spt.token.category: + raise CifParserError( + "mmCIF files cannot contain multiple " + "categories within a single loop at line %d" + % self._linenum) + keywords.append(spt) + elif isinstance(spt.token, _ValueToken): + # OK, end of keywords; proceed on to values + self._unget_token() + return _LoopHeaderTokenGroup(looptoken, category, keywords, + spt.spacers) + else: + raise CifParserError("Was expecting a keyword or value for " + "loop at line 
%d" % self._linenum) + + def _read_loop_data(self, keywords): + """Read the data for a loop_ construct""" + while True: + items = [] + for i, keyword in enumerate(keywords): + spt = self._get_spaced_token() + if isinstance(spt.token, _ValueToken): + items.append(spt) + elif i == 0: + # OK, end of the loop + for s in spt.spacers: + yield s + if spt.token is not None: + self._unget_token() + return + else: + raise CifParserError( + "Wrong number of data values in loop " + "(should be an exact multiple of the number " + "of keys) at line %d" % self._linenum) + yield _LoopRowTokenGroup(items) + + +class CifReader(_Reader, _CifTokenizer): + """Class to read an mmCIF file and extract some or all of its data. + + Use :meth:`read_file` to actually read the file. + + :param file fh: Open handle to the mmCIF file + :param dict category_handler: A dict to handle data + extracted from the file. Keys are category names + (e.g. "_entry") and values are objects that have a `__call__` + method and `not_in_file`, `omitted`, and `unknown` attributes. + The names of the arguments to this `__call__` method + are mmCIF keywords that are extracted from the file (for the + keywords tr_vector[N] and rot_matrix[N][M] simply omit the [ + and ] characters, since these are not valid for Python + identifiers). The object will be called with the data from + the file as a set of strings, or `not_in_file`, `omitted` or + `unknown` for any keyword that is not present in the file, + the mmCIF omitted value (.), or mmCIF unknown value (?) + respectively. (mmCIF keywords are case insensitive, so this + class always treats them as lowercase regardless of the + file contents.) + :param unknown_category_handler: A callable (or `None`) that is called + for each category in the file that isn't handled; it is given + two arguments: the name of the category, and the line in the + file at which the category was encountered (if known, otherwise + None). 
+ :param unknown_keyword_handler: A callable (or `None`) that is called + for each keyword in the file that isn't handled (within a + category that is handled); it is given three arguments: + the names of the category and keyword, and the line in the + file at which the keyword was encountered (if known, + otherwise None). + """ + def __init__(self, fh, category_handler, unknown_category_handler=None, + unknown_keyword_handler=None): + if _format is not None: + c_file = _format.ihm_file_new_from_python(fh) + self._c_format = _format.ihm_reader_new(c_file) + self.category_handler = category_handler + self.unknown_category_handler = unknown_category_handler + self.unknown_keyword_handler = unknown_keyword_handler + self._category_data = {} + _CifTokenizer.__init__(self, fh) + + def __del__(self): + if hasattr(self, '_c_format'): + _format.ihm_reader_free(self._c_format) + def _read_value(self, vartoken): """Read a line that sets a single value, e.g. "_entry.id 1YTI""" # Only read the value if we're interested in this category and key diff --git a/modules/core/dependency/python-ihm/ihm/location.py b/modules/core/dependency/python-ihm/ihm/location.py index e0f3caaae4..4a46eae611 100644 --- a/modules/core/dependency/python-ihm/ihm/location.py +++ b/modules/core/dependency/python-ihm/ihm/location.py @@ -19,8 +19,8 @@ class Location(object): experimental dataset may be found; - an :class:`~ihm.model.Ensemble` to point to coordinates for an entire ensemble, for example as a DCD file; - - a :class:`LocalizationDensity` to point to an external localization - density, for example in MRC format; + - a :class:`ihm.model.LocalizationDensity` to point to an external + localization density, for example in MRC format; - :data:`ihm.System.locations` to point to other files relating to the modeling in general, such as a modeling control script (:class:`WorkflowFileLocation`) or a command script for a @@ -191,15 +191,16 @@ class FileLocation(Location): containing the file, or `None` 
if it is stored on the local disk :type repo: :class:`Repository` :param str details: optional description of the file + :param str file_format: optional file type (e.g. TXT, PNG, FASTA) """ _eq_keys = Location._eq_keys + ['repo', 'path', 'content_type'] - content_type = None + content_type = 'Other' - def __init__(self, path, repo=None, details=None): + def __init__(self, path, repo=None, details=None, file_format=None): super(FileLocation, self).__init__(details) - self.repo = repo + self.repo, self.file_format = repo, file_format if repo: self.path = path # Cannot determine file size if non-local diff --git a/modules/core/dependency/python-ihm/ihm/model.py b/modules/core/dependency/python-ihm/ihm/model.py index f9f1fe0f34..8e48938fca 100644 --- a/modules/core/dependency/python-ihm/ihm/model.py +++ b/modules/core/dependency/python-ihm/ihm/model.py @@ -135,6 +135,12 @@ def add_atom(self, atom): """Add to the model's set of :class:`Atom` objects. See :meth:`get_spheres` for more details. + + Note that for branched entities, the `seq_id` of the new atom + is provisional. It should be mapped to the correct ID once the + input file is completely read, using :attr:`ihm.AsymUnit.num_map`. + This is done automatically by ihm.reader when using the default + implementation. """ self._atoms.append(atom) diff --git a/modules/core/dependency/python-ihm/ihm/multi_state_scheme.py b/modules/core/dependency/python-ihm/ihm/multi_state_scheme.py index db8d6d1145..0b0b5148ce 100644 --- a/modules/core/dependency/python-ihm/ihm/multi_state_scheme.py +++ b/modules/core/dependency/python-ihm/ihm/multi_state_scheme.py @@ -17,11 +17,10 @@ class MultiStateScheme(object): :param str details: Details on the scheme. :param connectivities: A list of connectivities that belong to the scheme. 
- :type connectivities: List of - :class:`ìhm.multi_state_scheme.Connectivity` + :type connectivities: List of :class:`Connectivity` :param relaxation_times: A list of relaxation times not assigned to specific connectivities, but to the scheme - :type relaxation_times: List of :class:`ihm.RelaxationTime` + :type relaxation_times: List of :class:`RelaxationTime` """ def __init__(self, name, details=None, connectivities=None, relaxation_times=None): @@ -149,11 +148,11 @@ class Connectivity(object): :type begin_state: :class:`ihm.model.State` :param end_state: The end state of the connectivity. Can be None in case of states that are not connected to others. - :type end_state: :class:`ìhm.model.State` + :type end_state: :class:`ihm.model.State` :param details: Details to the connectivity. :param dataset_group: The DatasetGroup that was used to obtain information on the connectivity. - :type dataset_group: :class:`ìhm.dataset.DatasetGroup` + :type dataset_group: :class:`ihm.dataset.DatasetGroup` :param kinetic_rate: A kinetic rate assigned to the connectivity. :type kinetic_rate: :class:`KineticRate` :param relaxation_time: A relaxation time assigned to the connectivity. 
diff --git a/modules/core/dependency/python-ihm/ihm/reader.py b/modules/core/dependency/python-ihm/ihm/reader.py index 86eb9b9f74..3de04a8e0e 100644 --- a/modules/core/dependency/python-ihm/ihm/reader.py +++ b/modules/core/dependency/python-ihm/ihm/reader.py @@ -163,7 +163,7 @@ def _make_new_object(self, newcls=None): class RangeIDMapper(object): """Utility class to handle mapping from mmCIF IDs to - :class:`ihm.AsymUnitRange` or :class:`EntityRange` objects.""" + :class:`ihm.AsymUnitRange` or :class:`ihm.EntityRange` objects.""" def __init__(self): self._id_map = {} @@ -451,7 +451,7 @@ def __init__(self, model_class, starting_model_class): self.assemblies = IDMapper(self.system.orphan_assemblies, ihm.Assembly) #: Mapping from ID to :class:`ihm.AsymUnitRange` - #: or :class:`EntityRange` objects + #: or :class:`ihm.EntityRange` objects self.ranges = RangeIDMapper() #: Mapping from ID to :class:`ihm.location.Repository` objects @@ -634,7 +634,7 @@ def __init__(self, model_class, starting_model_class): ihm.multi_state_scheme.KineticRate) #: Mapping from ID to - #: :class:`ihm.multi_state_schene.RelaxationTime` objects + #: :class:`ihm.multi_state_scheme.RelaxationTime` objects self.relaxation_times = IDMapper( None, ihm.multi_state_scheme.RelaxationTime, @@ -801,7 +801,7 @@ def __init__(self, model_class, starting_model_class): self.flr_data, ihm.flr.FPSMPPModeling, *(None,) * 3) #: Mapping from ID to - #: :class:`ihm.flr.KineticRateFRETAnalysisConnection` objects + #: :class:`ihm.flr.KineticRateFretAnalysisConnection` objects self.flr_kinetic_rate_fret_analysis_connection = _FLRIDMapper( '_collection_flr_kinetic_rate_fret_analysis_connection', 'kinetic_rate_fret_analysis_connections', @@ -810,7 +810,7 @@ def __init__(self, model_class, starting_model_class): *(None,) * 3) #: Mapping from ID to - #: :class:`ihm.flr.KineticRateFRETAnalysisConnection` objects + #: :class:`ihm.flr.RelaxationTimeFretAnalysisConnection` objects 
self.flr_relaxation_time_fret_analysis_connection = _FLRIDMapper( '_collection_flr_relaxation_time_fret_analysis_connection', 'relaxation_time_fret_analysis_connections', @@ -1025,6 +1025,53 @@ def __call__(self, citation_id, name): s.authors.append(name) +class _DatabaseHandler(Handler): + category = '_database_2' + + def __call__(self, database_code, database_id, pdbx_doi, + pdbx_database_accession): + d = ihm.Database(id=database_id, code=database_code, + doi=pdbx_doi, accession=pdbx_database_accession) + self.system.databases.append(d) + + +class _DatabaseStatusHandler(Handler): + category = '_pdbx_database_status' + + # placeholder; the reader will otherwise only return strings or None + not_in_file = 0 + _keys = ['entry_id', 'sg_entry', 'author_approval_type', + 'author_release_status_code', 'date_author_approval', + 'date_author_release_request', 'date_begin_deposition', + 'date_begin_processing', 'date_begin_release_preparation', + 'date_chemical_shifts', 'date_coordinates', + 'date_deposition_form', 'date_end_processing', + 'date_hold_chemical_shifts', 'date_hold_coordinates', + 'date_hold_nmr_constraints', 'date_hold_struct_fact', + 'date_manuscript', 'date_nmr_constraints', 'date_of_pdb_release', + 'date_of_cs_release', 'date_of_mr_release', 'date_of_sf_release', + 'date_struct_fact', 'date_submitted', + 'dep_release_code_chemical_shifts', + 'dep_release_code_coordinates', + 'dep_release_code_nmr_constraints', 'dep_release_code_sequence', + 'dep_release_code_struct_fact', 'deposit_site', + 'hold_for_publication', 'methods_development_category', + 'name_depositor', 'pdb_date_of_author_approval', + 'pdb_format_compatible', 'process_site', 'rcsb_annotator', + 'recvd_author_approval', 'recvd_chemical_shifts', + 'recvd_coordinates', 'recvd_deposit_form', + 'recvd_initial_deposition_date', 'recvd_internal_approval', + 'recvd_manuscript', 'recvd_nmr_constraints', 'recvd_struct_fact', + 'status_code', 'status_code_cs', 'status_code_mr', + 'status_code_sf'] + + 
def __call__(self, *args): + # Just pass through all data items present in the file, as a dict + self.system._database_status = dict( + (k, v) for (k, v) in zip(self._keys, args) + if v != self.not_in_file) + + class _ChemCompHandler(Handler): category = '_chem_comp' @@ -1404,14 +1451,20 @@ def __init__(self, *args): and x[1] is not ihm.location.FileLocation) def __call__(self, content_type, id, reference_id, details, file_path, - file_size_bytes): + file_format, file_size_bytes): typ = None if content_type is None else content_type.lower() f = self.sysr.external_files.get_by_id( id, self.type_map.get(typ, ihm.location.FileLocation)) f.repo = self.sysr.repos.get_by_id(reference_id) - f.file_size = self.get_int(file_size_bytes) + # IHMCIF dictionary defines file size as a float, although only int + # values make sense, so allow for either ints or floats here + try: + f.file_size = self.get_int(file_size_bytes) + except ValueError: + f.file_size = self.get_float(file_size_bytes) self.copy_if_present( - f, locals(), keys=['details'], mapkeys={'file_path': 'path'}) + f, locals(), keys=['details', 'file_format'], + mapkeys={'file_path': 'path'}) # Handle DOI that is itself a file if file_path is None: f.path = '.' @@ -2043,24 +2096,28 @@ class _AtomSiteHandler(Handler): def __init__(self, *args): super(_AtomSiteHandler, self).__init__(*args) self._missing_sequence = collections.defaultdict(dict) + # Mapping from asym+auth_seq_id to internal ID self._seq_id_map = {} def _get_seq_id_from_auth(self, auth_seq_id, pdbx_pdb_ins_code, asym): """Get an internal seq_id for something not a polymer (nonpolymer, water, branched), given author-provided info""" if asym._id not in self._seq_id_map: - m = {} - # Make reverse mapping from author-provided info to seq_id - if isinstance(asym.auth_seq_id_map, dict): - for key, val in asym.auth_seq_id_map.items(): - m[val] = key - self._seq_id_map[asym._id] = m + self._seq_id_map[asym._id] = {} m = self._seq_id_map[asym._id] # Treat ? and . 
missing insertion codes equivalently if pdbx_pdb_ins_code is ihm.unknown: pdbx_pdb_ins_code = None - # If no match, use the author-provided numbering as-is - return m.get((auth_seq_id, pdbx_pdb_ins_code), auth_seq_id) + auth = (auth_seq_id, pdbx_pdb_ins_code) + if auth not in m: + # Assign a new ID starting from 1 + seq_id = len(m) + 1 + m[auth] = seq_id + # Add this info to the seq_id -> auth_seq_id mapping too + if asym.auth_seq_id_map == 0: + asym.auth_seq_id_map = {} + asym.auth_seq_id_map[seq_id] = (auth_seq_id, pdbx_pdb_ins_code) + return m[auth] def __call__(self, pdbx_pdb_model_num, label_asym_id, b_iso_or_equiv, label_seq_id, label_atom_id, type_symbol, cartn_x, cartn_y, @@ -2081,7 +2138,7 @@ def __call__(self, pdbx_pdb_model_num, label_asym_id, b_iso_or_equiv, asym = self.sysr.asym_units.get_by_id(label_asym_id) auth_seq_id = self.get_int_or_string(auth_seq_id) if seq_id is None: - # Fill in our internal seq_id if possible + # Fill in our internal seq_id using author-provided info our_seq_id = self._get_seq_id_from_auth( auth_seq_id, pdbx_pdb_ins_code, asym) else: @@ -2497,18 +2554,13 @@ def finalize(self): def _get_auth_seq_id_offset(self, asym): """Get the offset from seq_id to auth_seq_id. 
Return None if no consistent offset exists.""" - # Do nothing if the entity is not polymeric or branched - if asym.entity is None or (not asym.entity.is_polymeric() - and not asym.entity.is_branched()): + # Do nothing if the entity is not polymeric + if asym.entity is None or not asym.entity.is_polymeric(): return # Do nothing if no map exists if asym.auth_seq_id_map == 0: return - if asym.entity.is_branched(): - # Hack, as branched entities don't technically have seq_ids - rng = (1, len(asym.entity.sequence)) - else: - rng = asym.seq_id_range + rng = asym.seq_id_range offset = None for seq_id in range(rng[0], rng[1] + 1): # If a residue isn't in the map, it has an effective offset of 0, @@ -2535,6 +2587,10 @@ def _get_auth_seq_id_offset(self, asym): class _NonPolySchemeHandler(Handler): category = '_pdbx_nonpoly_scheme' + def __init__(self, *args): + super(_NonPolySchemeHandler, self).__init__(*args) + self._scheme = {} + def __call__(self, asym_id, entity_id, pdb_seq_num, mon_id, pdb_ins_code, pdb_strand_id, ndb_seq_num, auth_seq_num): entity = self.sysr.entities.get_by_id(entity_id) @@ -2550,71 +2606,137 @@ def __call__(self, asym_id, entity_id, pdb_seq_num, mon_id, pdb_ins_code, mon_id, name=entity.description) entity.sequence.append(s) asym = self.sysr.asym_units.get_by_id(asym_id) - if entity.type == 'water' and not isinstance(asym, ihm.WaterAsymUnit): - # Replace AsymUnit with WaterAsymUnit if necessary - asym.__class__ = ihm.WaterAsymUnit - asym._water_sequence = [entity.sequence[0]] - asym.number = 1 if pdb_strand_id not in (None, ihm.unknown, asym_id): asym._strand_id = pdb_strand_id pdb_seq_num = self.get_int_or_string(pdb_seq_num) auth_seq_num = self.get_int_or_string(auth_seq_num) - if entity.type == 'water': - # For waters, assume ndb_seq_num counts starting from 1, - # so use as our internal seq_id. 
Make sure the WaterAsymUnit - # is long enough to handle all ids - seq_id = self.get_int(ndb_seq_num) - if seq_id is None: - # If no ndb_seq_num, we cannot map - return - # Don't bother adding a 1->1 mapping - if (pdb_seq_num != seq_id - or pdb_ins_code not in (None, ihm.unknown)): - asym.number = max(asym.number, seq_id) - asym._water_sequence = [entity.sequence[0]] * asym.number - if asym.auth_seq_id_map == 0: - asym.auth_seq_id_map = {} - asym.auth_seq_id_map[seq_id] = (pdb_seq_num, pdb_ins_code) - # Note any residues that have different pdb_seq_num & auth_seq_num - if (auth_seq_num is not None and pdb_seq_num is not None - and auth_seq_num != pdb_seq_num): - if asym.orig_auth_seq_id_map is None: - asym.orig_auth_seq_id_map = {} - asym.orig_auth_seq_id_map[seq_id] = auth_seq_num - else: - # For nonpolymers, assume a single ChemComp with seq_id=1, - # but don't bother adding a 1->1 mapping - if pdb_seq_num != 1 or pdb_ins_code not in (None, ihm.unknown): - asym.auth_seq_id_map = {1: (pdb_seq_num, pdb_ins_code)} - # Note any residues that have different pdb_seq_num & auth_seq_num - if (auth_seq_num is not None and pdb_seq_num is not None - and auth_seq_num != pdb_seq_num): - asym.orig_auth_seq_id_map = {1: auth_seq_num} + ndb_seq_num = self.get_int(ndb_seq_num) + # Make mapping from author-provided numbering (*pdb_seq_num*, not + # auth_seq_num) to original and NDB numbering. We will use this at + # finalize time to map internal ID ("seq_id") to auth, orig_auth, + # and NDB numbering. 
+ if asym_id not in self._scheme: + self._scheme[asym_id] = [] + self._scheme[asym_id].append((pdb_seq_num, pdb_ins_code, + auth_seq_num, ndb_seq_num)) + + def finalize(self): + for asym in self.system.asym_units: + entity = asym.entity + if entity is None or entity.is_polymeric() or entity.is_branched(): + continue + self._finalize_asym(asym) + + def _finalize_asym(self, asym): + # Add mapping info from scheme tables (to that already extracted + # from atom_site); if a mismatch we use atom_site info + scheme = self._scheme.get(asym._id) + if scheme: + if not asym.auth_seq_id_map: + asym.auth_seq_id_map = {} + if not asym.orig_auth_seq_id_map: + asym.orig_auth_seq_id_map = {} + # Make reverse mapping from author-provided info to internal ID + auth_map = {} + for key, val in asym.auth_seq_id_map.items(): + auth_map[val] = key + for pdb_seq_num, pdb_ins_code, auth_seq_num, ndb_seq_num in scheme: + auth = (pdb_seq_num, pdb_ins_code) + seq_id = auth_map.get(auth) + if seq_id is None: + seq_id = len(asym.auth_seq_id_map) + 1 + asym.auth_seq_id_map[seq_id] = auth + if pdb_seq_num != auth_seq_num: + asym.orig_auth_seq_id_map[seq_id] = auth_seq_num + if not asym.orig_auth_seq_id_map: + asym.orig_auth_seq_id_map = None + if asym.entity.type == 'water': + # Replace AsymUnit with WaterAsymUnit if necessary + if not isinstance(asym, ihm.WaterAsymUnit): + asym.__class__ = ihm.WaterAsymUnit + asym.number = len(asym.auth_seq_id_map) + asym._water_sequence = [asym.entity.sequence[0]] * asym.number + # todo: add mapping from seq_id to ndb numbering? 
class _BranchSchemeHandler(Handler): category = '_pdbx_branch_scheme' - def __call__(self, asym_id, num, pdb_seq_num, auth_seq_num, pdb_asym_id): + def __init__(self, *args): + super(_BranchSchemeHandler, self).__init__(*args) + self._scheme = {} + + def __call__(self, asym_id, num, pdb_seq_num, auth_seq_num, pdb_asym_id, + pdb_ins_code): asym = self.sysr.asym_units.get_by_id(asym_id) if pdb_asym_id not in (None, ihm.unknown, asym_id): asym._strand_id = pdb_asym_id pdb_seq_num = self.get_int_or_string(pdb_seq_num) auth_seq_num = self.get_int_or_string(auth_seq_num) num = self.get_int(num) - # Note any residues that have different num and auth_seq_id - # These will be finalized by _PolySeqSchemeHandler - if num is not None and pdb_seq_num is not None \ - and num != pdb_seq_num: - if asym.auth_seq_id_map == 0: - asym.auth_seq_id_map = {} - asym.auth_seq_id_map[num] = pdb_seq_num, None - # Note any residues that have different pdb_seq_num and auth_seq_num - if (num is not None and auth_seq_num is not None - and pdb_seq_num is not None and auth_seq_num != pdb_seq_num): - if asym.orig_auth_seq_id_map is None: - asym.orig_auth_seq_id_map = {} - asym.orig_auth_seq_id_map[num] = auth_seq_num + # Make mapping from author-provided numbering (*pdb_seq_num*, not + # auth_seq_num) to original and "num" numbering. We will use this at + # finalize time to map internal ID ("seq_id") to auth, orig_auth, + # and "num" numbering. 
+ if asym_id not in self._scheme: + self._scheme[asym_id] = [] + self._scheme[asym_id].append((pdb_seq_num, pdb_ins_code, + auth_seq_num, num)) + + def finalize(self): + need_map_num = False + for asym in self.system.asym_units: + entity = asym.entity + if entity is None or not entity.is_branched(): + continue + self._finalize_asym(asym) + if asym.num_map: + need_map_num = True + if need_map_num: + self._reassign_seq_ids() + + def _reassign_seq_ids(self): + """Change provisional seq_ids so that they match + _pdbx_branch_scheme.num""" + for m in self.sysr.models.get_all(): + for atom in m._atoms: + if atom.asym_unit.num_map: + atom.seq_id = atom.asym_unit.num_map[atom.seq_id] + + def _finalize_asym(self, asym): + # Populate auth_seq_id mapping from scheme tables, and correct + # any incorrect seq_ids assigned in atom_site to use num + scheme = self._scheme.get(asym._id, []) + # Make reverse mapping from atom_site author-provided info + # to internal ID + auth_map = {} + if asym.auth_seq_id_map: + for key, val in asym.auth_seq_id_map.items(): + auth_map[val] = key + asym.auth_seq_id_map = {} + asym.orig_auth_seq_id_map = {} + asym.num_map = {} + for pdb_seq_num, pdb_ins_code, auth_seq_num, num in scheme: + asym.auth_seq_id_map[num] = (pdb_seq_num, pdb_ins_code) + if pdb_seq_num != auth_seq_num: + asym.orig_auth_seq_id_map[num] = auth_seq_num + as_seq_id = auth_map.get((pdb_seq_num, pdb_ins_code)) + if as_seq_id is not None: + if as_seq_id != num: + asym.num_map[as_seq_id] = num + del auth_map[(pdb_seq_num, pdb_ins_code)] + if not asym.orig_auth_seq_id_map: + asym.orig_auth_seq_id_map = None + if not asym.num_map: + asym.num_map = None + # If any residues from atom_site are left, we can't assign a num + # for them, so raise an error + if auth_map: + raise ValueError( + "For branched asym %s, the following author-provided " + "residue numbers (atom_site.auth_seq_id) are not present in " + "the pdbx_branch_scheme table: %s" + % (asym._id, ", ".join(repr(x[0]) for x in 
auth_map.keys()))) class _EntityBranchListHandler(Handler): @@ -3622,6 +3744,7 @@ class IHMVariant(Variant): _handlers = [ _CollectionHandler, _StructHandler, _SoftwareHandler, _CitationHandler, + _DatabaseHandler, _DatabaseStatusHandler, _AuditAuthorHandler, _GrantHandler, _CitationAuthorHandler, _ChemCompHandler, _ChemDescriptorHandler, _EntityHandler, _EntitySrcNatHandler, _EntitySrcGenHandler, _EntitySrcSynHandler, diff --git a/modules/core/dependency/python-ihm/ihm/restraint.py b/modules/core/dependency/python-ihm/ihm/restraint.py index a439f92f5f..8f5118a3cc 100644 --- a/modules/core/dependency/python-ihm/ihm/restraint.py +++ b/modules/core/dependency/python-ihm/ihm/restraint.py @@ -587,14 +587,14 @@ def _all_entities_or_asyms(self): class ResidueFeature(Feature): """Selection of one or more residues from the system. - Residues can be selected from both :class:`AsymUnit` and - :class:`Entity` (the latter implies that it selects residues + Residues can be selected from both :class:`ihm.AsymUnit` and + :class:`ihm.Entity` (the latter implies that it selects residues in all instances of that entity). Individual residues can - also be selected by passing :class:`Residue` objects. + also be selected by passing :class:`ihm.Residue` objects. - :param sequence ranges: A list of :class:`AsymUnitRange`, - :class:`AsymUnit`, :class:`EntityRange`, :class:`Residue`, - and/or :class:`Entity` objects. + :param sequence ranges: A list of :class:`ihm.AsymUnitRange`, + :class:`ihm.AsymUnit`, :class:`ihm.EntityRange`, + :class:`ihm.Residue`, and/or :class:`ihm.Entity` objects. :param str details: Additional text describing this feature. """ @@ -628,8 +628,8 @@ def _get_entity(x): class AtomFeature(Feature): """Selection of one or more atoms from the system. Atoms can be selected from polymers or non-polymers (but not both). 
- Atoms can also be selected from both :class:`AsymUnit` and - :class:`Entity` (the latter implies that it selects atoms + Atoms can also be selected from both :class:`ihm.AsymUnit` and + :class:`ihm.Entity` (the latter implies that it selects atoms in all instances of that entity). For selecting an entire polymer or residue(s), see :class:`ResidueFeature`. For selecting an entire non-polymer, @@ -659,12 +659,12 @@ class NonPolyFeature(Feature): """Selection of one or more non-polymers from the system. To select individual atoms from a non-polymer, see :class:`AtomFeature`. - Features can include both :class:`AsymUnit` and - :class:`Entity` (the latter implies that it selects non-polymers + Features can include both :class:`ihm.AsymUnit` and + :class:`ihm.Entity` (the latter implies that it selects non-polymers in all instances of that entity). - :param sequence objs: A list of :class:`AsymUnit` and/or - :class:`Entity` objects. + :param sequence objs: A list of :class:`ihm.AsymUnit` and/or + :class:`ihm.Entity` objects. :param str details: Additional text describing this feature. """ diff --git a/modules/core/dependency/python-ihm/ihm/source.py b/modules/core/dependency/python-ihm/ihm/source.py index ab3f3af3e9..8a2c96153a 100644 --- a/modules/core/dependency/python-ihm/ihm/source.py +++ b/modules/core/dependency/python-ihm/ihm/source.py @@ -29,7 +29,7 @@ def __init__(self, ncbi_taxonomy_id=None, scientific_name=None, class Manipulated(Source): """An entity isolated from a genetically manipulated source. - See :class:`Entity`. + See :class:`ihm.Entity`. :param gene: Details about the gene source. :type gene: :class:`Details` @@ -43,12 +43,12 @@ def __init__(self, gene=None, host=None): class Natural(Source, Details): - """An entity isolated from a natural source. See :class:`Entity`. + """An entity isolated from a natural source. See :class:`ihm.Entity`. 
See :class:`Details` for a description of the parameters.""" src_method = 'nat' class Synthetic(Source, Details): - """An entity obtained synthetically. See :class:`Entity`. + """An entity obtained synthetically. See :class:`ihm.Entity`. See :class:`Details` for a description of the parameters.""" src_method = 'syn' diff --git a/modules/core/dependency/python-ihm/ihm/util.py b/modules/core/dependency/python-ihm/ihm/util/__init__.py similarity index 100% rename from modules/core/dependency/python-ihm/ihm/util.py rename to modules/core/dependency/python-ihm/ihm/util/__init__.py diff --git a/modules/core/dependency/python-ihm/ihm/util/make_mmcif.py b/modules/core/dependency/python-ihm/ihm/util/make_mmcif.py new file mode 100644 index 0000000000..5bb9082ac7 --- /dev/null +++ b/modules/core/dependency/python-ihm/ihm/util/make_mmcif.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 + +""" +Add minimal IHM-related tables to an mmCIF file. + +Given any mmCIF file as input, this script will add any missing +IHM-related tables and write out a new file that is minimally compliant +with the IHM dictionary. + +This is done by simply reading in the original file with python-ihm and +then writing it out again, so + a) any data in the input file that is not understood by python-ihm + will be lost on output; and + b) input files that aren't compliant with the PDBx dictionary, or that + contain syntax errors or other problems, may crash or otherwise confuse + python-ihm. + +The --add option can also be used to combine multiple input mmCIF files into +one. This is typically used when the mmCIF files contain models with +differing composition. Only model (coordinate) information is combined, not +other IHM information such as starting models or restraints. 
+""" + + +import ihm.reader +import ihm.dumper +import ihm.model +import ihm.protocol +import os +import argparse + + +def add_ihm_info(s): + if not s.title: + s.title = 'Auto-generated system' + + # Simple default assembly containing all chains + default_assembly = ihm.Assembly(s.asym_units, name='Modeled assembly') + + # Simple default atomic representation for everything + default_representation = ihm.representation.Representation( + [ihm.representation.AtomicSegment(asym, rigid=False) + for asym in s.asym_units]) + + # Simple default modeling protocol + default_protocol = ihm.protocol.Protocol(name='modeling') + + for state_group in s.state_groups: + for state in state_group: + for model_group in state: + for model in model_group: + if not model.assembly: + model.assembly = default_assembly + if not model.representation: + model.representation = default_representation + if not model.protocol: + model.protocol = default_protocol + return s + + +def add_ihm_info_one_system(fname): + """Read mmCIF file `fname`, which must contain a single System, and + return it with any missing IHM data added.""" + with open(fname) as fh: + systems = ihm.reader.read(fh) + if len(systems) != 1: + raise ValueError("mmCIF file %s must contain exactly 1 data block " + "(%d found)" % (fname, len(systems))) + return add_ihm_info(systems[0]) + + +def combine(s, other_s): + """Add models from the System `other_s` into the System `s`. + After running this function, `s` will contain all Models from both + systems. The models are added to new StateGroup(s) in `s`. 
+ Note that this function also modifies `other_s` in place, so that + System should no longer be used after calling this function.""" + # First map all Entity and AsymUnit objects in `other_s` to equivalent + # objects in `s` + entity_map = combine_entities(s, other_s) + asym_map = combine_asyms(s, other_s, entity_map) + # Now handle the Models themselves + combine_atoms(s, other_s, asym_map) + + +def combine_entities(s, other_s): + """Add `other_s` entities into `s`. Returns a dict that maps Entities + in `other_s` to equivalent objects in `s`.""" + entity_map = {} + sequences = dict((e.sequence, e) for e in s.entities) + for e in other_s.entities: + if e.sequence in sequences: + # If the `other_s` Entity already exists in `s`, map to it + entity_map[e] = sequences[e.sequence] + else: + # Otherwise, add the `other_s` Entity to `s` + s.entities.append(e) + entity_map[e] = e + return entity_map + + +def combine_asyms(s, other_s, entity_map): + """Add `other_s` asyms into `s`. Returns a dict that maps AsymUnits + in `other_s` to equivalent objects in `s`.""" + asym_map = {} + # Collect author-provided information for existing asyms. 
For polymers, + # we use the author-provided chain ID; for non-polymers, we also use + # the author-provided residue number of the first (only) residue + poly_asyms = dict(((a.entity, a.strand_id), a) + for a in s.asym_units if a.entity.is_polymeric()) + nonpoly_asyms = dict(((a.entity, a.strand_id, a.auth_seq_id_map[1]), a) + for a in s.asym_units + if a.entity.type == 'non-polymer') + + def map_asym(asym, orig_asym): + if orig_asym: + # If an equivalent asym already exists, use it (and its asym_id) + asym_map[asym] = orig_asym + else: + # Otherwise, add a new asym + asym_map[asym] = asym + asym.id = None # Assign new ID + s.asym_units.append(asym) + + for asym in other_s.asym_units: + # Point to Entity in `s`, not `other_s` + asym.entity = entity_map[asym.entity] + # For polymers and non-polymers, if an asym in `other_s` has the + # same author-provided information and entity_id as an asym in `s`, + # reuse the asym_id + if asym.entity.is_polymeric(): + map_asym(asym, poly_asyms.get((asym.entity, asym.strand_id))) + elif asym.entity.type == 'non-polymer': + map_asym(asym, nonpoly_asyms.get((asym.entity, asym.strand_id, + asym.auth_seq_id_map[1]))) + else: + # For waters and branched entities, always assign a new asym_id + asym_map[asym] = asym + asym.id = None # Assign new ID + s.asym_units.append(asym) + return asym_map + + +def combine_atoms(s, other_s, asym_map): + """Add `other_s` atoms into `s`""" + seen_asmb = set() + seen_rep = set() + for state_group in other_s.state_groups: + for state in state_group: + for model_group in state: + for model in model_group: + # Assembly, Representation and Atom and Sphere objects + # all reference `other_s` asyms. We must map these to + # asyms in `s`. 
+ asmb = model.assembly + if id(asmb) not in seen_asmb: + seen_asmb.add(id(asmb)) + # todo: also handle AsymUnitRange + asmb[:] = [asym_map[asym] for asym in asmb] + rep = model.representation + if id(rep) not in seen_rep: + seen_rep.add(id(rep)) + for seg in rep: + seg.asym_unit = asym_map[seg.asym_unit] + for atom in model._atoms: + atom.asym_unit = asym_map[atom.asym_unit] + for sphere in model._spheres: + sphere.asym_unit = asym_map[sphere.asym_unit] + + # Add all models as new state groups + s.state_groups.extend(other_s.state_groups) + + +def get_args(): + p = argparse.ArgumentParser( + description="Add minimal IHM-related tables to an mmCIF file.") + p.add_argument("input", metavar="input.cif", help="input mmCIF file name") + p.add_argument("output", metavar="output.cif", + help="output mmCIF file name", + default="output.cif", nargs="?") + p.add_argument("--add", "-a", action='append', metavar="add.cif", + help="also add model information from the named mmCIF " + "file to the output file") + return p.parse_args() + + +def main(): + args = get_args() + + if (os.path.exists(args.input) and os.path.exists(args.output) + and os.path.samefile(args.input, args.output)): + raise ValueError("Input and output are the same file") + + if args.add: + s = add_ihm_info_one_system(args.input) + for other in args.add: + other_s = add_ihm_info_one_system(other) + combine(s, other_s) + with open(args.output, 'w') as fhout: + ihm.dumper.write( + fhout, [s], + variant=ihm.dumper.IgnoreVariant(['_audit_conform'])) + else: + with open(args.input) as fh: + with open(args.output, 'w') as fhout: + ihm.dumper.write( + fhout, [add_ihm_info(s) for s in ihm.reader.read(fh)], + variant=ihm.dumper.IgnoreVariant(['_audit_conform'])) + + +if __name__ == '__main__': + main() diff --git a/modules/core/dependency/python-ihm/make-release.sh b/modules/core/dependency/python-ihm/make-release.sh index 28f634a71f..9a0ed7fd19 100755 --- a/modules/core/dependency/python-ihm/make-release.sh +++ 
b/modules/core/dependency/python-ihm/make-release.sh @@ -4,7 +4,8 @@ # - Update AuditConformDumper to match latest IHM dictionary if necessary # - Run util/validate-outputs.py to make sure all example outputs validate # (cd util; PYTHONPATH=.. python3 ./validate-outputs.py) -# - Update ChangeLog.rst and util/python-ihm.spec with the release number +# - Update ChangeLog.rst, util/debian/changelog, and util/python-ihm.spec +# with the release number and date # - Update release number in ihm/__init__.py, MANIFEST.in, and setup.py # - Commit, tag, and push # - Make release on GitHub @@ -23,4 +24,4 @@ python3 setup.py sdist rm -f "src/ihm_format_wrap_${VERSION}.c" echo "Now use 'twine upload dist/ihm-${VERSION}.tar.gz' to publish the release on PyPi." -echo "Then, update the conda-forge, COPR, and Homebrew packages to match." +echo "Then, update the conda-forge, COPR, PPA, and Homebrew packages to match." diff --git a/modules/core/dependency/python-ihm/setup.py b/modules/core/dependency/python-ihm/setup.py index 473459fd84..d54a6555a9 100755 --- a/modules/core/dependency/python-ihm/setup.py +++ b/modules/core/dependency/python-ihm/setup.py @@ -7,7 +7,7 @@ import sys import os -VERSION = "0.43" +VERSION = "1.1" copy_args = sys.argv[1:] @@ -52,7 +52,7 @@ author_email='ben@salilab.org', url='https://github.com/ihmwg/python-ihm', ext_modules=mod, - packages=['ihm'], + packages=['ihm', 'ihm.util'], install_requires=['msgpack'], classifiers=[ "Programming Language :: Python :: 2.7", diff --git a/modules/core/dependency/python-ihm/test/input/mini_add.cif b/modules/core/dependency/python-ihm/test/input/mini_add.cif new file mode 100644 index 0000000000..3f2743baec --- /dev/null +++ b/modules/core/dependency/python-ihm/test/input/mini_add.cif @@ -0,0 +1,55 @@ +data_model +# +_exptl.method 'model, MODELLER Version 9.24 2020/08/21 11:54:31' +# +_modeller.version 9.24 +# +loop_ +_struct_asym.id +_struct_asym.entity_id +_struct_asym.details +A 1 ? +B 2 ? 
+# +loop_ +_entity_poly_seq.entity_id +_entity_poly_seq.num +_entity_poly_seq.mon_id +1 1 VAL +1 2 GLY +1 3 GLN +1 4 GLN +1 5 TYR +1 6 SER +1 7 SER +2 1 PRO +2 2 GLU +# +loop_ +_atom_site.group_PDB +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_asym_id +_atom_site.auth_asym_id +_atom_site.label_seq_id +_atom_site.auth_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.B_iso_or_equiv +_atom_site.label_entity_id +_atom_site.id +_atom_site.pdbx_PDB_model_num +ATOM C CA . VAL A A 1 2 ? 114.370 27.980 -26.088 1.000 143.490 1 2 1 +ATOM C CA . GLY A A 2 3 ? 111.506 26.368 -28.075 1.000 137.530 1 9 1 +ATOM C CA . GLN A A 3 4 ? 113.468 23.113 -28.639 1.000 128.420 1 13 1 +ATOM C CA . GLN A A 4 5 ? 113.808 21.534 -32.168 1.000 117.620 1 22 1 +ATOM C CA . TYR A A 5 6 ? 116.743 22.770 -34.259 1.000 103.700 1 31 1 +ATOM C CA . SER A A 6 7 ? 116.626 25.161 -37.229 1.000 93.490 1 43 1 +ATOM C CA . SER A A 7 8 ? 119.165 25.590 -40.036 1.000 87.320 1 49 1 +ATOM C CA . PRO B B 1 3 ? 70.427 58.819 51.717 1.000 152.390 2 55 1 +ATOM C CA . GLU B B 2 4 ? 68.584 58.274 48.425 1.000 152.090 2 63 1 diff --git a/modules/core/dependency/python-ihm/test/input/mini_nonpoly.cif b/modules/core/dependency/python-ihm/test/input/mini_nonpoly.cif new file mode 100644 index 0000000000..ae3d6db2d3 --- /dev/null +++ b/modules/core/dependency/python-ihm/test/input/mini_nonpoly.cif @@ -0,0 +1,82 @@ +data_model +_entry.id model +_struct.entry_id model +_struct.pdbx_model_details . +_struct.pdbx_structure_determination_methodology integrative +_struct.title . 
+_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/IHM-dictionary/9be59e1/ihm-extension.dic +_audit_conform.dict_name ihm-extension.dic +_audit_conform.dict_version 1.24 +# +loop_ +_chem_comp.id +_chem_comp.type +_chem_comp.name +_chem_comp.formula +_chem_comp.formula_weight +HEM non-polymer 'PROTOPORPHYRIN IX CONTAINING FE' 'C34 H32 Fe N4 O4' 616.499 +# +# +loop_ +_entity.id +_entity.type +_entity.src_method +_entity.pdbx_description +_entity.formula_weight +_entity.pdbx_number_of_molecules +_entity.details +1 non-polymer man Heme 616.499 2 . +# +# +loop_ +_pdbx_entity_nonpoly.entity_id +_pdbx_entity_nonpoly.name +_pdbx_entity_nonpoly.comp_id +1 Heme HEM +# +# +loop_ +_struct_asym.id +_struct_asym.entity_id +_struct_asym.details +A 1 'First heme' +B 1 'Second heme' +# +# +loop_ +_pdbx_nonpoly_scheme.asym_id +_pdbx_nonpoly_scheme.entity_id +_pdbx_nonpoly_scheme.mon_id +_pdbx_nonpoly_scheme.ndb_seq_num +_pdbx_nonpoly_scheme.pdb_seq_num +_pdbx_nonpoly_scheme.auth_seq_num +_pdbx_nonpoly_scheme.auth_mon_id +_pdbx_nonpoly_scheme.pdb_strand_id +_pdbx_nonpoly_scheme.pdb_ins_code +A 1 HEM 1 100 100 HEM A . +B 1 HEM 1 200 200 HEM B . +# +# +loop_ +_atom_site.group_PDB +_atom_site.id +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_seq_id +_atom_site.auth_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.label_asym_id +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.label_entity_id +_atom_site.auth_asym_id +_atom_site.auth_comp_id +_atom_site.B_iso_or_equiv +_atom_site.pdbx_PDB_model_num +_atom_site.ihm_model_id +HETATM 1 FE FE . HEM . 100 ? A 0 0 0 . 1 A HEM . 1 1 +HETATM 2 FE FE . HEM . 200 ? B 10.000 10.000 10.000 . 1 B HEM . 
1 1 diff --git a/modules/core/dependency/python-ihm/test/input/mini_nonpoly_add.cif b/modules/core/dependency/python-ihm/test/input/mini_nonpoly_add.cif new file mode 100644 index 0000000000..c61f3a60d4 --- /dev/null +++ b/modules/core/dependency/python-ihm/test/input/mini_nonpoly_add.cif @@ -0,0 +1,82 @@ +data_model +_entry.id model +_struct.entry_id model +_struct.pdbx_model_details . +_struct.pdbx_structure_determination_methodology integrative +_struct.title . +_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/IHM-dictionary/9be59e1/ihm-extension.dic +_audit_conform.dict_name ihm-extension.dic +_audit_conform.dict_version 1.24 +# +loop_ +_chem_comp.id +_chem_comp.type +_chem_comp.name +_chem_comp.formula +_chem_comp.formula_weight +HEM non-polymer 'PROTOPORPHYRIN IX CONTAINING FE' 'C34 H32 Fe N4 O4' 616.499 +# +# +loop_ +_entity.id +_entity.type +_entity.src_method +_entity.pdbx_description +_entity.formula_weight +_entity.pdbx_number_of_molecules +_entity.details +1 non-polymer man Heme 616.499 2 . +# +# +loop_ +_pdbx_entity_nonpoly.entity_id +_pdbx_entity_nonpoly.name +_pdbx_entity_nonpoly.comp_id +1 Heme HEM +# +# +loop_ +_struct_asym.id +_struct_asym.entity_id +_struct_asym.details +A 1 'First heme' +B 1 'Second heme' +# +# +loop_ +_pdbx_nonpoly_scheme.asym_id +_pdbx_nonpoly_scheme.entity_id +_pdbx_nonpoly_scheme.mon_id +_pdbx_nonpoly_scheme.ndb_seq_num +_pdbx_nonpoly_scheme.pdb_seq_num +_pdbx_nonpoly_scheme.auth_seq_num +_pdbx_nonpoly_scheme.auth_mon_id +_pdbx_nonpoly_scheme.pdb_strand_id +_pdbx_nonpoly_scheme.pdb_ins_code +A 1 HEM 1 100 100 HEM A . +B 1 HEM 1 800 800 HEM B . 
+# +# +loop_ +_atom_site.group_PDB +_atom_site.id +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_seq_id +_atom_site.auth_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.label_asym_id +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.label_entity_id +_atom_site.auth_asym_id +_atom_site.auth_comp_id +_atom_site.B_iso_or_equiv +_atom_site.pdbx_PDB_model_num +_atom_site.ihm_model_id +HETATM 1 FE FE . HEM . 100 ? A 0 0 0 . 1 A HEM . 1 1 +HETATM 2 FE FE . HEM . 800 ? B 10.000 10.000 10.000 . 1 B HEM . 1 1 diff --git a/modules/core/dependency/python-ihm/test/test_dumper.py b/modules/core/dependency/python-ihm/test/test_dumper.py index a49574f7fc..54208f7bf8 100644 --- a/modules/core/dependency/python-ihm/test/test_dumper.py +++ b/modules/core/dependency/python-ihm/test/test_dumper.py @@ -114,7 +114,7 @@ def test_audit_conform_dumper(self): lines = sorted(out.split('\n')) self.assertEqual(lines[1].split()[0], "_audit_conform.dict_location") self.assertEqual(lines[2].rstrip('\r\n'), - "_audit_conform.dict_name ihm-extension.dic") + "_audit_conform.dict_name mmcif_ihm.dic") self.assertEqual(lines[3].split()[0], "_audit_conform.dict_version") def test_struct_dumper(self): @@ -426,9 +426,10 @@ def test_entity_duplicates(self): def test_entity_duplicate_branched(self): """Test EntityDumper with duplicate branched entities""" system = ihm.System() - sacc = ihm.SaccharideChemComp('NAG') - system.entities.append(ihm.Entity([sacc])) - system.entities.append(ihm.Entity([sacc])) + sacc1 = ihm.SaccharideChemComp('NAG') + sacc2 = ihm.SaccharideChemComp('FUC') + system.entities.append(ihm.Entity([sacc1, sacc2])) + system.entities.append(ihm.Entity([sacc1, sacc2])) dumper = ihm.dumper._EntityDumper() dumper.finalize(system) # Assign IDs out = _get_dumper_output(dumper, system) @@ -863,7 +864,8 @@ def test_entity_nonpoly_dumper(self): e2 = 
ihm.Entity([ihm.NonPolymerChemComp('HEM')], description='heme') e3 = ihm.Entity([ihm.WaterChemComp()]) # Branched entity - e4 = ihm.Entity([ihm.SaccharideChemComp('NAG')]) + e4 = ihm.Entity([ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('FUC')]) system.entities.extend((e1, e2, e3, e4)) ed = ihm.dumper._EntityDumper() @@ -1197,7 +1199,8 @@ def test_external_reference_dumper(self): 'bar')) repo3 = ihm.location.Repository(doi="10.5281/zenodo.58025", url='foo.spd') - loc = ihm.location.InputFileLocation(repo=repo1, path='bar') + loc = ihm.location.InputFileLocation(repo=repo1, path='bar', + file_format='TXT') system.locations.append(loc) # Duplicates should be ignored loc = ihm.location.InputFileLocation(repo=repo1, path='bar') @@ -1259,14 +1262,15 @@ def test_external_reference_dumper(self): _ihm_external_files.reference_id _ihm_external_files.file_path _ihm_external_files.content_type +_ihm_external_files.file_format _ihm_external_files.file_size_bytes _ihm_external_files.details -1 1 bar 'Input data or restraints' . . -2 1 baz 'Input data or restraints' . . -3 2 foo/bar/baz 'Modeling or post-processing output' . . -4 3 foo.spd 'Input data or restraints' . 'EM micrographs' -5 3 . 'Input data or restraints' . 'EM micrographs' -6 4 %s 'Modeling workflow or script' 4 . +1 1 bar 'Input data or restraints' TXT . . +2 1 baz 'Input data or restraints' . . . +3 2 foo/bar/baz 'Modeling or post-processing output' . . . +4 3 foo.spd 'Input data or restraints' . . 'EM micrographs' +5 3 . 'Input data or restraints' . . 'EM micrographs' +6 4 %s 'Modeling workflow or script' . 4 . 
# """ % bar.replace(os.sep, '/')) @@ -5064,7 +5068,8 @@ def test_entity_branch_list_dumper(self): """Test EntityBranchListDumper""" system = ihm.System() system.entities.append(ihm.Entity( - [ihm.SaccharideChemComp('NAG')])) + [ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('FUC')])) # Non-branched entity system.entities.append(ihm.Entity('ACGT')) ed = ihm.dumper._EntityDumper() @@ -5078,6 +5083,7 @@ def test_entity_branch_list_dumper(self): _pdbx_entity_branch_list.comp_id _pdbx_entity_branch_list.hetero 1 1 NAG . +1 2 FUC . # """) @@ -5085,7 +5091,8 @@ def test_entity_branch_dumper(self): """Test EntityBranchDumper""" system = ihm.System() system.entities.append(ihm.Entity( - [ihm.SaccharideChemComp('NAG')])) + [ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('FUC')])) # Non-branched entity system.entities.append(ihm.Entity('ACGT')) ed = ihm.dumper._EntityDumper() @@ -5103,14 +5110,21 @@ def test_entity_branch_dumper(self): def test_branch_scheme_dumper(self): """Test BranchSchemeDumper""" system = ihm.System() - e1 = ihm.Entity([ihm.SaccharideChemComp('NAG')]) - e2 = ihm.Entity([ihm.SaccharideChemComp('FUC')]) + e1 = ihm.Entity([ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('FUC')]) + e2 = ihm.Entity([ihm.SaccharideChemComp('FUC'), + ihm.SaccharideChemComp('BGC')]) + e3 = ihm.Entity([ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('BGC')]) # Non-branched entity - e3 = ihm.Entity('ACT') - system.entities.extend((e1, e2, e3)) + e4 = ihm.Entity('ACT') + system.entities.extend((e1, e2, e3, e4)) system.asym_units.append(ihm.AsymUnit(e1, 'foo')) system.asym_units.append(ihm.AsymUnit(e2, 'bar', auth_seq_id_map=5)) - system.asym_units.append(ihm.AsymUnit(e3, 'baz')) + system.asym_units.append(ihm.AsymUnit( + e3, 'bar', auth_seq_id_map={1: 6, 2: (7, 'A')}, + orig_auth_seq_id_map={1: 100})) + system.asym_units.append(ihm.AsymUnit(e4, 'baz')) ihm.dumper._EntityDumper().finalize(system) ihm.dumper._StructAsymDumper().finalize(system) 
dumper = ihm.dumper._BranchSchemeDumper() @@ -5122,12 +5136,17 @@ def test_branch_scheme_dumper(self): _pdbx_branch_scheme.mon_id _pdbx_branch_scheme.num _pdbx_branch_scheme.pdb_seq_num +_pdbx_branch_scheme.pdb_ins_code _pdbx_branch_scheme.auth_seq_num _pdbx_branch_scheme.auth_mon_id _pdbx_branch_scheme.pdb_mon_id _pdbx_branch_scheme.pdb_asym_id -A 1 NAG 1 1 1 NAG NAG A -B 2 FUC 1 6 6 FUC FUC B +A 1 NAG 1 1 . 1 NAG NAG A +A 1 FUC 2 2 . 2 FUC FUC A +B 2 FUC 1 6 . 6 FUC FUC B +B 2 BGC 2 7 . 7 BGC BGC B +C 3 NAG 1 6 . 100 NAG NAG C +C 3 BGC 2 7 A 7 BGC BGC C # """) @@ -5191,6 +5210,50 @@ def test_branch_link_dumper(self): # """) + def test_database_dumper(self): + """Test DatabaseDumper""" + system = ihm.System() + dumper = ihm.dumper._DatabaseDumper() + out = _get_dumper_output(dumper, system) + self.assertEqual(out, '') + + system = ihm.System( + databases=[ihm.Database(id='foo', code='bar'), + ihm.Database(id='baz', code='1abc', accession='1abcxyz', + doi='1.2.3.4')]) + dumper = ihm.dumper._DatabaseDumper() + out = _get_dumper_output(dumper, system) + self.assertEqual(out, """# +loop_ +_database_2.database_id +_database_2.database_code +_database_2.pdbx_database_accession +_database_2.pdbx_DOI +foo bar . . +baz 1abc 1abcxyz 1.2.3.4 +# +""") + + def test_database_status_dumper(self): + """Test DatabaseStatusDumper""" + system = ihm.System() + system._database_status = { + 'status_code': 'REL', 'entry_id': '5FD1', + 'recvd_initial_deposition_date': '1993-06-29', + 'deposit_site': ihm.unknown, 'process_site': 'BNL', + 'sg_entry': None} + dumper = ihm.dumper._DatabaseStatusDumper() + out = _get_dumper_output(dumper, system) + # sort to remove dict order + self.assertEqual("\n".join(sorted(out.split('\n'))), + """ +_pdbx_database_status.deposit_site ? +_pdbx_database_status.entry_id 5FD1 +_pdbx_database_status.process_site BNL +_pdbx_database_status.recvd_initial_deposition_date 1993-06-29 +_pdbx_database_status.sg_entry . 
+_pdbx_database_status.status_code REL""") + if __name__ == '__main__': unittest.main() diff --git a/modules/core/dependency/python-ihm/test/test_examples.py b/modules/core/dependency/python-ihm/test/test_examples.py index c0a78e94d2..945e8b9983 100644 --- a/modules/core/dependency/python-ihm/test/test_examples.py +++ b/modules/core/dependency/python-ihm/test/test_examples.py @@ -55,7 +55,7 @@ def test_locations_example(self): # can read it with open(out) as fh: contents = fh.readlines() - self.assertEqual(len(contents), 70) + self.assertEqual(len(contents), 71) with open(out) as fh: s, = ihm.reader.read(fh) os.unlink(out) diff --git a/modules/core/dependency/python-ihm/test/test_format.py b/modules/core/dependency/python-ihm/test/test_format.py index 94a2783b99..170432aadd 100644 --- a/modules/core/dependency/python-ihm/test/test_format.py +++ b/modules/core/dependency/python-ihm/test/test_format.py @@ -800,6 +800,258 @@ def __call__(self): _format.ihm_reader_free(reader) fh.close() + def test_preserving_tokenizer_get_token(self): + """Test _PreservingCifTokenizer._get_token()""" + cif = """ +# Full line comment +_cat1.Foo baz # End of line comment +""" + t = ihm.format._PreservingCifTokenizer(StringIO(cif)) + tokens = [t._get_token() for _ in range(11)] + self.assertIsInstance(tokens[0], ihm.format._EndOfLineToken) + self.assertIsInstance(tokens[1], ihm.format._CommentToken) + self.assertEqual(tokens[1].txt, ' Full line comment') + self.assertIsInstance(tokens[2], ihm.format._EndOfLineToken) + self.assertIsInstance(tokens[3], ihm.format._PreservingVariableToken) + self.assertEqual(tokens[3].category, '_cat1') + self.assertEqual(tokens[3].keyword, 'foo') + self.assertEqual(tokens[3].orig_keyword, 'Foo') + self.assertIsInstance(tokens[4], ihm.format._WhitespaceToken) + self.assertEqual(tokens[4].txt, ' ') + self.assertIsInstance(tokens[5], ihm.format._TextValueToken) + self.assertEqual(tokens[5].txt, 'baz') + self.assertIsInstance(tokens[6], 
ihm.format._WhitespaceToken) + self.assertEqual(tokens[6].txt, ' ') + self.assertIsInstance(tokens[7], ihm.format._CommentToken) + self.assertEqual(tokens[7].txt, ' End of line comment') + self.assertIsInstance(tokens[8], ihm.format._EndOfLineToken) + self.assertIsNone(tokens[9]) + self.assertIsNone(tokens[10]) + + # Make sure we can reconstruct the original mmCIF from the tokens + new_cif = "".join(x.as_mmcif() for x in tokens[:-2]) + self.assertEqual(new_cif, cif) + + def test_preserving_tokenizer_reconstruct(self): + """Make sure _PreservingCifTokenizer can reconstruct original mmCIF""" + cif = """ +data_foo_bar +# +_cat1.foo ? +# +_cat2.BaR . +# +loop_ +foo.bar +foo.baz +foo.single +foo.double +foo.multi +x . 'single' "double" +;multi +; +""" + t = ihm.format._PreservingCifTokenizer(StringIO(cif)) + tokens = [] + while True: + tok = t._get_token() + if tok is None: + break + else: + tokens.append(tok) + new_cif = "".join(x.as_mmcif() for x in tokens) + self.assertEqual(new_cif, cif) + + def test_preserving_variable_token(self): + """Test _PreservingVariableToken class""" + t = ihm.format._PreservingVariableToken("foo.BAR", 1) + self.assertEqual(t.keyword, 'bar') + self.assertEqual(t.orig_keyword, 'BAR') + self.assertEqual(t.as_mmcif(), 'foo.BAR') + t.keyword = 'baz' + self.assertEqual(t.as_mmcif(), 'foo.baz') + + def test_preserving_cif_reader(self): + """Test _PreservingCifReader class""" + cif = """ +data_foo_bar +# +_cat1.foo ? 
+# +loop_ +_foo.bar +_foo.baz +a b c d +x y +""" + r = ihm.format._PreservingCifReader(StringIO(cif)) + tokens = list(r.read_file()) + self.assertIsInstance(tokens[5], ihm.format._CategoryTokenGroup) + self.assertIsInstance(tokens[8], ihm.format._LoopHeaderTokenGroup) + self.assertIsInstance(tokens[9], ihm.format._LoopRowTokenGroup) + self.assertIsInstance(tokens[10], ihm.format._LoopRowTokenGroup) + self.assertIsInstance(tokens[11], ihm.format._LoopRowTokenGroup) + new_cif = "".join(x.as_mmcif() for x in tokens) + self.assertEqual(new_cif, cif) + + def test_preserving_cif_reader_filter(self): + """Test _PreservingCifReader class with filters""" + cif = """ +data_foo_bar +# +_cat1.bar old +# +loop_ +_foo.bar +_foo.baz +a b c d +x y +""" + r = ihm.format._PreservingCifReader(StringIO(cif)) + filters = [ihm.format._ChangeValueFilter(".bar", old='old', new='new'), + ihm.format._ChangeValueFilter(".bar", old='a', new='newa'), + ihm.format._ChangeValueFilter(".foo", old='old', new='new')] + tokens = list(r.read_file(filters)) + new_cif = "".join(x.as_mmcif() for x in tokens) + self.assertEqual(new_cif, """ +data_foo_bar +# +_cat1.bar new +# +loop_ +_foo.bar +_foo.baz +newa b c d +x y +""") + + def test_category_token_group(self): + """Test CategoryTokenGroup class""" + var = ihm.format._PreservingVariableToken("_foo.bar", 1) + space = ihm.format._WhitespaceToken(" ") + val = ihm.format._TextValueToken("baz", quote=None) + tg = ihm.format._CategoryTokenGroup( + var, ihm.format._SpacedToken([space], val)) + self.assertEqual(str(tg), "<_CategoryTokenGroup(_foo.bar, baz)>") + self.assertEqual(tg.as_mmcif(), '_foo.bar baz\n') + self.assertEqual(tg.category, "_foo") + self.assertEqual(tg.keyword, "bar") + self.assertEqual(tg.value, "baz") + tg.value = None + self.assertIsNone(tg.value) + + def test_spaced_token(self): + """Test SpacedToken class""" + space = ihm.format._WhitespaceToken(" ") + val = ihm.format._TextValueToken("baz", quote=None) + sp = 
ihm.format._SpacedToken([space], val) + self.assertEqual(sp.as_mmcif(), " baz") + self.assertEqual(sp.value, 'baz') + sp.value = None + self.assertIsNone(sp.value) + self.assertEqual(sp.as_mmcif(), ' .') + sp.value = ihm.unknown + self.assertIs(sp.value, ihm.unknown) + self.assertEqual(sp.as_mmcif(), ' ?') + sp.value = "test value" + self.assertEqual(sp.as_mmcif(), ' "test value"') + + def test_loop_header_token_group(self): + """Test LoopHeaderTokenGroup class""" + cif = """ +loop_ +_foo.bar +_foo.baz +x y +""" + r = ihm.format._PreservingCifReader(StringIO(cif)) + token = list(r.read_file())[1] + self.assertIsInstance(token, ihm.format._LoopHeaderTokenGroup) + self.assertEqual(str(token), + "<_LoopHeaderTokenGroup(_foo, ['bar', 'baz'])>") + self.assertEqual(token.keyword_index("bar"), 0) + self.assertEqual(token.keyword_index("baz"), 1) + self.assertRaises(ValueError, token.keyword_index, "foo") + + def test_change_value_filter_init(self): + """Test ChangeValueFilter constructor""" + f = ihm.format._ChangeValueFilter("_citation.id", old='1', new='2') + self.assertEqual(f.category, '_citation') + self.assertEqual(f.keyword, 'id') + f = ihm.format._ChangeValueFilter(".bar", old='1', new='2') + self.assertIsNone(f.category) + self.assertEqual(f.keyword, 'bar') + f = ihm.format._ChangeValueFilter("bar", old='1', new='2') + self.assertIsNone(f.category) + self.assertEqual(f.keyword, 'bar') + + def test_change_value_filter_category(self): + """Test ChangeValueFilter.filter_category""" + var = ihm.format._PreservingVariableToken("_foo.bar", 1) + space = ihm.format._WhitespaceToken(" ") + val = ihm.format._TextValueToken("baz", quote=None) + tg = ihm.format._CategoryTokenGroup( + var, ihm.format._SpacedToken([space], val)) + # Value does not match + f = ihm.format._ChangeValueFilter("_foo.bar", old='old', new='new') + new_tg = f.filter_category(tg) + self.assertEqual(new_tg.value, 'baz') + + # Keyword does not match + f = ihm.format._ChangeValueFilter("_foo.foo", 
old='baz', new='new') + new_tg = f.filter_category(tg) + self.assertEqual(new_tg.value, 'baz') + + # Category does not match + f = ihm.format._ChangeValueFilter("_bar.bar", old='baz', new='new') + new_tg = f.filter_category(tg) + self.assertEqual(new_tg.value, 'baz') + + # Category matches exactly + f = ihm.format._ChangeValueFilter("_foo.bar", old='baz', new='new') + new_tg = f.filter_category(tg) + self.assertEqual(new_tg.value, 'new') + + # All-category match + f = ihm.format._ChangeValueFilter(".bar", old='new', new='new2') + new_tg = f.filter_category(tg) + self.assertEqual(new_tg.value, 'new2') + + def test_change_value_filter_loop(self): + """Test ChangeValueFilter.get_loop_filter""" + cif = """ +loop_ +_foo.bar +_foo.baz +x y +""" + r = ihm.format._PreservingCifReader(StringIO(cif)) + tokens = list(r.read_file()) + header = tokens[1] + row = tokens[2] + # Keyword does not match + f = ihm.format._ChangeValueFilter("_foo.foo", old='x', new='new') + self.assertIsNone(f.get_loop_filter(header)) + + # Category does not match + f = ihm.format._ChangeValueFilter("_bar.bar", old='x', new='new') + self.assertIsNone(f.get_loop_filter(header)) + + # Value does not match + f = ihm.format._ChangeValueFilter("_foo.bar", old='notx', new='new') + lf = f.get_loop_filter(header) + self.assertEqual(lf(row).as_mmcif(), "x y") + + # Category matches exactly + f = ihm.format._ChangeValueFilter("_foo.bar", old='x', new='new') + lf = f.get_loop_filter(header) + self.assertEqual(lf(row).as_mmcif(), "new y") + + # All-category match + f = ihm.format._ChangeValueFilter(".bar", old='new', new='new2') + lf = f.get_loop_filter(header) + self.assertEqual(lf(row).as_mmcif(), "new2 y") + if __name__ == '__main__': unittest.main() diff --git a/modules/core/dependency/python-ihm/test/test_location.py b/modules/core/dependency/python-ihm/test/test_location.py index 5be9b89a0e..989a99c437 100644 --- a/modules/core/dependency/python-ihm/test/test_location.py +++ 
b/modules/core/dependency/python-ihm/test/test_location.py @@ -183,6 +183,7 @@ def test_file_location_repo(self): self.assertIsNone(loc.file_size) self.assertEqual(str(loc), "") + self.assertEqual(str(r), "") # locations should only compare equal if path and repo both match loc2 = ihm.location.InputFileLocation('foo/bar', repo=r) self.assertEqual(loc, loc2) diff --git a/modules/core/dependency/python-ihm/test/test_main.py b/modules/core/dependency/python-ihm/test/test_main.py index 0eaf8775a9..14bdb2bcba 100644 --- a/modules/core/dependency/python-ihm/test/test_main.py +++ b/modules/core/dependency/python-ihm/test/test_main.py @@ -23,6 +23,7 @@ def test_system(self): s = ihm.System(title='test system') self.assertEqual(s.title, 'test system') self.assertEqual(s.id, 'model') + self.assertEqual(s.databases, []) def test_chem_comp(self): """Test ChemComp class""" @@ -39,6 +40,9 @@ def test_chem_comp(self): self.assertEqual(cc1, cc2) self.assertEqual(hash(cc1), hash(cc2)) self.assertNotEqual(cc1, cc3) + cc4 = ihm.ChemComp(id='GLY', code='G', code_canonical='G', + formula=ihm.unknown) + self.assertIsNone(cc4.formula_weight) def test_chem_comp_id_5(self): """Test new-style 5-character CCD IDs in ChemComp""" @@ -219,19 +223,21 @@ def test_entity(self): e2 = ihm.Entity('AHCD', description='bar') e3 = ihm.Entity('AHCDE', description='foo') heme = ihm.Entity([ihm.NonPolymerChemComp('HEM')]) - sugar = ihm.Entity([ihm.SaccharideChemComp('NAG')]) + sugar = ihm.Entity([ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('FUC')]) self.assertEqual(e1, e2) self.assertNotEqual(e1, e3) self.assertEqual(e1.seq_id_range, (1, 4)) self.assertEqual(e3.seq_id_range, (1, 5)) - sugar2 = ihm.Entity([ihm.SaccharideChemComp('NAG')]) + sugar2 = ihm.Entity([ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('FUC')]) # Branched entities never compare equal unless they are the same object self.assertEqual(sugar, sugar) self.assertNotEqual(sugar, sugar2) # seq_id does not exist for 
nonpolymers self.assertEqual(heme.seq_id_range, (None, None)) # We do have an internal seq_id_range for branched entities - self.assertEqual(sugar.seq_id_range, (1, 1)) + self.assertEqual(sugar.seq_id_range, (1, 2)) def test_entity_weight(self): """Test Entity.formula_weight""" @@ -246,7 +252,8 @@ def test_entity_type(self): protein = ihm.Entity('AHCD') heme = ihm.Entity([ihm.NonPolymerChemComp('HEM')]) water = ihm.Entity([ihm.WaterChemComp()]) - sugar = ihm.Entity([ihm.SaccharideChemComp('NAG')]) + sugar = ihm.Entity([ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('FUC')]) self.assertEqual(protein.type, 'polymer') self.assertTrue(protein.is_polymeric()) self.assertFalse(protein.is_branched()) @@ -260,6 +267,12 @@ def test_entity_type(self): self.assertFalse(sugar.is_polymeric()) self.assertTrue(sugar.is_branched()) + # A single sugar should be classified non-polymer + single_sugar = ihm.Entity([ihm.SaccharideChemComp('NAG')]) + self.assertEqual(single_sugar.type, 'non-polymer') + self.assertFalse(single_sugar.is_polymeric()) + self.assertFalse(single_sugar.is_branched()) + # A single amino acid should be classified non-polymer single_aa = ihm.Entity('A') self.assertEqual(single_aa.type, 'non-polymer') @@ -415,7 +428,7 @@ def test_water_asym(self): self.assertEqual(a.number_of_molecules, 3) self.assertRaises(TypeError, ihm.AsymUnit, water) - self.assertRaises(TypeError, ihm.WaterAsymUnit, e) + self.assertRaises(TypeError, ihm.WaterAsymUnit, e, number=3) def test_asym_unit_residue(self): """Test Residue derived from an AsymUnit""" @@ -475,7 +488,8 @@ def test_asym_range(self): """Test AsymUnitRange class""" e = ihm.Entity('AHCDAH') heme = ihm.Entity([ihm.NonPolymerChemComp('HEM')]) - sugar = ihm.Entity([ihm.SaccharideChemComp('NAG')]) + sugar = ihm.Entity([ihm.SaccharideChemComp('NAG'), + ihm.SaccharideChemComp('FUC')]) a = ihm.AsymUnit(e, "testdetail") aheme = ihm.AsymUnit(heme) asugar = ihm.AsymUnit(sugar) @@ -484,7 +498,7 @@ def test_asym_range(self): # 
seq_id is not defined for nonpolymers self.assertEqual(aheme.seq_id_range, (None, None)) # We use seq_id internally for branched entities - self.assertEqual(asugar.seq_id_range, (1, 1)) + self.assertEqual(asugar.seq_id_range, (1, 2)) r = a(3, 4) self.assertEqual(r.seq_id_range, (3, 4)) self.assertEqual(r._id, 42) @@ -550,6 +564,26 @@ def test_orig_auth_seq_id_dict(self): self.assertEqual(a._get_pdb_auth_seq_id_ins_code(2), (4, 4, 'A')) self.assertEqual(a._get_pdb_auth_seq_id_ins_code(3), (3, 3, None)) + def test_water_orig_auth_seq_id_none(self): + """Test default water orig_auth_seq_id_map (None)""" + water = ihm.Entity([ihm.WaterChemComp()]) + a = ihm.WaterAsymUnit(water, number=3, + auth_seq_id_map={1: 0, 2: (4, 'A')}) + self.assertIsNone(a.orig_auth_seq_id_map) + self.assertEqual(a._get_pdb_auth_seq_id_ins_code(1), (0, 0, None)) + self.assertEqual(a._get_pdb_auth_seq_id_ins_code(2), (4, 4, 'A')) + self.assertEqual(a._get_pdb_auth_seq_id_ins_code(3), (3, 3, None)) + + def test_water_orig_auth_seq_id_dict(self): + """Test water orig_auth_seq_id_map as dict""" + water = ihm.Entity([ihm.WaterChemComp()]) + a = ihm.WaterAsymUnit(water, number=3, + auth_seq_id_map={1: 0, 2: (4, 'A')}, + orig_auth_seq_id_map={1: 5}) + self.assertEqual(a._get_pdb_auth_seq_id_ins_code(1), (0, 5, None)) + self.assertEqual(a._get_pdb_auth_seq_id_ins_code(2), (4, 4, 'A')) + self.assertEqual(a._get_pdb_auth_seq_id_ins_code(3), (3, 3, None)) + def test_assembly(self): """Test Assembly class""" e1 = ihm.Entity('AHCD') diff --git a/modules/core/dependency/python-ihm/test/test_make_mmcif.py b/modules/core/dependency/python-ihm/test/test_make_mmcif.py index d3206b071e..88e811e487 100644 --- a/modules/core/dependency/python-ihm/test/test_make_mmcif.py +++ b/modules/core/dependency/python-ihm/test/test_make_mmcif.py @@ -7,15 +7,16 @@ TOPDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) utils.set_search_paths(TOPDIR) import ihm.reader +import ihm.util.make_mmcif # Script should 
also be importable -MAKE_MMCIF = os.path.join(TOPDIR, 'util', 'make-mmcif.py') +MAKE_MMCIF = os.path.join(TOPDIR, 'ihm', 'util', 'make_mmcif.py') class Tests(unittest.TestCase): - @unittest.skipIf(sys.version_info[0] < 3, "make-mmcif.py needs Python 3") + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") def test_simple(self): - """Simple test of make-mmcif utility script""" + """Simple test of make_mmcif utility script""" incif = utils.get_input_file_name(TOPDIR, 'struct_only.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif]) with open('output.cif') as fh: @@ -25,9 +26,9 @@ def test_simple(self): 'of transcription regulation by Gdown1') os.unlink('output.cif') - @unittest.skipIf(sys.version_info[0] < 3, "make-mmcif.py needs Python 3") + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") def test_non_default_output(self): - """Simple test of make-mmcif with non-default output name""" + """Simple test of make_mmcif with non-default output name""" incif = utils.get_input_file_name(TOPDIR, 'struct_only.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif, 'non-default-output.cif']) @@ -38,9 +39,9 @@ def test_non_default_output(self): 'of transcription regulation by Gdown1') os.unlink('non-default-output.cif') - @unittest.skipIf(sys.version_info[0] < 3, "make-mmcif.py needs Python 3") + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") def test_no_title(self): - """Check that make-mmcif adds missing title""" + """Check that make_mmcif adds missing title""" incif = utils.get_input_file_name(TOPDIR, 'no_title.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif]) with open('output.cif') as fh: @@ -48,22 +49,22 @@ def test_no_title(self): self.assertEqual(s.title, 'Auto-generated system') os.unlink('output.cif') - @unittest.skipIf(sys.version_info[0] < 3, "make-mmcif.py needs Python 3") + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") def 
test_bad_usage(self): - """Bad usage of make-mmcif utility script""" + """Bad usage of make_mmcif utility script""" ret = subprocess.call([sys.executable, MAKE_MMCIF]) - self.assertEqual(ret, 1) + self.assertEqual(ret, 2) - @unittest.skipIf(sys.version_info[0] < 3, "make-mmcif.py needs Python 3") + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") def test_same_file(self): - """Check that make-mmcif fails if input and output are the same""" + """Check that make_mmcif fails if input and output are the same""" incif = utils.get_input_file_name(TOPDIR, 'struct_only.cif') ret = subprocess.call([sys.executable, MAKE_MMCIF, incif, incif]) self.assertEqual(ret, 1) - @unittest.skipIf(sys.version_info[0] < 3, "make-mmcif.py needs Python 3") + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") def test_mini(self): - """Check that make-mmcif works given only basic atom info""" + """Check that make_mmcif works given only basic atom info""" incif = utils.get_input_file_name(TOPDIR, 'mini.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif]) with open('output.cif') as fh: @@ -82,9 +83,9 @@ def test_mini(self): self.assertEqual(s.title, 'Auto-generated system') os.unlink('output.cif') - @unittest.skipIf(sys.version_info[0] < 3, "make-mmcif.py needs Python 3") + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") def test_pass_through(self): - """Check that make-mmcif passes through already-compliant files""" + """Check that make_mmcif passes through already-compliant files""" incif = utils.get_input_file_name(TOPDIR, 'docking.cif') subprocess.check_call([sys.executable, MAKE_MMCIF, incif]) with open('output.cif') as fh: @@ -104,6 +105,111 @@ def test_pass_through(self): self.assertEqual(s.title, 'Output from simple-docking example') os.unlink('output.cif') + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") + def test_add_polymers(self): + """Check that make_mmcif combines 
polymer information""" + # mini.cif contains two chains A, B + incif = utils.get_input_file_name(TOPDIR, 'mini.cif') + # mini_add.cif also contains A, B; A is the same sequence as mini.cif + # but B is different (so should be renamed C when we add) + addcif = utils.get_input_file_name(TOPDIR, 'mini_add.cif') + subprocess.check_call([sys.executable, MAKE_MMCIF, incif, + '--add', addcif]) + with open('output.cif') as fh: + s, = ihm.reader.read(fh) + self.assertEqual(len(s.entities), 3) + self.assertEqual(len(s.asym_units), 3) + self.assertEqual(len(s.state_groups), 2) + # Model from mini.cif + self.assertEqual(len(s.state_groups[0]), 1) + self.assertEqual(len(s.state_groups[0][0]), 1) + self.assertEqual(len(s.state_groups[0][0][0]), 1) + m = s.state_groups[0][0][0][0] + self.assertEqual(m.protocol.name, 'modeling') + self.assertEqual(m.assembly.name, 'Modeled assembly') + chain_a, chain_b, = m.representation + self.assertIs(chain_a.asym_unit.asym, s.asym_units[0]) + self.assertIs(chain_b.asym_unit.asym, s.asym_units[1]) + for chain in chain_a, chain_b: + self.assertIsInstance(chain, ihm.representation.AtomicSegment) + self.assertFalse(chain.rigid) + # Model from mini_add.cif + self.assertEqual(len(s.state_groups[1]), 1) + self.assertEqual(len(s.state_groups[1][0]), 1) + self.assertEqual(len(s.state_groups[1][0][0]), 1) + m = s.state_groups[1][0][0][0] + self.assertEqual(m.protocol.name, 'modeling') + self.assertEqual(m.assembly.name, 'Modeled assembly') + chain_a, chain_c, = m.representation + self.assertIs(chain_a.asym_unit.asym, s.asym_units[0]) + self.assertIs(chain_c.asym_unit.asym, s.asym_units[2]) + for chain in chain_a, chain_c: + self.assertIsInstance(chain, ihm.representation.AtomicSegment) + self.assertFalse(chain.rigid) + self.assertEqual(s.title, 'Auto-generated system') + os.unlink('output.cif') + + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") + def test_add_non_polymers(self): + """Check that make_mmcif combines non-polymer 
information""" + # mini_nonpoly.cif contains two hemes A, B + incif = utils.get_input_file_name(TOPDIR, 'mini_nonpoly.cif') + # mini_nonpoly_add.cif also contains A, B; A has the same author + # provided residue number as mini_nonpoly.cif but B is different + # (so should be renamed C when we add) + addcif = utils.get_input_file_name(TOPDIR, 'mini_nonpoly_add.cif') + subprocess.check_call([sys.executable, MAKE_MMCIF, incif, + '--add', addcif]) + with open('output.cif') as fh: + s, = ihm.reader.read(fh) + self.assertEqual(len(s.entities), 1) + self.assertEqual(len(s.asym_units), 3) + self.assertEqual(len(s.state_groups), 2) + # Model from mini_nonpoly.cif + self.assertEqual(len(s.state_groups[0]), 1) + self.assertEqual(len(s.state_groups[0][0]), 1) + self.assertEqual(len(s.state_groups[0][0][0]), 1) + m = s.state_groups[0][0][0][0] + self.assertEqual(m.protocol.name, 'modeling') + self.assertEqual(m.assembly.name, 'Modeled assembly') + chain_a, chain_b, = m.representation + self.assertIs(chain_a.asym_unit, s.asym_units[0]) + self.assertIs(chain_b.asym_unit, s.asym_units[1]) + for chain in chain_a, chain_b: + self.assertIsInstance(chain, ihm.representation.AtomicSegment) + self.assertFalse(chain.rigid) + # Model from mini_nonpoly_add.cif + self.assertEqual(len(s.state_groups[1]), 1) + self.assertEqual(len(s.state_groups[1][0]), 1) + self.assertEqual(len(s.state_groups[1][0][0]), 1) + m = s.state_groups[1][0][0][0] + self.assertEqual(m.protocol.name, 'modeling') + self.assertEqual(m.assembly.name, 'Modeled assembly') + chain_a, chain_c, = m.representation + self.assertIs(chain_a.asym_unit, s.asym_units[0]) + self.assertIs(chain_c.asym_unit, s.asym_units[2]) + for chain in chain_a, chain_c: + self.assertIsInstance(chain, ihm.representation.AtomicSegment) + self.assertFalse(chain.rigid) + self.assertEqual(s.title, 'Auto-generated system') + os.unlink('output.cif') + + @unittest.skipIf(sys.version_info[0] < 3, "make_mmcif.py needs Python 3") + def 
test_add_multi_data(self): + """make_mmcif should fail to add system with multiple data blocks""" + incif = utils.get_input_file_name(TOPDIR, 'mini.cif') + addcif = utils.get_input_file_name(TOPDIR, 'mini_add.cif') + with open(addcif) as fh: + addcif_contents = fh.read() + addcif_multi = 'addcif_multi.cif' + with open(addcif_multi, 'w') as fh: + fh.write(addcif_contents) + fh.write(addcif_contents.replace('data_model', 'data_model2')) + ret = subprocess.call([sys.executable, MAKE_MMCIF, incif, + '--add', addcif_multi]) + self.assertEqual(ret, 1) + os.unlink(addcif_multi) + if __name__ == '__main__': unittest.main() diff --git a/modules/core/dependency/python-ihm/test/test_multi_state_scheme.py b/modules/core/dependency/python-ihm/test/test_multi_state_scheme.py index dc3077cc90..76b1dbade0 100644 --- a/modules/core/dependency/python-ihm/test/test_multi_state_scheme.py +++ b/modules/core/dependency/python-ihm/test/test_multi_state_scheme.py @@ -386,7 +386,7 @@ def test_equilibriumconstant_init(self): 'equilibrium constant is determined from kinetic rates, kAB/kBA') def test_equilibrium_constant_eq(self): - """Test equality of EquilibriumConstant objetcs""" + """Test equality of EquilibriumConstant objects""" e_ref1 = ihm.multi_state_scheme.EquilibriumConstant( value='1.0', unit='a') @@ -550,7 +550,7 @@ def test_relaxationtime_init(self): unit='wrong_unit') def test_relaxationtime_eq(self): - """Test equality of RelaxationTime objetcs""" + """Test equality of RelaxationTime objects""" r_ref = ihm.multi_state_scheme.RelaxationTime( value=1.0, unit='milliseconds', diff --git a/modules/core/dependency/python-ihm/test/test_reader.py b/modules/core/dependency/python-ihm/test/test_reader.py index f8dbf604d4..3655dcddea 100644 --- a/modules/core/dependency/python-ihm/test/test_reader.py +++ b/modules/core/dependency/python-ihm/test/test_reader.py @@ -914,12 +914,14 @@ def test_external_file_handler(self): _ihm_external_files.reference_id _ihm_external_files.file_path 
_ihm_external_files.content_type +_ihm_external_files.file_format _ihm_external_files.file_size_bytes _ihm_external_files.details -1 1 scripts/test.py 'Modeling workflow or script' 180 'Test script' -2 2 foo/bar.txt 'Input data or restraints' . 'Test text' -3 3 . 'Modeling or post-processing output' . 'Ensemble structures' -4 3 . . . . +1 1 scripts/test.py 'Modeling workflow or script' TXT 180 'Test script' +2 2 foo/bar.txt 'Input data or restraints' TXT 42.0 'Test text' +3 3 . 'Modeling or post-processing output' . . 'Ensemble structures' +4 3 . . . . . +5 3 foo.txt Other . . 'Other file' """ # Order of the categories shouldn't matter cif1 = ext_ref_cat + ext_file_cat @@ -927,11 +929,13 @@ def test_external_file_handler(self): for cif in cif1, cif2: for fh in cif_file_handles(cif): s, = ihm.reader.read(fh) - l1, l2, l3, l4 = s.locations + l1, l2, l3, l4, l5 = s.locations self.assertEqual(l1.path, 'scripts/test.py') self.assertEqual(l1.details, 'Test script') self.assertEqual(l1.repo.doi, '10.5281/zenodo.1218053') + self.assertIsInstance(l1.file_size, int) self.assertEqual(l1.file_size, 180) + self.assertEqual(l1.file_format, 'TXT') self.assertEqual(l1.repo.details, 'test repo') self.assertEqual(l1.__class__, ihm.location.WorkflowFileLocation) @@ -939,22 +943,29 @@ def test_external_file_handler(self): self.assertEqual(l2.path, 'foo/bar.txt') self.assertEqual(l2.details, 'Test text') self.assertIsNone(l2.repo) - self.assertIsNone(l2.file_size) + self.assertIsInstance(l2.file_size, float) + self.assertAlmostEqual(l2.file_size, 42.0, delta=0.01) + self.assertEqual(l2.file_format, 'TXT') self.assertEqual(l2.__class__, ihm.location.InputFileLocation) self.assertEqual(l3.path, '.') self.assertEqual(l3.details, 'Ensemble structures') self.assertIsNone(l3.file_size) + self.assertIsNone(l3.file_format) self.assertEqual(l3.repo.doi, '10.5281/zenodo.1218058') self.assertEqual(l3.__class__, ihm.location.OutputFileLocation) self.assertEqual(l4.path, '.') 
self.assertIsNone(l4.file_size) + self.assertIsNone(l4.file_format) self.assertIsNone(l4.details) self.assertEqual(l4.repo.doi, '10.5281/zenodo.1218058') # Type is unspecified self.assertEqual(l4.__class__, ihm.location.FileLocation) + self.assertEqual(l5.content_type, 'Other') + self.assertEqual(l5.__class__, ihm.location.FileLocation) + def test_dataset_list_handler(self): """Test DatasetListHandler""" cif = """ @@ -1993,7 +2004,7 @@ def test_atom_site_handler(self): self.assertIsNone(a1.occupancy) self.assertEqual(a2.asym_unit._id, 'B') - self.assertIsNone(a2.seq_id) + self.assertEqual(a2.seq_id, 1) self.assertEqual(a2.atom_id, 'CA') self.assertEqual(a2.type_symbol, 'C') self.assertEqual(a2.het, True) @@ -2123,6 +2134,12 @@ def test_atom_site_handler_water(self): _entity.id _entity.type 1 water +loop_ +_struct_asym.id +_struct_asym.entity_id +_struct_asym.details +A 1 Water +B 1 Water # loop_ _pdbx_nonpoly_scheme.asym_id @@ -2134,7 +2151,7 @@ def test_atom_site_handler_water(self): _pdbx_nonpoly_scheme.auth_mon_id _pdbx_nonpoly_scheme.pdb_strand_id _pdbx_nonpoly_scheme.pdb_ins_code -A 1 HOH 1 6 6 HOH A . +A 1 HOH 1 50 500 HOH A . # loop_ _atom_site.group_PDB @@ -2156,17 +2173,93 @@ def test_atom_site_handler_water(self): _atom_site.B_iso_or_equiv _atom_site.pdbx_PDB_model_num _atom_site.ihm_model_id -HETATM 1 O O . HOH . 6 ? A 10.000 10.000 10.000 . 1 A . 1 1 -HETATM 2 O O . HOH . 7 . A 20.000 20.000 20.000 . 1 A . 1 1 +HETATM 1 O O . HOH . 40 ? A 10.000 10.000 10.000 . 1 A . 1 1 +HETATM 2 O O . HOH . 50 ? A 10.000 10.000 10.000 . 1 A . 1 1 +HETATM 3 O O . HOH . 60 . A 20.000 20.000 20.000 . 1 A . 1 1 +HETATM 4 O O . HOH . 70 . B 20.000 20.000 20.000 . 1 B . 
1 1 """) s, = ihm.reader.read(fh) m = s.state_groups[0][0][0][0] - a1, a2 = m._atoms - # First atom is in pdbx_nonpoly_scheme with - # ndb_seq_num=1, pdb_seq_num=6 + a1, a2, a3, b1 = m._atoms + # Should include info from both atom_site and scheme table + self.assertEqual(a1.asym_unit.auth_seq_id_map, + {1: (40, None), 2: (50, None), 3: (60, None)}) + self.assertEqual(a1.asym_unit.orig_auth_seq_id_map, + {2: 500}) + self.assertEqual(b1.asym_unit.auth_seq_id_map, {1: (70, None)}) + self.assertIsNone(b1.asym_unit.orig_auth_seq_id_map) + # seq_id should be assigned based on atom_site self.assertEqual(a1.seq_id, 1) - # Second atom is not in pdbx_nonpoly_scheme, so we keep auth_seq_id - self.assertEqual(a2.seq_id, 7) + self.assertEqual(a2.seq_id, 2) + self.assertEqual(a3.seq_id, 3) + self.assertEqual(b1.seq_id, 1) + + def test_atom_site_handler_branched(self): + """Test AtomSiteHandler reading branched molecules""" + cif = """ +loop_ +_entity.id +_entity.type +1 branched +loop_ +_struct_asym.id +_struct_asym.entity_id +_struct_asym.details +A 1 . +# +loop_ +_pdbx_branch_scheme.asym_id +_pdbx_branch_scheme.entity_id +_pdbx_branch_scheme.mon_id +_pdbx_branch_scheme.num +_pdbx_branch_scheme.pdb_seq_num +_pdbx_branch_scheme.auth_seq_num +_pdbx_branch_scheme.auth_mon_id +_pdbx_branch_scheme.pdb_asym_id +A 1 BGC 1 51 501 BGC A +A 1 BGC 2 52 502 BGC A +A 1 BGC 3 53 503 BGC A +# +loop_ +_atom_site.group_PDB +_atom_site.id +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_seq_id +_atom_site.auth_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.label_asym_id +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.label_entity_id +_atom_site.auth_asym_id +_atom_site.B_iso_or_equiv +_atom_site.pdbx_PDB_model_num +_atom_site.ihm_model_id +HETATM 1 C C . BGC . 52 ? A 10.000 10.000 10.000 . 1 A . 1 1 +HETATM 2 C C . BGC . 53 ? A 10.000 10.000 10.000 . 1 A . 
1 1 +""" + # Should fail since residue #60 is not in the scheme table + badline = "HETATM 3 C C . BGC . 60 . A 20.00 20.00 20.00 . 1 A . 1 1" + fh = StringIO(cif + badline) + self.assertRaises(ValueError, ihm.reader.read, fh) + + fh = StringIO(cif) + s, = ihm.reader.read(fh) + m = s.state_groups[0][0][0][0] + a1, a2 = m._atoms + # seq_id should match num, i.e. start at 2 since residue 51 is missing + self.assertEqual(a1.seq_id, 2) + self.assertEqual(a2.seq_id, 3) + self.assertEqual(a1.asym_unit.auth_seq_id_map, + {1: (51, None), 2: (52, None), 3: (53, None)}) + self.assertEqual(a1.asym_unit.orig_auth_seq_id_map, + {1: 501, 2: 502, 3: 503}) + self.assertEqual(a1.asym_unit.num_map, {1: 2, 2: 3}) def test_derived_distance_restraint_handler(self): """Test DerivedDistanceRestraintHandler""" @@ -2679,18 +2772,18 @@ def test_poly_seq_scheme_handler_str_seq_id(self): _pdbx_poly_seq_scheme.pdb_ins_code A 1 1 6 6 ? . A 1 2 7 12 ? . -A 1 3 8 24 ? X +A 1 3 8 24 ? . A 1 4 9A 48A ? . """) s, = ihm.reader.read(fh) asym, = s.asym_units self.assertIsNone(asym._strand_id) self.assertEqual(asym.auth_seq_id_map, {1: (6, None), 2: (7, None), - 3: (8, 'X'), 4: ('9A', None)}) + 3: (8, None), 4: ('9A', None)}) self.assertEqual([asym.residue(i).auth_seq_id for i in range(1, 5)], [6, 7, 8, '9A']) self.assertIsNone(asym.residue(1).ins_code) - self.assertEqual(asym.residue(3).ins_code, 'X') + self.assertIsNone(asym.residue(3).ins_code) self.assertEqual(asym.orig_auth_seq_id_map, {2: 12, 3: 24, 4: '48A'}) def test_nonpoly_scheme_handler(self): @@ -2733,13 +2826,13 @@ def test_nonpoly_scheme_handler(self): _pdbx_nonpoly_scheme.auth_seq_num _pdbx_nonpoly_scheme.pdb_strand_id _pdbx_nonpoly_scheme.pdb_ins_code -A 1 FOO 1 1 1 . . A 1 BAR 1 101 202 . . B 2 BAR 1 1 1 Q X C 3 HOH . 1 1 . . C 3 HOH 2 2 2 . . C 3 HOH 3 5 10 . . C 3 HOH 4 1 20 . . +C 3 HOH 5 7 7 . . 
""") s, = ihm.reader.read(fh) e1, e2, e3 = s.entities @@ -2767,10 +2860,9 @@ def test_nonpoly_scheme_handler(self): self.assertEqual(a2._strand_id, 'Q') self.assertIsNone(a2.orig_auth_seq_id_map) - # For waters, the first row should be ignored since ndb_seq_num - # is missing; the second row should also be ignored because it - # is a one-to-one mapping; only the last two rows should be used - self.assertEqual(a3.auth_seq_id_map, {3: (5, None), 4: (1, None)}) + self.assertEqual(a3.auth_seq_id_map, {1: (1, None), 2: (2, None), + 3: (5, None), 4: (1, None), + 5: (7, None)}) self.assertEqual(a3.orig_auth_seq_id_map, {3: 10, 4: 20}) def test_cross_link_list_handler(self): @@ -4941,23 +5033,31 @@ def test_branch_scheme_handler(self): """) s, = ihm.reader.read(fh) asym_a, asym_b, asym_c = s.asym_units - self.assertEqual(asym_a.auth_seq_id_map, 4) + self.assertEqual(asym_a.auth_seq_id_map, + {1: (5, None), 2: (6, None), 3: (7, None), + 4: (8, None)}) self.assertEqual(asym_a._strand_id, '0') self.assertEqual(asym_a.residue(1).auth_seq_id, 5) self.assertIsNone(asym_a.orig_auth_seq_id_map) + self.assertIsNone(asym_a.num_map) - self.assertEqual(asym_b.auth_seq_id_map, 0) + self.assertEqual(asym_b.auth_seq_id_map, + {1: (1, None), 2: (2, None), 3: (3, None), + 4: (4, None)}) self.assertIsNone(asym_b._strand_id) self.assertEqual(asym_b.residue(1).auth_seq_id, 1) self.assertEqual(asym_b.orig_auth_seq_id_map, {1: 11, 2: 12, 3: 13, 4: 14}) + self.assertIsNone(asym_b.num_map) self.assertEqual(asym_c.auth_seq_id_map, {1: (2, None), 2: (4, None), 3: (6, None), 4: (8, None)}) self.assertIsNone(asym_c._strand_id) self.assertEqual(asym_c.residue(1).auth_seq_id, 2) - self.assertIsNone(asym_c.orig_auth_seq_id_map) + self.assertEqual(asym_c.orig_auth_seq_id_map, + {1: None, 2: None, 3: None, 4: None}) + self.assertIsNone(asym_c.num_map) def test_entity_branch_list_handler(self): """Test EntityBranchListHandler""" @@ -5041,6 +5141,48 @@ def test_entity_branch_link_handler(self): 
self.assertIsNone(lnk2.order) self.assertIsNone(lnk2.details) + def test_database_handler(self): + """Test DatabaseHandler""" + fh = StringIO(""" +loop_ +_database_2.database_id +_database_2.database_code +_database_2.pdbx_database_accession +_database_2.pdbx_DOI +foo bar . ? +baz 1abc 1abcxyz 1.2.3.4 +""") + s, = ihm.reader.read(fh) + d1, d2 = s.databases + self.assertEqual(d1.id, 'foo') + self.assertEqual(d1.code, 'bar') + self.assertIsNone(d1.accession) + self.assertIs(d1.doi, ihm.unknown) + self.assertEqual(d2.id, 'baz') + self.assertEqual(d2.code, '1abc') + self.assertEqual(d2.accession, '1abcxyz') + self.assertEqual(d2.doi, '1.2.3.4') + + def test_database_status_handler(self): + """Test DatabaseStatusHandler""" + fh = StringIO(""" +_pdbx_database_status.status_code REL +_pdbx_database_status.entry_id 5FD1 +_pdbx_database_status.recvd_initial_deposition_date 1993-06-29 +_pdbx_database_status.deposit_site ? +_pdbx_database_status.process_site BNL +_pdbx_database_status.SG_entry . +""") + s, = ihm.reader.read(fh) + # Should pass through to a dict + self.assertEqual(s._database_status, + {'status_code': 'REL', + 'entry_id': '5FD1', + 'recvd_initial_deposition_date': '1993-06-29', + 'deposit_site': ihm.unknown, + 'process_site': 'BNL', + 'sg_entry': None}) + if __name__ == '__main__': unittest.main() diff --git a/modules/core/dependency/python-ihm/util/debian/changelog b/modules/core/dependency/python-ihm/util/debian/changelog new file mode 100644 index 0000000000..21f0eaf831 --- /dev/null +++ b/modules/core/dependency/python-ihm/util/debian/changelog @@ -0,0 +1,11 @@ +python-ihm (1.1-1~@CODENAME@) @CODENAME@; urgency=low + + * python-ihm 1.1 release + + -- Ben Webb Thu, 09 May 2024 12:44:51 -0700 + +python-ihm (1.0-1~@CODENAME@) @CODENAME@; urgency=low + + * Initial .deb release + + -- Ben Webb Thu, 07 Mar 2024 00:19:35 +0000 diff --git a/modules/core/dependency/python-ihm/util/debian/control b/modules/core/dependency/python-ihm/util/debian/control new file mode 
100644 index 0000000000..82578bb835 --- /dev/null +++ b/modules/core/dependency/python-ihm/util/debian/control @@ -0,0 +1,16 @@ +Source: python-ihm +Priority: optional +Maintainer: Ben Webb +Build-Depends: debhelper-compat (= 13), dh-python, swig, python3-dev +Standards-Version: 4.6.2 +Section: libs +Homepage: https://github.com/ihmwg/python-ihm +Vcs-Browser: https://github.com/ihmwg/python-ihm + +Package: python3-ihm +Section: libs +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends}, python3-msgpack +Description: Package for handling IHM mmCIF and BinaryCIF files + This is a Python package to assist in handling mmCIF and BinaryCIF files + compliant with the integrative/hybrid modeling (IHM) extension. diff --git a/modules/core/dependency/python-ihm/util/debian/copyright b/modules/core/dependency/python-ihm/util/debian/copyright new file mode 100644 index 0000000000..915c2202b8 --- /dev/null +++ b/modules/core/dependency/python-ihm/util/debian/copyright @@ -0,0 +1,23 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: python-ihm +Source: https://github.com/ihmwg/python-ihm + +Copyright: 2018-2024 IHM Working Group +License: MIT + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + . + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. diff --git a/modules/core/dependency/python-ihm/util/debian/make-package.sh b/modules/core/dependency/python-ihm/util/debian/make-package.sh new file mode 100755 index 0000000000..2e859631af --- /dev/null +++ b/modules/core/dependency/python-ihm/util/debian/make-package.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# Build a Debian package from source + +set -e + +VERSION=$(grep __version__ ../../ihm/__init__.py |cut -d\' -f2) +CODENAME=`lsb_release -c -s` + +# Make sure we can find the rest of our input files +TOOL_DIR=`dirname "$0"` +# Get absolute path to top dir +TOP_DIR=`cd "${TOOL_DIR}/../.." && pwd` + +cd ${TOP_DIR} +rm -rf debian +cp -r util/debian/ . +rm debian/make-package.sh +sed -i -e "s/\@CODENAME\@/$CODENAME/g" debian/changelog + +if [ "${CODENAME}" = "focal" ]; then + sed -i -e "s/debhelper-compat (= 13)/debhelper-compat (= 12)/" debian/control +fi + +dpkg-buildpackage -S diff --git a/modules/core/dependency/python-ihm/util/debian/rules b/modules/core/dependency/python-ihm/util/debian/rules new file mode 100755 index 0000000000..7386c89636 --- /dev/null +++ b/modules/core/dependency/python-ihm/util/debian/rules @@ -0,0 +1,5 @@ +#!/usr/bin/make -f +#export DH_VERBOSE=1 +export PYBUILD_NAME=ihm +%: + dh $@ --with python3 --buildsystem=pybuild diff --git a/modules/core/dependency/python-ihm/util/debian/source/format b/modules/core/dependency/python-ihm/util/debian/source/format new file mode 100644 index 0000000000..163aaf8d82 --- /dev/null +++ b/modules/core/dependency/python-ihm/util/debian/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/modules/core/dependency/python-ihm/util/make-mmcif.py b/modules/core/dependency/python-ihm/util/make-mmcif.py deleted file mode 100644 index 
817858fae6..0000000000 --- a/modules/core/dependency/python-ihm/util/make-mmcif.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python3 - -""" -Add minimal IHM-related tables to an mmCIF file. - -Given any mmCIF file as input, this script will add any missing -IHM-related tables and write out a new file that is minimally compliant -with the IHM dictionary. - -This is done by simply reading in the original file with python-ihm and -then writing it out again, so - a) any data in the input file that is not understood by python-ihm - will be lost on output; and - b) input files that aren't compliant with the PDBx dictionary, or that - contain syntax errors or other problems, may crash or otherwise confuse - python-ihm. -""" - - -import ihm.reader -import ihm.dumper -import ihm.model -import ihm.protocol -import sys -import os - - -def add_ihm_info(s): - if not s.title: - s.title = 'Auto-generated system' - - # Simple default assembly containing all chains - default_assembly = ihm.Assembly(s.asym_units, name='Modeled assembly') - - # Simple default atomic representation for everything - default_representation = ihm.representation.Representation( - [ihm.representation.AtomicSegment(asym, rigid=False) - for asym in s.asym_units]) - - # Simple default modeling protocol - default_protocol = ihm.protocol.Protocol(name='modeling') - - for state_group in s.state_groups: - for state in state_group: - for model_group in state: - for model in model_group: - if not model.assembly: - model.assembly = default_assembly - if not model.representation: - model.representation = default_representation - if not model.protocol: - model.protocol = default_protocol - return s - - -if len(sys.argv) != 2 and len(sys.argv) != 3: - print("Usage: %s input.cif [output.cif]" % sys.argv[0], file=sys.stderr) - sys.exit(1) - -fname = sys.argv[1] -if len(sys.argv) > 2: - out_fname = sys.argv[2] -else: - out_fname = 'output.cif' - -if (os.path.exists(fname) and os.path.exists(out_fname) - and 
os.path.samefile(fname, out_fname)): - raise ValueError("Input and output are the same file") - -with open(fname) as fh: - with open(out_fname, 'w') as fhout: - ihm.dumper.write( - fhout, [add_ihm_info(s) for s in ihm.reader.read(fh)], - variant=ihm.dumper.IgnoreVariant(['_audit_conform'])) diff --git a/modules/core/dependency/python-ihm/util/python-ihm.spec b/modules/core/dependency/python-ihm/util/python-ihm.spec index c037d64c51..1f96050538 100644 --- a/modules/core/dependency/python-ihm/util/python-ihm.spec +++ b/modules/core/dependency/python-ihm/util/python-ihm.spec @@ -1,7 +1,7 @@ Name: python3-ihm License: MIT Group: Applications/Engineering -Version: 0.43 +Version: 1.1 Release: 1%{?dist} Summary: Package for handling IHM mmCIF and BinaryCIF files Packager: Ben Webb @@ -36,6 +36,12 @@ sed -i -e "s/install_requires=\['msgpack'\]/#/" setup.py %defattr(-,root,root) %changelog +* Thu May 09 2024 Ben Webb 1.1-1 +- Update to latest upstream. + +* Tue Feb 13 2024 Ben Webb 1.0-1 +- Update to latest upstream. + * Fri Dec 08 2023 Ben Webb 0.43-1 - Update to latest upstream.