Squashed 'modules/core/dependency/python-ihm/' changes from 4fb3cf6ec…

…9..349652213c

349652213c flake8 fixes
0390c2a15b Add simple filtering mechanism
480f3d0e7d flake8 fix
7eaf700829 Allow get/set of data items in loop constructs
af29654b8a Allow easy get/set of non-loop data items
0cee0f2cb6 Add class to assign tokens to loops or categories
265e4f20a6 Increase test coverage
138ee1c646 Preserve data item case
c7920ed9a3 Handle (and preserve) quoted or multiline strings
025f138e36 Preserve data, loop, ?, . tokens
ebf3522c41 Test output of read-in tokens
59bfbcac74 Preserve line breaks when tokenizing mmCIF
da28d1d0a6 Optionally preserve whitespace when reading mmCIF
9f40565b07 Add a tokenizer subclass that preserves mmCIF comments
de233bc0c9 Split mmCIF tokenizer into separate class
3d4196b7e4 We need to also update the Ubuntu PPA
4675e9ad4a Fix typo
5e04a2eb98 Update for 1.1 release
aa752e80e3 Add support for ihm_external_files.file_format
c837df1fd4 Use 'Other' for unspecified file content type
cb61fd1fc0 Allow file size to be a float, closes ihmwg/python-ihm#138
b142c8653f Add missing ihm.util subpackage, closes ihmwg/python-ihm#136
43892b1fa2 Explicitly list pdbx_database_status items
b1a0bf00d4 Pass through the pdbx_database_status category
cdb9c49770 flake8 fix
1310a64adf Add support for database_2 category
b476d4dea3 Add make_mmcif.py to installed package
859dd7fc41 Make importable
247058244b Fully qualify links to improve ModelCIF docs
56a27feaa8 Link to PPA
02d67dd235 Don't overwrite orig.tar.gz, fix focal build
e53f4e46b8 Build only signed source packages (for PPA)
2b78fb520a Add Ubuntu version to .deb filename
99c8140105 Add basic Debian packaging
de2266ada3 Update copyright year
57b2c28b2a Fix typos
638f1b1e89 Prepare for 1.0 release
5bf8b2932b Fix typos
0ac9ebccf9 Add recent changes
a4a8651a0d Fix broken links
5e386a887d Warn about broken links
df273f6a73 Fix sphinx warning
29ad70b5f1 Fix test for non-integer pdb_seq_num
1be7a12e8f Drop redundant code for branched entities
51e1a4f62e Test adding files containing nonpolymers
3f6543b4e2 Allow combining multiple mmCIF files into one
a3baf8b8a8 Use argparse for argument parsing
374c50e96d Handle unknown formula when getting component weight
34aba4a259 Add ins code to output pdbx_branch_scheme table
4a01eba2f6 Classify single sugar as a nonpolymer, not branched
8bbf43b8db Improve handling of seq_id for branched entities
64d842cabf Improve handling of seq_id for waters, nonpolymers
29d7b49640 Allow setting orig_auth_seq_id_map for waters
83490bc9e4 Fix typo
afc3b6eb1d Clarify seq_id vs internal numbering
8be5d07aeb Use canonical URLs
111882198a Update to match new audit record
2ee6db31c7 Point to new IHMCIF dictionary URL
a0fd0da7b0 Use latest checkout action
a9703c8208 Update to CodeQL action v2

git-subtree-dir: modules/core/dependency/python-ihm
git-subtree-split: 349652213c894bcf700b4600f883ab7920fb8a43
salilab · May 29, 2024 · b13fa71 · b13fa71
1 parent 1658b80
commit b13fa71
Show file tree

Hide file tree

Showing 45 changed files with 1,976 additions and 354 deletions.
diff --git a/modules/core/dependency/python-ihm/.github/workflows/codeql-analysis.yml b/modules/core/dependency/python-ihm/.github/workflows/codeql-analysis.yml
@@ -39,11 +39,11 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v1
+      uses: github/codeql-action/init@v2
       with:
         languages: ${{ matrix.language }}
         # If you wish to specify custom queries, you can do so here or in a config file.
@@ -55,4 +55,4 @@ jobs:
        python setup.py build_ext --inplace -t build
 
     - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v1
+      uses: github/codeql-action/analyze@v2
diff --git a/modules/core/dependency/python-ihm/.github/workflows/linter.yml b/modules/core/dependency/python-ihm/.github/workflows/linter.yml
@@ -13,7 +13,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout Code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
       - name: Lint Code Base
         uses: docker://github/super-linter:v2.1.0
         env:

diff --git a/modules/core/dependency/python-ihm/ChangeLog.rst b/modules/core/dependency/python-ihm/ChangeLog.rst
@@ -1,3 +1,33 @@
+1.1 - 2024-05-09
+================
+  - :class:`ihm.System` now allows for one or more official database IDs to
+    be associated with the system using the new :class:`ihm.Database` class.
+    This maps to the mmCIF ``_database_2`` category (#135).
+  - :class:`ihm.location.FileLocation` now allows for an optional file format
+    to be specified (#139).
+  - The ``util/make-mmcif.py`` script is now included in the installed package,
+    so can be run if desired with ``python3 -m ihm.util.make_mmcif`` (#134).
+  - Bugfix: allow for file sizes in input mmCIF files to be floating point
+    values (#138).
+  - Bugfix: the 'Other' content type is now handled correctly when reading
+    information about external files from an mmCIF file (#139).
+
+1.0 - 2024-02-13
+================
+  - Support for multi-state schemes (such as kinetic rates and relaxation
+    times for conversions between states) was added;
+    see :mod:`ihm.multi_state_scheme`.
+  - Residue numbering in non-polymer, water, and branched entities should
+    now be better handled, no longer requiring the various scheme tables
+    to precede ``atom_site``. If you subclass :class:`ihm.model.Model`, atoms
+    may need to be renumbered; see :meth:`ihm.model.Model.add_atom` (#130).
+  - Original author-provided numbering can now be provided for waters,
+    using the ``orig_auth_seq_id_map`` argument to :class:`ihm.WaterAsymUnit`.
+  - The make-mmcif.py utility script now has basic functionality for
+    combining multiple input files into one, relabeling chain IDs if necessary.
+  - An :class:`ihm.Entity` containing just a single sugar is now classified
+    as a nonpolymer, not branched.
+
 0.43 - 2023-12-08
 =================
   - Branched and polymeric empty entities are now distinguished

diff --git a/modules/core/dependency/python-ihm/LICENSE b/modules/core/dependency/python-ihm/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2018-2023 IHM Working Group
+Copyright (c) 2018-2024 IHM Working Group
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/modules/core/dependency/python-ihm/MANIFEST.in b/modules/core/dependency/python-ihm/MANIFEST.in
@@ -4,4 +4,4 @@ include examples/*
 include util/make-mmcif.py
 include src/ihm_format.h
 include src/ihm_format.i
-include src/ihm_format_wrap_0.43.c
+include src/ihm_format_wrap_1.1.c
diff --git a/modules/core/dependency/python-ihm/README.md b/modules/core/dependency/python-ihm/README.md
@@ -6,9 +6,9 @@
 [![Windows Build Status](https://ci.appveyor.com/api/projects/status/5o28oe477ii8ur4h?svg=true)](https://ci.appveyor.com/project/benmwebb/python-ihm)
 [![codecov](https://codecov.io/gh/ihmwg/python-ihm/branch/main/graph/badge.svg)](https://codecov.io/gh/ihmwg/python-ihm)
 
-This is a Python package to assist in handling [mmCIF](http://mmcif.wwpdb.org/)
+This is a Python package to assist in handling [mmCIF](https://mmcif.wwpdb.org/)
 and [BinaryCIF](https://github.com/dsehnal/BinaryCIF) files compliant with the
-[integrative/hybrid modeling (IHM)](http://mmcif.wwpdb.org/dictionaries/mmcif_ihm.dic/Index/)
+[integrative/hybrid modeling (IHM)](https://mmcif.wwpdb.org/dictionaries/mmcif_ihm_ext.dic/Index/)
 extension. It works with Python 2.7 or Python 3.
 
 To handle non-integrative theoretical models (for example, homology models),
@@ -42,6 +42,14 @@ On a Fedora or RedHat Enterprise Linux box, install with
 dnf copr enable salilab/salilab; dnf install python3-ihm
 ```
 
+On an Ubuntu LTS box, install from
+[our PPA](https://launchpad.net/~salilab/+archive/ubuntu/ppa) with
+
+```
+apt install software-properties-common; add-apt-repository ppa:salilab/ppa;
+apt install python3-ihm
+```
+
 Alternatively, install with pip:
 
 ```

diff --git a/modules/core/dependency/python-ihm/docs/conf.py b/modules/core/dependency/python-ihm/docs/conf.py
@@ -156,3 +156,6 @@
      author, 'Python-IHM', 'One line description of project.',
      'Miscellaneous'),
 ]
+
+# Warn about broken links to classes, etc.
+nitpicky = True
diff --git a/modules/core/dependency/python-ihm/docs/design.rst b/modules/core/dependency/python-ihm/docs/design.rst
@@ -27,7 +27,7 @@ Types rather than enums
 Where the underlying IHM mmCIF dictionary uses an enumeration, generally this
 corresponds to separate sibling classes in this package. For example, two
 datasets which differ only in their ``data_type``
-`in the dictionary <http://mmcif.wwpdb.org/dictionaries/mmcif_ihm.dic/Items/_ihm_dataset_list.data_type.html>`_
+`in the dictionary <https://mmcif.wwpdb.org/dictionaries/mmcif_ihm.dic/Items/_ihm_dataset_list.data_type.html>`_
 (such as a electron microscopy density map and small angle scattering data)
 are represented with two classes in this package:
 :class:`ihm.dataset.EMDensityDataset` and :class:`ihm.dataset.SASDataset`.
@@ -42,7 +42,7 @@ This is naturally represented in Python as a hierarchy of classes, with
 members pointing to other objects as appropriate. IDs are not used to look
 up other objects, and are only used internally to populate the tables.
 For example, to group multiple models together, the dictionary assigns all of
-the models the same `model_group_id <http://mmcif.wwpdb.org/dictionaries/mmcif_ihm.dic/Items/_ihm_model_list.model_group_id.html>`_
+the models the same `model_group id <https://mmcif.wwpdb.org/dictionaries/mmcif_ihm_ext.dic/Categories/ihm_model_group.html>`_
 while in the Python package the :class:`ihm.model.Model` objects are placed
 into a :class:`ihm.model.ModelGroup` object, which acts like a simple Python
 list.
@@ -71,7 +71,7 @@ mmCIF backend
 =============
 
 The classes in this package roughly correspond to
-`categories <http://mmcif.wwpdb.org/dictionaries/mmcif_ihm.dic/Categories/index.html>`_
+`categories <https://mmcif.wwpdb.org/dictionaries/mmcif_ihm.dic/Categories/index.html>`_
 in the underlying IHM mmCIF dictionary. This allows for simple output of
 mmCIF formatted files, but also allows for the potential future support for
 other file formats that support the dictionary or a subset of it, such

diff --git a/modules/core/dependency/python-ihm/docs/introduction.rst b/modules/core/dependency/python-ihm/docs/introduction.rst
@@ -20,11 +20,11 @@ application with a set of Python objects. This includes
 
 Once created, this set of Python objects can be written to an mmCIF file
 that is compliant with the
-`IHM extension <https://github.com/ihmwg/IHM-dictionary>`_
-to the `PDBx/mmCIF dictionary <http://mmcif.wwpdb.org/>`_,
+`IHMCIF extension <https://github.com/ihmwg/IHMCIF>`_
+to the `PDBx/mmCIF dictionary <https://mmcif.wwpdb.org/>`_,
 suitable for deposition in the
 `PDB-Dev repository <https://pdb-dev.wwpdb.org/>`_. The files are best viewed
-in a viewer that supports IHM mmCIF, such as
+in a viewer that supports IHMCIF, such as
 `UCSF ChimeraX <https://www.cgl.ucsf.edu/chimerax/>`_, although they may be
 partially viewable in regular PDBx mmCIF viewers (likely only the atomic
 coordinates will be visible).

diff --git a/modules/core/dependency/python-ihm/docs/main.rst b/modules/core/dependency/python-ihm/docs/main.rst
@@ -12,6 +12,9 @@ The :mod:`ihm` Python module
 .. autoclass:: System
    :members:
 
+.. autoclass:: Database
+   :members:
+
 .. autoclass:: Software
    :members:
 

diff --git a/modules/core/dependency/python-ihm/docs/usage.rst b/modules/core/dependency/python-ihm/docs/usage.rst
@@ -118,7 +118,11 @@ of the data used in modeling:
 
  - *Internal numbering*. Residues are always numbered sequentially starting at
    1 in an :class:`~ihm.Entity`. All references to residues or residue ranges in
-   the library use this numbering.
+   the library use this numbering. For polymers, this internal numbering matches
+   the ``seq_id`` used in the mmCIF dictionary, while for branched entities,
+   this matches ``num`` in the dictionary. (For other types of entities
+   (non-polymers, waters) ``seq_id`` is not used in mmCIF,
+   but the residues are still numbered sequentially from 1 in this library.)
  - *Author-provided numbering*. If a different numbering scheme is used by the
    authors, for example to correspond to the numbering of the original sequence
    that is modeled, this can be given as an author-provided numbering for

diff --git a/modules/core/dependency/python-ihm/ihm/__init__.py b/modules/core/dependency/python-ihm/ihm/__init__.py
@@ -20,7 +20,7 @@
 import json
 from . import util
 
-__version__ = '0.43'
+__version__ = '1.1'
 
 
 class __UnknownValue(object):
@@ -74,14 +74,22 @@ class System(object):
        :param str id: Unique identifier for this system in the mmCIF file.
        :param str model_details: Detailed description of the system, like an
                                  abstract.
+       :param databases: If this system is part of one or more official
+              databases (e.g. PDB, PDB-Dev, SwissModel), details of
+              the database identifiers.
+       :type databases: sequence of :class:`Database`
     """
 
     structure_determination_methodology = "integrative"
 
-    def __init__(self, title=None, id='model', model_details=None):
+    def __init__(self, title=None, id='model', model_details=None,
+                 databases=[]):
         self.id = id
         self.title = title
         self.model_details = model_details
+        self.databases = []
+        self.databases.extend(databases)
+        self._database_status = {}
 
         #: List of plain text comments. These will be added to the top of
         #: the mmCIF file.
@@ -93,7 +101,7 @@ def __init__(self, title=None, id='model', model_details=None):
         #: List of all authors of this system, as a list of strings (last name
         #: followed by initials, e.g. "Smith, A.J."). When writing out a file,
         #: if this list is empty, the set of all citation authors (see
-        #: :attr:`Citation.authors`) is used instead.
+        #: :class:`Citation`) is used instead.
         self.authors = []
 
         #: List of all grants that supported this work. See :class:`Grant`.
@@ -663,6 +671,24 @@ def _check_after_write(self):
                     "can be grouped." % g)
 
 
+class Database(object):
+    """Information about a System that is part of an official database.
+
+       If a :class:`System` is part of one or more official databases
+       (e.g. PDB, PDB-Dev, SwissModel), this class contains details of the
+       database identifiers. It should be passed to the :class:`System`
+       constructor.
+
+       :param str id: Abbreviated name of the database (e.g. PDB).
+       :param str code: Identifier from the database (e.g. 1abc).
+       :param str doi: Digital Object Identifier of the database entry.
+       :param str accession: Extended accession code of the database entry.
+       """
+    def __init__(self, id, code, doi=None, accession=None):
+        self.id, self.code = id, code
+        self.doi, self.accession = doi, accession
+
+
 class Software(object):
     """Software used as part of the modeling protocol.
 
@@ -681,7 +707,7 @@ class Software(object):
        passed to :class:`ihm.startmodel.StartingModel`,
        :class:`ihm.protocol.Step`,
        :class:`ihm.analysis.Step`, or
-       :class:`ihm.restraint.PredictedContactResstraint` objects.
+       :class:`ihm.restraint.PredictedContactRestraint` objects.
     """
     def __init__(self, name, classification, description, location,
                  type='program', version=None, citation=None):
@@ -902,7 +928,7 @@ def __str__(self):
 
     def __get_weight(self):
         # Calculate weight from formula
-        if self.formula is None:
+        if self.formula in (None, unknown):
             return
         spl = self.formula.split()
         # Remove formal charge if present
@@ -1250,7 +1276,7 @@ def __init__(self, seq_id, entity=None, asym=None):
         self.seq_id = seq_id
 
     def atom(self, atom_id):
-        """Get a :class:`Atom` in this residue with the given name."""
+        """Get a :class:`~ihm.Atom` in this residue with the given name."""
         return Atom(residue=self, id=atom_id)
 
     def _get_auth_seq_id(self):
@@ -1380,11 +1406,11 @@ def get_chem_comp(s):
 
         #: String descriptors of branched chemical structure.
         #: These generally only make sense for oligosaccharide entities,
-        #: and should be a list of :class:`BranchDescriptor` objects.
+        #: and should be a list of :class:`~ihm.BranchDescriptor` objects.
         self.branch_descriptors = []
 
         #: Any links between components in a branched entity.
-        #: This is a list of :class:`BranchLink` objects.
+        #: This is a list of :class:`~ihm.BranchLink` objects.
         self.branch_links = []
 
     def __str__(self):
@@ -1402,7 +1428,7 @@ def is_polymeric(self):
     def is_branched(self):
         """Return True iff this entity is branched (generally
            an oligosaccharide)"""
-        return ((len(self.sequence) > 0
+        return ((len(self.sequence) > 1
                  and isinstance(self.sequence[0], SaccharideChemComp)) or
                 (len(self.sequence) == 0 and self._hint_branched))
 
@@ -1491,7 +1517,7 @@ class AsymUnit(object):
        was modeled.
 
        Note that this class should not be used to describe crystal waters;
-       for that, see :class:`WaterAsymUnit`.
+       for that, see :class:`ihm.WaterAsymUnit`.
 
        :param entity: The unique sequence of this asymmetric unit.
        :type entity: :class:`Entity`
@@ -1523,12 +1549,13 @@ class AsymUnit(object):
               numbering. This differs from `auth_seq_id_map` as the original
               numbering need not follow any defined scheme, while
               `auth_seq_id_map` must follow certain PDB-defined rules. This
-              can either be a mapping type (dict, list, tuple) in which case
+              can be any mapping type (dict, list, tuple) in which case
               ``orig_auth_seq_id = orig_auth_seq_id_map[seq_id]``. If the
               mapping is None (the default), or a given `seq_id` cannot be
               found in the mapping, ``orig_auth_seq_id = auth_seq_id``.
               This mapping is only used in the various `scheme` tables, such
               as ``pdbx_poly_seq_scheme``.
+
        See :attr:`System.asym_units`.
     """
 
@@ -1545,6 +1572,11 @@ def __init__(self, entity, details=None, auth_seq_id_map=0, id=None,
         self.id = id
         self._strand_id = strand_id
 
+        #: For branched entities read from files, mapping from provisional
+        #: to final internal numbering (`seq_id`), or None if no mapping is
+        #: necessary. See :meth:`ihm.model.Model.add_atom`.
+        self.num_map = None
+
     def _get_auth_seq_id_ins_code(self, seq_id):
         if isinstance(self.auth_seq_id_map, int):
             return seq_id + self.auth_seq_id_map, None
@@ -1604,13 +1636,14 @@ class WaterAsymUnit(AsymUnit):
     """
 
     def __init__(self, entity, number, details=None, auth_seq_id_map=0,
-                 id=None, strand_id=None):
+                 id=None, strand_id=None, orig_auth_seq_id_map=None):
         if entity.type != 'water':
             raise TypeError(
                 "WaterAsymUnit can only be used for water entities")
         super(WaterAsymUnit, self).__init__(
             entity, details=details, auth_seq_id_map=auth_seq_id_map,
-            id=id, strand_id=strand_id)
+            id=id, strand_id=strand_id,
+            orig_auth_seq_id_map=orig_auth_seq_id_map)
         self.number = number
         self._water_sequence = [entity.sequence[0]] * number
 

diff --git a/modules/core/dependency/python-ihm/ihm/analysis.py b/modules/core/dependency/python-ihm/ihm/analysis.py
@@ -8,9 +8,10 @@
 class Step(object):
     """A single step in an :class:`Analysis`.
 
-       Normally one of the base classes is used; see :class:`FilterStep`,
-       :class:`ClusterStep`, :class:`RescoreStep`, :class:`ValidationStep`,
-       :class:`OtherStep`, and :class:`EmptyStep`.
+       Normally one of the more specific derived classes is used;
+       see :class:`FilterStep`, :class:`ClusterStep`, :class:`RescoreStep`,
+       :class:`ValidationStep`, and :class:`EmptyStep`, although this base
+       class can be used for a generic 'other' step.
 
        :param str feature: feature energy/score;RMSD;dRMSD;other
        :param int num_models_begin: The number of models at the beginning