Merge branch 'master' of https://github.com/Degiacomi-Lab/molearn

Degiacomi-Lab · Sep 12, 2023 · 58f9b90 · 58f9b90
2 parents 379b238 + 98b3537
commit 58f9b90
Show file tree

Hide file tree

Showing 7 changed files with 64 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -2,6 +2,9 @@
 
 [![status](https://joss.theoj.org/papers/781a409020f1c37417067aef6fbc3217/status.svg)](https://joss.theoj.org/papers/781a409020f1c37417067aef6fbc3217)
 [![Documentation Status](https://readthedocs.org/projects/molearn/badge/?version=latest)](https://molearn.readthedocs.io/en/latest/?badge=latest)
+[![DOI](https://zenodo.org/badge/145391811.svg)](https://zenodo.org/badge/latestdoi/145391811)
+
+
 
 *protein conformational spaces meet machine learning*
 
@@ -42,34 +45,45 @@ To run the GUI:
 
 ## Installation ##
 
+#### Anaconda installation from conda-forge ####
+
 The most recent release can be obtained through Anaconda:
 
 `conda install molearn -c conda-forge` or the much faster `mamba install -c conda-forge molearn`
 
+#### Clone the repo and manually install ####
+
 Manual installation requires the following three steps:
-* Clone the repository 
-* Install the necessary requirements with `mamba install -c conda-forge --only-deps molearn`. The option `--only-deps` will install the molearn dependencies but not molearn itself.
+* Clone the repository `git clone https://github.com/Degiacomi-Lab/molearn.git`
+* Install all required packages (see section *Dependencies > Required Packages*, above). The easiest way is by calling `mamba install -c conda-forge --only-deps molearn`, where the option `--only-deps` will install the molearn required dependencies but not molearn itself. Optionally, packages enabling additional molearn functionalities can also be installed. This has to be done manually (see links in *Dependencies > Optional Packages*).
 * Use pip to install molearn from within the molearn directory `python -m pip install .`
 
+#### Using molearn without installation ####
+
 Molearn can be used without installation by making sure the requirements above are met, and adding the `src` directory to your path at the beginning of every script, e.g.:
 ```
 import sys
 sys.path.insert(0, 'path/to/molearn/src')
 import molearn
 ```
 
+
+
 ## Usage ##
 
 * See example scripts in the `examples` folder.
 * Jupyter notebook tutorials describing the usage of a trained neural network are available [here](https://github.com/Degiacomi-Lab/molearn_notebook).
 * The software API and a FAQ page are available at [molearn.readthedocs.io](https://molearn.readthedocs.io/).
 
-## Reference ##
+## References ##
+
+If you use `molearn` in your work, please cite: [S.C. Musson and M.T. Degiacomi (2023). Molearn: a Python package streamlining the design of generative models of biomolecular dynamics. Journal of Open Source Software, 8(89), 5523](https://doi.org/10.21105/joss.05523)
 
-If you use molearn in your work, please cite:
+Theory and benchmarks of a neural network training against protein conformational spaces are presented here:
 [V.K. Ramaswamy, S.C. Musson, C.G. Willcocks, M.T. Degiacomi (2021). Learning protein conformational space with convolutions and latent interpolations, Physical Review X 11](
 https://journals.aps.org/prx/abstract/10.1103/PhysRevX.11.011052)
 
-## Contact ##
+## Contributing ##
 
-For any question please contact [email protected]
+For information on how to report bugs, request new features, or contribute to the code, please see [CONTRIBUTING.md](CONTRIBUTING.md).
+For any other question please contact [email protected].
diff --git a/paper/paper.md b/paper/paper.md
@@ -17,7 +17,7 @@ authors:
     corresponding: true
     affiliation: 1
 affiliations:
- - name: Department of Physics, Durham University, UK
+ - name: Department of Physics, Durham University, United Kingdom
    index: 1
 date: 09 May 2023
 bibliography: paper.bib

diff --git a/src/molearn/analysis/analyser.py b/src/molearn/analysis/analyser.py
@@ -21,7 +21,16 @@
     print('Error importing modeller: ')
     print(e)
 
-from ..scoring import Parallel_DOPE_Score, Parallel_Ramachandran_Score
+try:
+    from ..scoring import Parallel_DOPE_Score
+except ImportError as e:
+    print('Import Error captured while trying to import Parallel_DOPE_Score, it is likely that you dont have Modeller installed')
+    print(e)
+try:
+    from ..scoring import Parallel_Ramachandran_Score
+except ImportError as e:
+    print('Import Error captured while trying to import Parallel_Ramachandran_Score, it is likely that you dont have cctbx/iotbx installed')
+    print(e)
 from ..data import PDBData
 
 from ..utils import as_numpy

diff --git a/src/molearn/data/pdb_data.py b/src/molearn/data/pdb_data.py
@@ -68,6 +68,10 @@ def atomselect(self, atoms, ignore_atoms=[]):
                     _plain_atoms.append(self._mol.knowledge['atomtype'][a])
                 elif a[:-1] in self._mol.knowledge['atomtype']:
                     _plain_atoms.append(self._mol.knowledge['atomtype'][a[:-1]])
+                    print(f'Could not find {a}. I am assuing you meant {a[:-1]} instead.')
+                elif a[:-2] in self._mol.knowledge['atomtype']:
+                    _plain_atoms.append(self._mol.knowledge['atomtype'][a[:-2]])
+                    print(f'Could not find {a}. I am assuming you meant {a[:-2]} instead.')
                 else:
                     _plain_atoms.append(self._mol.knowledge['atomtype'][a])  # if above failed just raise the keyerror
             _atoms = [atom for atom, element in zip(_atoms, _plain_atoms) if element != 'H']

diff --git a/src/molearn/loss_functions/openmm_thread.py b/src/molearn/loss_functions/openmm_thread.py
@@ -14,6 +14,7 @@
 
 import torch
 import numpy as np
+from copy import deepcopy
 
 
 class ModifiedForceField(ForceField):
@@ -193,6 +194,12 @@ def ignore_hydrogen(self):
             self.forcefield.registerPatch(patchData)
 
     def atomselect(self, atoms):
+        atoms = deepcopy(atoms)
+        if 'OT2' in atoms:
+            atoms.append('OXT')
+        if 'OT1' in atoms:
+            atoms.append('OXT')
+
         for name, template in self.forcefield._templates.items():
             patchData = ForceField._PatchData(name+'_leave_only_'+'_'.join(atoms), 1)
 

diff --git a/src/molearn/scoring/__init__.py b/src/molearn/scoring/__init__.py
@@ -1,15 +1,26 @@
 """
 `Scoring` holds classes for calculating DOPE and Ramachandran scores.
 """
+class RaiseErrorOnInit:
+    module = 'unknown module is creating an ImportError'
+    def __init__(self,*args, **kwargs):
+        raise ImportError(f'{self.module}. Therefore {self.__class__.__name__} can not be used')
 try:
     from .dope_score import Parallel_DOPE_Score, DOPE_Score
 except ImportError as e:
     import warnings
     warnings.warn(f"{e}. Modeller is probably not installed.")
-
+    class DOPE_Score(RaiseErrorOnInit):
+        module = e
+    class Parallel_DOPE_Score(RaiseErrorOnInit):
+        module = e
 
 try:
     from .ramachandran_score import Parallel_Ramachandran_Score, Ramachandran_Score
 except Exception as e:
+    class Parallel_Ramachandran_Score(RaiseErrorOnInit):
+        module = e
+    class Ramachandran_Score(RaiseErrorOnInit):
+        module = e
     import warnings
     warnings.warn(f"{e}. Will not be able to calculate Ramachandran score.")
diff --git a/src/molearn/scoring/dope_score.py b/src/molearn/scoring/dope_score.py
@@ -19,6 +19,7 @@ class DOPE_Score:
     '''
     This class contains methods to calculate DOPE scores without having to save and load PDB files for every structure. Atoms in a biobox coordinate tensor are mapped directly to the coordinates in the Modeller model.
     '''
+    atom_map = {('ILE', 'CD1'):('ILE', 'CD')}
 
     def __init__(self, mol):
         '''
@@ -55,12 +56,20 @@ def __init__(self, mol):
                     else:
                         where_arg = (atom_residue==(np.array([j.name, j_residue_name, j.residue.index+offset], dtype=object))).all(axis=1)
                         where = np.where(where_arg)[0]
+                        if len(where)==0:
+                            if (j_residue_name, j.name) in self.atom_map:
+                                alt_residue_name, alt_name = self.atom_map[(j_residue_name, j.name)]
+                                where_arg = (atom_residue==(np.array([alt_name, alt_residue_name, j.residue.index+offset], dtype=object))).all(axis=1)
+                                where = np.where(where_arg)[0]
+                            else:
+                                print(f'Cant find {j.name} in the atoms {atom_residue[atom_residue[:,2]==j.residue.index+offset]} try adding a mapping to DOPE_Score.atom_map')
                         atom_order.append(int(where))
         self.fast_atom_order = atom_order
         # check fast dope atoms
+        reverse_map = {value:key for key, value in self.atom_map.items()}
         for i, j in enumerate(self.fast_ss):
             if i<len(atom_residue):
-                assert _mol.data['name'][atom_order[i]]==j.name
+                assert _mol.data['name'][atom_order[i]]==j.name or reverse_map[(_mol.data['resname'][atom_order[i]], _mol.data['name'][atom_order[i]])][1]==j.name
         self.cg = ConjugateGradients()
         os.remove(tmp_file)