update README, fix data reading, allow own_idx to accept a numpy array

Degiacomi-Lab · Sep 20, 2024 · 567b70e · 567b70e
1 parent beea1cc
commit 567b70e
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 10 deletions.
diff --git a/examples/README.md b/examples/README.md
@@ -39,4 +39,4 @@ In `xbb_foldingnet_checkpoints`, an example output generated when the *foldingne
 #### Analysis examples
 
 * `analysis_example.py`: minimal example of analysis of trained neural network. This script operates on the content of the `data` and `xbb_foldingnet_checkpoints` folders. Note that more detailed explanations on analysis are available on our [molearn notebooks](https://github.com/Degiacomi-Lab/molearn_notebook)
-* `interpolation_example.py`: this example demonstrates how to generate interpolations between two positions in the latent space in two ways (1) as a straight line or (2) using the A* shortest path algorithm on a DOPE score landscape.
+* `interpolation_example.py`: this example demonstrates how to generate interpolations between two positions in the latent space in two ways (1) as a straight line or (2) using the A* shortest path algorithm on an input-to-decoded RMSD landscape.
diff --git a/src/molearn/data/pdb_data.py b/src/molearn/data/pdb_data.py
@@ -70,6 +70,9 @@ def import_pdb(self, filename: str | list[str], topology: str | None = None):
         if isinstance(filename, list) and topology is None:
             first_universe = mda.Universe(filename[0])
             self._mol = mda.Universe(first_universe._topology, filename)
+        if isinstance(filename, list) and topology is not None:
+            first_universe = mda.Universe(topology[0], filename[0])
+            self._mol = mda.Universe(first_universe._topology, filename)
         elif topology is None:
             self._mol = mda.Universe(filename)
         else:
@@ -154,10 +157,10 @@ def frame(self):
         data = []
         for ci, i in enumerate(self._mol.atoms):
             intermediate_data = []
-            intermediate_data.append("ATOM")
+            intermediate_data.append(i.record_type)
             # i.index would also be an option but is different from original PDBData
             # replaces M.data["index"] = np.arange(self._mol.coordinates.shape[1])
-            intermediate_data += [ci, i.name, i.resname, i.segid, i.resid]
+            intermediate_data += [ci, i.name, i.resname, i.chainID, i.resid]
             try:
                 intermediate_data.append(i.occupancy)
             except (mda.exceptions.NoDataError, IndexError):

diff --git a/src/molearn/data/prepare.py b/src/molearn/data/prepare.py
@@ -365,18 +365,24 @@ def stride(self) -> None:
         self.cluster_idx = stride_idx
         self.cluster_method = f"STRIDE_{self.n_cluster}"
 
-    def own_idx(self, file_path: str):
+    def own_idx(self, file_path: str | np.ndarray[tuple[int], np.dtype[np.int64]]):
         """
         Provide indices for frames to create a new trajectory.
         Useful if trajectory should be sub sampled by some external metric.
 
-        :param str file_path: path where the file storing the indices is located. Needs to have each index in a separate line.
+        :param str | np.ndarray[tuple[int], np.dtype[np.int64]] file_path: either the path to a file storing the indices (one index per line), or a numpy array containing the indices directly.
         """
-        provided_idx = []
-        with open(file_path, "r") as ifile:
-            for i in ifile:
-                provided_idx.append(int(i))
-        self.train_idx = np.asarray(provided_idx)
+        if isinstance(file_path, str):
+            provided_idx = []
+            with open(file_path, "r") as ifile:
+                for i in ifile:
+                    provided_idx.append(int(i))
+            provided_idx = np.asarray(provided_idx)
+        elif isinstance(file_path, np.ndarray):
+            provided_idx = file_path
+        else:
+            raise ValueError("Provided indices are in an incompatible format")
+        self.train_idx = provided_idx
         self.cluster_idx = np.arange(len(self.train_idx))
         self.cluster_method = "PROVIDED"