From 567b70e8235d66e4f677a51ef0ee7bfee85b9986 Mon Sep 17 00:00:00 2001 From: gwirn <71886945+gwirn@users.noreply.github.com> Date: Fri, 20 Sep 2024 19:45:04 +0200 Subject: [PATCH] update README, fix data reading, add own index numpy --- examples/README.md | 2 +- src/molearn/data/pdb_data.py | 7 +++++-- src/molearn/data/prepare.py | 20 +++++++++++++------- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/examples/README.md b/examples/README.md index 5047470..83da41e 100644 --- a/examples/README.md +++ b/examples/README.md @@ -39,4 +39,4 @@ In `xbb_foldingnet_checkpoints`, an example output generated when the *foldingne #### Analysis examples * `analysis_example.py`: minimal example of analysis of trained neural network. This script operates on the content of the `data` and `xbb_foldingnet_checkpoints` folders. Note that more detailed explanations on analysis are available on our [molearn notebooks](https://github.com/Degiacomi-Lab/molearn_notebook) -* `interpolation_example.py`: this example demonstrates how to generate interpolations between two positions in the latent space in two ways (1) as a straight line or (2) using the A* shortest path algorithm on a DOPE score landscape. +* `interpolation_example.py`: this example demonstrates how to generate interpolations between two positions in the latent space in two ways (1) as a straight line or (2) using the A* shortest path algorithm on an input-to-decoded RMSD landscape. 
diff --git a/src/molearn/data/pdb_data.py b/src/molearn/data/pdb_data.py index 59bab91..255259e 100644 --- a/src/molearn/data/pdb_data.py +++ b/src/molearn/data/pdb_data.py @@ -70,6 +70,9 @@ def import_pdb(self, filename: str | list[str], topology: str | None = None): if isinstance(filename, list) and topology is None: first_universe = mda.Universe(filename[0]) self._mol = mda.Universe(first_universe._topology, filename) + if isinstance(filename, list) and topology is not None: + first_universe = mda.Universe(topology[0], filename[0]) + self._mol = mda.Universe(first_universe._topology, filename) elif topology is None: self._mol = mda.Universe(filename) else: @@ -154,10 +157,10 @@ def frame(self): data = [] for ci, i in enumerate(self._mol.atoms): intermediate_data = [] - intermediate_data.append("ATOM") + intermediate_data.append(i.record_type) # i.index would also be an option but is different from original PDBData # replaces M.data["index"] = np.arange(self._mol.coordinates.shape[1]) - intermediate_data += [ci, i.name, i.resname, i.segid, i.resid] + intermediate_data += [ci, i.name, i.resname, i.chainID, i.resid] try: intermediate_data.append(i.occupancy) except (mda.exceptions.NoDataError, IndexError): diff --git a/src/molearn/data/prepare.py b/src/molearn/data/prepare.py index 59e0119..4bb4257 100644 --- a/src/molearn/data/prepare.py +++ b/src/molearn/data/prepare.py @@ -365,18 +365,24 @@ def stride(self) -> None: self.cluster_idx = stride_idx self.cluster_method = f"STRIDE_{self.n_cluster}" - def own_idx(self, file_path: str): + def own_idx(self, file_path: str | np.ndarray[tuple[int], np.dtype[np.int64]]): """ Provide indices for frames to create a new trajectory. Useful if trajectory should be sub sampled by some external metric. - :param str file_path: path where the file storing the indices is located. Needs to have each index in a separate line. 
+ :param str | np.ndarray[tuple[int], np.dtype[np.int64]] file_path: path to a file storing the indices, one index per line, or a numpy array of indices. """ - provided_idx = [] - with open(file_path, "r") as ifile: - for i in ifile: - provided_idx.append(int(i)) - self.train_idx = np.asarray(provided_idx) + if isinstance(file_path, str): + provided_idx = [] + with open(file_path, "r") as ifile: + for i in ifile: + provided_idx.append(int(i)) + provided_idx = np.asarray(provided_idx) + elif isinstance(file_path, np.ndarray): + provided_idx = file_path + else: + raise ValueError("Provided indices are in an incompatible format") + self.train_idx = provided_idx self.cluster_idx = np.arange(len(self.train_idx)) self.cluster_method = "PROVIDED"