Skip to content

Commit

Permalink
update README, fix data reading, add own index numpy
Browse files Browse the repository at this point in the history
  • Loading branch information
gwirn committed Sep 20, 2024
1 parent beea1cc commit 567b70e
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 10 deletions.
2 changes: 1 addition & 1 deletion examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ In `xbb_foldingnet_checkpoints`, an example output generated when the *foldingne
#### Analysis examples

* `analysis_example.py`: a minimal example of analysing a trained neural network. This script operates on the contents of the `data` and `xbb_foldingnet_checkpoints` folders. Note that more detailed explanations of the analysis are available in our [molearn notebooks](https://github.com/Degiacomi-Lab/molearn_notebook).
* `interpolation_example.py`: this example demonstrates how to generate interpolations between two positions in the latent space in two ways (1) as a straight line or (2) using the A* shortest path algorithm on a DOPE score landscape.
* `interpolation_example.py`: this example demonstrates how to generate interpolations between two positions in the latent space in two ways (1) as a straight line or (2) using the A* shortest path algorithm on an input-to-decoded RMSD landscape.
7 changes: 5 additions & 2 deletions src/molearn/data/pdb_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ def import_pdb(self, filename: str | list[str], topology: str | None = None):
if isinstance(filename, list) and topology is None:
first_universe = mda.Universe(filename[0])
self._mol = mda.Universe(first_universe._topology, filename)
if isinstance(filename, list) and topology is not None:
first_universe = mda.Universe(topology[0], filename[0])
self._mol = mda.Universe(first_universe._topology, filename)
elif topology is None:
self._mol = mda.Universe(filename)
else:
Expand Down Expand Up @@ -154,10 +157,10 @@ def frame(self):
data = []
for ci, i in enumerate(self._mol.atoms):
intermediate_data = []
intermediate_data.append("ATOM")
intermediate_data.append(i.record_type)
# i.index would also be an option but is different from original PDBData
# replaces M.data["index"] = np.arange(self._mol.coordinates.shape[1])
intermediate_data += [ci, i.name, i.resname, i.segid, i.resid]
intermediate_data += [ci, i.name, i.resname, i.chainID, i.resid]
try:
intermediate_data.append(i.occupancy)
except (mda.exceptions.NoDataError, IndexError):
Expand Down
20 changes: 13 additions & 7 deletions src/molearn/data/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,18 +365,24 @@ def stride(self) -> None:
self.cluster_idx = stride_idx
self.cluster_method = f"STRIDE_{self.n_cluster}"

def own_idx(self, file_path: str):
def own_idx(self, file_path: str | np.ndarray[tuple[int], np.dtype[np.int64]]):
"""
Provide indices for frames to create a new trajectory.
Useful if trajectory should be sub sampled by some external metric.
:param str file_path: path where the file storing the indices is located. Needs to have each index in a separate line.
:param str | np.ndarray[tuple[int], np.dtype[np.int64]] file_path: path where the file storing the indices is located. Needs to have each index in a separate line. Or can be a numpy array.
"""
provided_idx = []
with open(file_path, "r") as ifile:
for i in ifile:
provided_idx.append(int(i))
self.train_idx = np.asarray(provided_idx)
if isinstance(file_path, str):
provided_idx = []
with open(file_path, "r") as ifile:
for i in ifile:
provided_idx.append(int(i))
provided_idx = np.asarray(provided_idx)
elif isinstance(file_path, np.ndarray):
provided_idx = file_path
else:
raise ValueError("Provided indices are in an incompatible format")
self.train_idx = provided_idx
self.cluster_idx = np.arange(len(self.train_idx))
self.cluster_method = "PROVIDED"

Expand Down

0 comments on commit 567b70e

Please sign in to comment.