From 54e5264ae513a5a9bf448e9ea1ee370f3b1542b1 Mon Sep 17 00:00:00 2001 From: Wei-Tse Hsu Date: Fri, 18 Aug 2023 22:53:46 -0600 Subject: [PATCH] Added stitch_trajs and renamed the original functions to stitch_time_series and stitch_time_series_for_sim --- README.md | 1 + ensemble_md/analysis/analyze_traj.py | 44 +++++++++++++++++++++++++--- ensemble_md/cli/analyze_EEXE.py | 4 +-- ensemble_md/ensemble_EXE.py | 7 +++-- 4 files changed, 47 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a6e4bd70..ba271b4b 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ Ensemble Molecular Dynamics [![GitHub Actions Lint Status](https://github.com/wehs7661/ensemble_md/actions/workflows/lint.yaml/badge.svg)](https://github.com/wehs7661/ensemble_md/actions/workflows/lint.yaml) [![PyPI version](https://badge.fury.io/py/ensemble-md.svg)](https://badge.fury.io/py/ensemble-md) [![DOI](https://img.shields.io/badge/DOI-arxiv.org%2Fabs%2F2308.06938-green)](https://arxiv.org/abs/2308.06938) + [![MIT license](https://img.shields.io/badge/License-MIT-blue.svg)](https://lbesson.mit-license.org/) [![Downloads](https://static.pepy.tech/badge/ensemble-md)](https://pepy.tech/project/ensemble-md) [![Twitter Follow](https://img.shields.io/twitter/follow/WeiTseHsu?style=social)](https://twitter.com/WeiTseHsu) diff --git a/ensemble_md/analysis/analyze_traj.py b/ensemble_md/analysis/analyze_traj.py index 883f7a20..4765181c 100644 --- a/ensemble_md/analysis/analyze_traj.py +++ b/ensemble_md/analysis/analyze_traj.py @@ -39,7 +39,7 @@ def extract_state_traj(dhdl): return traj -def stitch_trajs(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, save=True): +def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, save=True): """ Stitches the state-space/CV-space trajectories for each starting configuration from DHDL files or PLUMED output files generated at different iterations. @@ -49,7 +49,7 @@ def stitch_trajs(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, save=True files : list A list of lists of file names of GROMACS DHDL files or general GROMACS XVG files or PLUMED ouptput files. Specifically, :code:`files[i]` should be a list containing the files of interest from all iterations in - replica :code:`i`. + replica :code:`i`. The files should be sorted naturally. rep_trajs : list A list of lists that represents the replica space trajectories for each starting configuration. For example, :code:`rep_trajs[0] = [0, 2, 3, 0, 1, ...]` means that starting configuration 0 transitioned to replica 2, then @@ -117,7 +117,7 @@ def stitch_trajs(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, save=True return trajs -def stitch_trajs_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=True): +def stitch_time_series_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=True): """ Stitches the state-space/CV-space time series in the same replica/simulation folder. That is, the output time series is contributed by multiple different trajectories (initiated by @@ -128,7 +128,7 @@ def stitch_trajs_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=True): files : list A list of lists of file names of GROMACS DHDL files or general GROMACS XVG files or PLUMED output files. Specifically, :code:`files[i]` should be a list containing - the files of interest from all iterations in replica :code:`i`. + the files of interest from all iterations in replica :code:`i`. The files should be sorted naturally. shifts : list A list of values for shifting the state indices for each replica. The length of the list should be equal to the number of replicas. This is only needed when :code:`dhdl=True`. @@ -175,6 +175,42 @@ def stitch_trajs_for_sim(files, shifts=None, dhdl=True, col_idx=-1, save=True): return trajs +def stitch_trajs(gmx_executable, files, rep_trajs): + """ + Demuxes GROMACS trajectories from different replicas into individual continuous trajectories. + + Parameters + ---------- + gmx_executable : str + The path to the GROMACS executable. + files : list + A list of lists of file names of GROMACS XTC files. Specifically, :code:`files[i]` should be a list containing + the files of interest from all iterations in replica :code:`i`. The files should be sorted naturally. + rep_trajs : list + A list of lists that represents the replica space trajectories for each starting configuration. For example, + :code:`rep_trajs[0] = [0, 2, 3, 0, 1, ...]` means that starting configuration 0 transitioned to replica 2, then + 3, 0, 1, in iterations 1, 2, 3, 4, ..., respectively. + """ + n_sim = len(files) # number of replicas + n_iter = len(files[0]) # number of iterations per replica + + # First figure out which xtc files each starting configuration corresponds to + # files_sorted[i] contains the xtc files for starting configuration i sorted + # based on iteration indices + files_sorted = [[] for i in range(n_sim)] + for i in range(n_sim): + for j in range(n_iter): + files_sorted[i].append(files[rep_trajs[i][j]][j]) + + # Then, stitch the trajectories for each starting configuration + for i in range(n_sim): + print(f'Concatenating trajectories initiated by configuration {i} ...') + arguments = [gmx_executable, 'trjcat', '-f'] + arguments.extend(files_sorted[i]) + arguments.extend(['-o', f'traj_{i}.xtc']) + utils.run_gmx_cmd(arguments) + + def traj2transmtx(traj, N, normalize=True): """ Computes the transition matrix given a trajectory. For example, if a state-space diff --git a/ensemble_md/cli/analyze_EEXE.py b/ensemble_md/cli/analyze_EEXE.py index 789a6685..c1074274 100644 --- a/ensemble_md/cli/analyze_EEXE.py +++ b/ensemble_md/cli/analyze_EEXE.py @@ -157,7 +157,7 @@ def main(): print('2-1. Stitching trajectories for each starting configuration from dhdl files ...') dhdl_files = [natsort.natsorted(glob.glob(f'sim_{i}/iteration_*/*dhdl*xvg')) for i in range(EEXE.n_sim)] shifts = np.arange(EEXE.n_sim) * EEXE.s - state_trajs = analyze_traj.stitch_trajs(dhdl_files, rep_trajs, shifts=shifts, save=True) # length: the number of replicas # noqa: E501 + state_trajs = analyze_traj.stitch_time_series(dhdl_files, rep_trajs, shifts=shifts, save=True) # length: the number of replicas # noqa: E501 # 2-2. Plot the state-space trajectory print('\n2-2. Plotting transitions between different alchemical states ...') @@ -188,7 +188,7 @@ def main(): print('2-4. Stitching time series of state index for each alchemical range ...') dhdl_files = [natsort.natsorted(glob.glob(f'sim_{i}/iteration_*/*dhdl*xvg')) for i in range(EEXE.n_sim)] shifts = np.arange(EEXE.n_sim) * EEXE.s - state_trajs_for_sim = analyze_traj.stitch_trajs_for_sim(dhdl_files, shifts) + state_trajs_for_sim = analyze_traj.stitch_time_series_for_sim(dhdl_files, shifts) # 2-5. Plot the time series of state index for different alchemical ranges print('\n2-5. Plotting the time series of state index for different alchemical ranges ...') diff --git a/ensemble_md/ensemble_EXE.py b/ensemble_md/ensemble_EXE.py index 9c3d0cb9..9ba63ff6 100644 --- a/ensemble_md/ensemble_EXE.py +++ b/ensemble_md/ensemble_EXE.py @@ -1241,7 +1241,8 @@ def combine_weights(self, weights, weights_err=None): return weights, g_vec - def run_gmx_cmd(self, arguments): + @staticmethod + def run_gmx_cmd(arguments): """ Run a GROMACS command as a subprocess @@ -1321,7 +1322,7 @@ def run_grompp(self, n, swap_pattern): if rank == 0: print('Generating TPR files ...') if rank < self.n_sim: - returncode, stdout, stderr = self.run_gmx_cmd(args_list[rank]) + returncode, stdout, stderr = EnsembleEXE.run_gmx_cmd(args_list[rank]) if returncode != 0: print(f'Error on rank {rank} (return code: {returncode}):\n{stderr}') @@ -1360,7 +1361,7 @@ def run_mdrun(self, n): print('Running EXE simulations ...') if rank < self.n_sim: os.chdir(f'sim_{rank}/iteration_{n}') - returncode, stdout, stderr = self.run_gmx_cmd(arguments) + returncode, stdout, stderr = EnsembleEXE.run_gmx_cmd(arguments) if returncode != 0: print(f'Error on rank {rank} (return code: {returncode}):\n{stderr}') if self.rm_cpt is True: