diff --git a/docs/simulations.rst b/docs/simulations.rst index ff034cfb..937bbb2e 100644 --- a/docs/simulations.rst +++ b/docs/simulations.rst @@ -287,16 +287,18 @@ include parameters for data analysis here. - :code:`None`: No weight combination. - :code:`final`: Combine the final weights. - - :code:`avg`: Combine the weights averaged over from last time the Wang-Landau incrementor was updated. Notably, the time-averaged weights tend to be very noisy at the beginning of the simulation and can drive the combined weights in a bad direction. Therefore, we recommend specifying the parameter :code:`rmse_cutoff` to only use the time-averaged weights when the weights are not changing too much. For more details, check the description of the parameter :code:`rmse_cutoff` below. + - :code:`avg`: Combine the weights averaged over from last time the Wang-Landau incrementor was updated. For more details about weight combination, please refer to :ref:`doc_w_schemes`. - :code:`rmse_cutoff`: (Optional, Default: :code:`None`) - The cutoff for the root-mean-square error (RMSE) between the weights at the end of the current iteration + The cutoff for the root-mean-square error (RMSE) between the weights of the current iteration and the weights averaged over from the last time the Wang-Landau incrementor was updated. - For each replica, the time-averaged weights will be used in weight combination only if the RMSE is smaller than the cutoff. - Otherwise, the final weights will still be used. If this parameter is not specified, then time-averaged weights will always be used, which could be problematic - since time-averaged weights tend to be very noisy at the beginning of the simulation. Note that this parameter is only meanful when :code:`w_combine` is set to :code:`avg`. + For each replica, the RMSE between the averaged weights and the current weights will be calculated. + When :code:`rmse_cutoff` is specified, weight combination will be performed only if the maximum RMSE across all replicas + is smaller than the cutoff. Otherwise, weight combination is deactivated (even if :code:`w_combine` is specified) + because a larger RMSE indicates that the weights are noisy and should not be combined. + The default value is infinity, which means that weight combination will always be performed if :code:`w_combine` is specified. The units of the cutoff are :math:`k_B T`. - :code:`N_cutoff`: (Optional, Default: 1000) The histogram cutoff. -1 means that no histogram correction will be performed. diff --git a/ensemble_md/cli/run_EEXE.py b/ensemble_md/cli/run_EEXE.py index 953fb146..d8cf5be5 100644 --- a/ensemble_md/cli/run_EEXE.py +++ b/ensemble_md/cli/run_EEXE.py @@ -153,7 +153,7 @@ def main(): # Note after `get_swapping_pattern`, `states_` and `weights_` won't be necessarily # since they are updated by `get_swapping_pattern`. (Even if the function does not explicitly # returns `states_` and `weights_`, `states_` and `weights_` can still be different after - # the use of the function.) Therefore, here we create copyes for `states_` and `weights_` + # the use of the function.) Therefore, here we create copies for `states_` and `weights_` # before the use of `get_swapping_pattern`, so we can use them in `histogram_correction`, # `combine_weights` and `update_MDP`. states = copy.deepcopy(states_) @@ -162,11 +162,9 @@ def main(): # 3-3. Perform histogram correction/weight combination if wl_delta != [None for i in range(EEXE.n_sim)]: # weight-updating - print(f'\nCurrent Wang-Landau incrementors: {wl_delta}') + print(f'\nCurrent Wang-Landau incrementors: {wl_delta}\n') - # (1) First we prepare the weights to be combined. For each replica, weights to be combined - # could be either the final weights or the weights averaged since the last update of the - # Wang-Landau incrementor. See the function `prepare_weights` for details. + # (1) First we prepare the weights to be combined. # Note that although averaged weights are sometimes used for histogram correction/weight combination, # the final weights are always used for calculating the acceptance ratio. if EEXE.N_cutoff != -1 or EEXE.w_combine is not None: @@ -178,21 +176,29 @@ def main(): # The product of this step should always be named as "weights" to be used in update_MDP if EEXE.N_cutoff != -1 and EEXE.w_combine is not None: # perform both - weights_preprocessed = EEXE.histogram_correction(weights_input, counts) - weights, g_vec = EEXE.combine_weights(weights_preprocessed) # inverse-variance weighting seems worse # noqa: E501 - EEXE.g_vecs.append(g_vec) + if weights_input is None: + # Then only histogram correction will be performed + print('Note: Weight combination is deactivated because the weights are too noisy.') + weights = EEXE.histogram_correction(weights, counts) + else: + weights_preprocessed = EEXE.histogram_correction(weights_input, counts) + weights, g_vec = EEXE.combine_weights(weights_preprocessed) # inverse-variance weighting seems worse # noqa: E501 + EEXE.g_vecs.append(g_vec) elif EEXE.N_cutoff == -1 and EEXE.w_combine is not None: # only perform weight combination - print('\nNote: No histogram correction will be performed.') - weights, g_vec = EEXE.combine_weights(weights_input) # inverse-variance weighting seems worse - EEXE.g_vecs.append(g_vec) + print('Note: No histogram correction will be performed.') + if weights_input is None: + print('Note: Weight combination is deactivated because the weights are too noisy.') + else: + weights, g_vec = EEXE.combine_weights(weights_input) # inverse-variance weighting seems worse + EEXE.g_vecs.append(g_vec) elif EEXE.N_cutoff != -1 and EEXE.w_combine is None: # only perform histogram correction - print('\nNote: No weight combination will be performed.') + print('Note: No weight combination will be performed.') weights = EEXE.histogram_correction(weights_input, counts) else: w_for_printing = EEXE.combine_weights(weights)[1] - print('\nNote: No histogram correction will be performed.') + print('Note: No histogram correction will be performed.') print('Note: No weight combination will be performed.') print(f'The alchemical weights of all states: \n {list(np.round(w_for_printing, decimals=3))}') diff --git a/ensemble_md/ensemble_EXE.py b/ensemble_md/ensemble_EXE.py index 552df960..ed46827f 100644 --- a/ensemble_md/ensemble_EXE.py +++ b/ensemble_md/ensemble_EXE.py @@ -160,7 +160,7 @@ def set_params(self, analysis): "proposal": 'exhaustive', "acceptance": "metropolis", "w_combine": None, - "rmse_cutoff": None, + "rmse_cutoff": np.inf, "N_cutoff": 1000, "n_ex": 'N^3', # only active for multiple swaps. "verbose": True, @@ -205,11 +205,11 @@ def set_params(self, analysis): if self.err_method not in [None, 'propagate', 'bootstrap']: raise ParameterError("The specified method for error estimation is not available. Available options include 'propagate', and 'bootstrap'.") # noqa: E501 - if self.w_combine == 'avg' and self.rmse_cutoff is None: - self.warnings.append('Warning: We recommend setting rmse_cutoff when w_combine is set to "avg".') + if self.w_combine is not None and self.rmse_cutoff == np.inf: + self.warnings.append('Warning: We recommend setting rmse_cutoff when w_combine is used.') - if self.rmse_cutoff is not None: - if type(self.rmse_cutoff) is not float: + if self.rmse_cutoff != np.inf: + if type(self.rmse_cutoff) is not float and type(self.rmse_cutoff) is not int: raise ParameterError("The parameter 'rmse_cutoff' should be a float.") params_int = ['n_sim', 'n_iter', 's', 'N_cutoff', 'df_spacing', 'n_ckpt', 'n_bootstrap'] # integer parameters # noqa: E501 @@ -223,11 +223,9 @@ def set_params(self, analysis): if type(getattr(self, i)) != int: raise ParameterError(f"The parameter '{i}' should be an integer.") - params_pos = ['n_sim', 'n_iter', 'n_ckpt', 'df_spacing', 'n_bootstrap'] # positive parameters + params_pos = ['n_sim', 'n_iter', 'n_ckpt', 'df_spacing', 'n_bootstrap', 'rmse_cutoff'] # positive parameters if self.nst_sim is not None: params_pos.append('nst_sim') - if self.rmse_cutoff is not None: - params_pos.append('rmse_cutoff') for i in params_pos: if getattr(self, i) <= 0: raise ParameterError(f"The parameter '{i}' should be positive.") @@ -750,82 +748,6 @@ def extract_final_log_info(self, log_files): return wl_delta, weights, counts - def get_averaged_weights(self, log_files): - """ - For each replica, calculate the averaged weights (and the associated error) from the time series - of the weights since the previous update of the Wang-Landau incrementor. - - Parameters - ---------- - log_files : list - A list of file paths to GROMACS LOG files of different replicas. - - Returned - -------- - weights_avg : list - A list of lists of weights averaged since the last update of the Wang-Landau - incrementor. The length of the list should be the number of replicas. - weights_err : list - A list of lists of errors corresponding to the values in :code:`weights_avg`. - """ - for i in range(self.n_sim): - weights, _, wl_delta, _ = gmx_parser.parse_log(log_files[i]) - if self.current_wl_delta[i] == wl_delta: - self.updating_weights[i] += weights # expand the list - else: - self.current_wl_delta[i] = wl_delta - self.updating_weights[i] = weights - - # shape of self.updating_weights is (n_sim, n_points, n_states), but n_points can be different - # for different replicas, which will error out np.mean(self.updating_weights, axis=1) - weights_avg = [np.mean(self.updating_weights[i], axis=0).tolist() for i in range(self.n_sim)] - weights_err = [] - for i in range(self.n_sim): - if len(self.updating_weights[i]) == 1: # this would lead to a RunTime Warning and nan - weights_err.append([0] * self.n_sub) # in `weighted_mean``, a simple average will be returned. - else: - weights_err.append(np.std(self.updating_weights[i], axis=0, ddof=1).tolist()) - - return weights_avg, weights_err - - def prepare_weights(self, weights_avg, weights_final): - """ - Prepared weights to be combined by the function :code:`combine_weights`. - For each replica, if the RMSE between the averaged weights and the final weights is - smaller than the cutoff specified in the input YAML file (the parameter :code:`rmse_cutoff`), - then the averaged weights will be chosen for the output (for the weight combination). Otherwise, - the final weights will be used. - - Parameters - ---------- - weights_avg : list - A list of lists of weights averaged since the last update of the Wang-Landau - incrementor. The length of the list should be the number of replicas. - weights_final : list - A list of lists of final weights of all simulations. The length of the list should - be the number of replicas. - - Returns - ------- - weights_output : list - A list of lists of weights to be combined. - """ - rmse_list = [utils.calc_rmse(weights_avg[i], weights_final[i]) for i in range(self.n_sim)] - - if self.w_combine == 'final': - weights_output = weights_final - elif self.w_combine == 'avg': - weights_output = [] - for i in range(self.n_sim): - if rmse_list[i] < self.rmse_cutoff: - print('Note: The time-averaged weights will be used for weight combination.') - weights_output.append(weights_avg[i]) - else: - print('Note: The final weights will be used for weight combination, as the time-averaged weights still fluctuate too much.') # noqa: E501 - weights_output.append(weights_final[i]) - - return weights_output - @staticmethod def identify_swappable_pairs(states, state_ranges, neighbor_exchange, add_swappables=None): """ @@ -1219,6 +1141,81 @@ def histogram_correction(self, weights, counts): return weights + def get_averaged_weights(self, log_files): + """ + For each replica, calculate the averaged weights (and the associated error) from the time series + of the weights since the previous update of the Wang-Landau incrementor. + + Parameters + ---------- + log_files : list + A list of file paths to GROMACS LOG files of different replicas. + + Returned + -------- + weights_avg : list + A list of lists of weights averaged since the last update of the Wang-Landau + incrementor. The length of the list should be the number of replicas. + weights_err : list + A list of lists of errors corresponding to the values in :code:`weights_avg`. + """ + for i in range(self.n_sim): + weights, _, wl_delta, _ = gmx_parser.parse_log(log_files[i]) + if self.current_wl_delta[i] == wl_delta: + self.updating_weights[i] += weights # expand the list + else: + self.current_wl_delta[i] = wl_delta + self.updating_weights[i] = weights + + # shape of self.updating_weights is (n_sim, n_points, n_states), but n_points can be different + # for different replicas, which will error out np.mean(self.updating_weights, axis=1) + weights_avg = [np.mean(self.updating_weights[i], axis=0).tolist() for i in range(self.n_sim)] + weights_err = [] + for i in range(self.n_sim): + if len(self.updating_weights[i]) == 1: # this would lead to a RunTime Warning and nan + weights_err.append([0] * self.n_sub) # in `weighted_mean``, a simple average will be returned. + else: + weights_err.append(np.std(self.updating_weights[i], axis=0, ddof=1).tolist()) + + return weights_avg, weights_err + + def prepare_weights(self, weights_avg, weights_final): + """ + Prepared weights to be combined by the function :code:`combine_weights`. + For each replica, the RMSE between the averaged weights and the final weights is calculated. If the + maximum of the RMSEs of all replicas is smaller than the cutoff specified in the input YAML file + (the parameter :code:`rmse_cutoff`), either final weights or time-averaged weights will be used + (depending on the value of the parameter :code:`w_combine`). Otherwise, :code:`None` will be returned, + which will lead to deactivation of weight combination in the CLI :code:`run_EEXE`. + + Parameters + ---------- + weights_avg : list + A list of lists of weights averaged since the last update of the Wang-Landau + incrementor. The length of the list should be the number of replicas. + weights_final : list + A list of lists of final weights of all simulations. The length of the list should + be the number of replicas. + + Returns + ------- + weights_output : list + A list of lists of weights to be combined. + """ + rmse_list = [utils.calc_rmse(weights_avg[i], weights_final[i]) for i in range(self.n_sim)] + rmse_str = ', '.join([f'{i:.2f}' for i in rmse_list]) + print(f'RMSE between the final weights and time-averaged weights for each replica: {rmse_str} kT') + if np.max(rmse_list) < self.rmse_cutoff: + # Weight combination will be activated + if self.w_combine == 'final': + weights_output = weights_final + elif self.w_combine == 'avg': + weights_output = weights_avg + else: + weights_output = None + + return weights_output + def combine_weights(self, weights, weights_err=None): """ Combine alchemical weights across multiple replicas. Note that if @@ -1229,7 +1226,7 @@ def combine_weights(self, weights, weights_err=None): Parameters ---------- weights : list - A list of Wang-Landau weights of ALL simulations + A list of Wang-Landau weights of ALL simulations. Returns -------