From 150d6094f8dd9205433d720405289630d411fbed Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Wed, 24 Jan 2024 17:07:17 -0500 Subject: [PATCH 01/14] convergence plot can now plot multiple replicas --- safep/plotting.py | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/safep/plotting.py b/safep/plotting.py index 1ef1762..0076a41 100644 --- a/safep/plotting.py +++ b/safep/plotting.py @@ -50,10 +50,9 @@ def do_conv_plot(ax, X, fs, ferr, fwdColor, label=None): return ax -def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D55E00', lgndF=None, lgndB=None, fontsize=12): +def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D55E00', lgndF=None, lgndB=None, fontsize=12, errorbars=True): ''' - Convergence plot. Does the convergence calculation and plotting. - Arguments: u_nk, tau (an error tuning factor), units (kT or kcal/mol), RT + Convergence plot. Does the convergence plotting. Returns: a pyplot ''' if not lgndF: @@ -63,9 +62,35 @@ def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D lower = fs[-1]-ferr[-1] upper = fs[-1]+ferr[-1] - theax.fill_between([0,1],[lower, lower], [upper, upper], color=bwdColor, alpha=0.25) - theax.errorbar(np.arange(len(fs))/len(fs)+0.1, fs, yerr=ferr, marker='o', linewidth=1, color=fwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=fwdColor, ms=5) - theax.errorbar(np.arange(len(bs))/len(fs)+0.1, bs, yerr=berr, marker='o', linewidth=1, color=bwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=bwdColor, ms=5, linestyle='--') + theax.fill_between([0,1],[lower, lower], [upper, upper], color='gray', alpha=0.1) + + fwdparams = {'marker':'o', + 'linewidth':1, + 'color':fwdColor, + 'markerfacecolor':'white', + 'markeredgewidth':1, + 'markeredgecolor':fwdColor, + 'ms':5, + } + bwdparams = {'marker':'o', + 'linewidth':1, + 'color':bwdColor, + 'markerfacecolor':'white', + 'markeredgewidth':1, + 'markeredgecolor':bwdColor, + 'ms':5, + 'linestyle':'--'} + + xfwd = np.arange(len(fs))/len(fs)+0.1 + xbwd = np.arange(len(bs))/len(fs)+0.1 + + if errorbars: + theax.errorbar(xfwd, fs, yerr=ferr, **fwdparams) + theax.errorbar(xbwd, bs, yerr=berr, **bwdparams) + else: + theax.errorbar(xfwd, fs, **fwdparams) + theax.errorbar(xbwd, bs, **bwdparams) + theax.xaxis.set_ticks([0, 0.2, 0.4, 0.6, 0.8, 1]) From 6404a997bda2844bd2e354ce1049d81051c10845 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Wed, 24 Jan 2024 17:07:35 -0500 Subject: [PATCH 02/14] general plots now support multiple replicas. --- safep/plotting.py | 50 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/safep/plotting.py b/safep/plotting.py index 0076a41..a5d354c 100644 --- a/safep/plotting.py +++ b/safep/plotting.py @@ -134,26 +134,50 @@ def fb_discrepancy_hist(dG_f, dG_b): return plt.gca() -def plot_general(cumulative, cumulativeYlim, perWindow, perWindowYlim, RT, width=8, height=4, PDFtype='KDE', fontsize=12): - fig, axes = plt.subplots(3,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]}) - ((cumAx, del1),( eachAx, del2), (hystAx, pdfAx)) = axes - - fig.delaxes(del1) - fig.delaxes(del2) +def plot_general(cumulative, + cumulativeYlim, + perWindow, + perWindowYlim, + RT, + width=8, + height=4, + PDFtype='KDE', + fontsize=12, + fig=None, + axes=None, + hysttype='classic', + label=None, + color='blue', + errorbars=True): + if fig is None and axes is None: + fig, axes = plt.subplots(3,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]}) + ((cumAx, del1),( eachAx, del2), (hystAx, pdfAx)) = axes + + fig.delaxes(del1) + fig.delaxes(del2) + else: + cumAx, eachAx, hystAx, pdfAx = axes # Cumulative change in kcal/mol - cumAx.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors, marker=None, linewidth=1) + cumAx.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors, marker=None, linewidth=1, label=label, color=color) cumAx.set(ylabel=r'Cumulative $\mathrm{\Delta} G_{\lambda}$'+'\n(kcal/mol)', ylim=cumulativeYlim) # Per-window change in kcal/mol - eachAx.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf, marker=None, linewidth=1) - eachAx.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5) - eachAx.errorbar(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5) + if errorbars: + eachAx.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf, marker=None, linewidth=1, color=color) + eachAx.errorbar(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color) + eachAx.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color) + eachAx.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=perWindowYlim) #Hysteresis Plots diff = perWindow.EXP['difference'] - hystAx.vlines(perWindow.index, np.zeros(len(perWindow)), diff, label="fwd - bwd", linewidth=2) + assert hysttype in ['classic', 'lines'], f"ERROR: I don't know how to plot hysttype={hysttype}" + if hysttype == 'classic': + hystAx.vlines(perWindow.index, np.zeros(len(perWindow)), diff, label="fwd - bwd", linewidth=2, color=color) + elif hysttype == 'lines': + hystAx.plot(perWindow.index, diff, label="fwd - bwd", linewidth=1, color=color) + hystAx.set(ylabel=r'$\delta_\lambda$ (kcal/mol)', ylim=(-1,1)) hystAx.set_xlabel(xlabel=r'$\lambda$', fontsize=fontsize) @@ -161,11 +185,11 @@ def plot_general(cumulative, cumulativeYlim, perWindow, perWindowYlim, RT, width kernel = sp.stats.gaussian_kde(diff) pdfX = np.linspace(-1, 1, 1000) pdfY = kernel(pdfX) - pdfAx.plot(pdfY, pdfX, label='KDE') + pdfAx.plot(pdfY, pdfX, label='KDE', color=color) elif PDFtype=='Histogram': pdfY, pdfX = np.histogram(diff, density=True) pdfX = pdfX[:-1]+(pdfX[1]-pdfX[0])/2 - pdfAx.plot(pdfY, pdfX, label="Estimated Distribution") + pdfAx.plot(pdfY, pdfX, label="Estimated Distribution", color=color) else: raise(f"Error: PDFtype {PDFtype} not recognized") From e48919f69b5db347dd1133e42f7096db7f08dd89 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Fri, 9 Feb 2024 19:37:53 -0500 Subject: [PATCH 03/14] Draft notebook with replicas included --- .../BAR_Estimator_Multiple_Replicas.ipynb | 412 ++++++++++++++++++ 1 file changed, 412 insertions(+) create mode 100644 Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb diff --git a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb new file mode 100644 index 0000000..5741f82 --- /dev/null +++ b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb @@ -0,0 +1,412 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "708b4d0d", + "metadata": {}, + "outputs": [], + "source": [ + "import safep\n", + "import alchemlyb\n", + "from glob import glob\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "from alchemlyb.parsing import namd\n", + "from IPython.display import display, Markdown\n", + "from pathlib import Path\n", + "from dataclasses import dataclass\n", + "import scipy as sp\n", + "from alchemlyb.estimators import BAR\n", + "import warnings\n", + "warnings.simplefilter(action='ignore', category=FutureWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf843fad", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "@dataclass\n", + "class FepRun:\n", + " u_nk: pd.DataFrame()\n", + " perWindow: pd.DataFrame()\n", + " cumulative: pd.DataFrame()\n", + " forward: pd.DataFrame()\n", + " forward_error: pd.DataFrame()\n", + " backward: pd.DataFrame()\n", + " backward_error: pd.DataFrame()\n", + " per_lambda_convergence: pd.DataFrame()\n", + " color: str" + ] + }, + { + "cell_type": "markdown", + "id": "a9d06104", + "metadata": { + "tags": [] + }, + "source": [ + "# User parameters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e67078a6", + "metadata": {}, + "outputs": [], + "source": [ + "dataroot = Path('.')\n", + "replicas = dataroot.glob('new_POPC*/')\n", + "filename_regex='outputs/*.fepout'\n", + "\n", + "temperature = 303.15\n", + "RT = 0.00198720650096 * temperature\n", + "detectEQ = True #Flag for automated equilibrium detection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6864313", + "metadata": {}, + "outputs": [], + "source": [ + "colors = ['blue', 'red', 'green', 'purple', 'orange', 'violet', 'cyan']\n", + "itcolors = iter(colors)" + ] + }, + { + "cell_type": "markdown", + "id": "30721156", + "metadata": { + "tags": [] + }, + "source": [ + "# Extract key features from the MBAR fitting and get ΔG\n", + "Note: alchemlyb operates in units of kT by default. We multiply by RT to convert to units of kcal/mol." + ] + }, + { + "cell_type": "markdown", + "id": "80b1034d", + "metadata": {}, + "source": [ + "# Read and plot number of samples after detecting EQ" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7c747b75-8fa3-48f9-9ab3-ac4a12982c01", + "metadata": {}, + "outputs": [], + "source": [ + "fepruns = {}\n", + "for replica in replicas:\n", + " print(f\"Reading {replica}\")\n", + " unkpath = replica.joinpath('decorrelated.csv')\n", + " u_nk = None\n", + " if unkpath.is_file():\n", + " print(f\"Found existing dataframe. Reading.\")\n", + " u_nk = safep.read_UNK(unkpath)\n", + " else:\n", + " print(f\"Didn't find existing dataframe at {unkpath}. Checking for raw fepout files.\")\n", + " fepoutFiles = list(replica.glob(filename_regex))\n", + " totalSize = 0\n", + " for file in fepoutFiles:\n", + " totalSize += os.path.getsize(file)\n", + " print(f\"Will process {len(fepoutFiles)} fepout files.\\nTotal size:{np.round(totalSize/10**9, 2)}GB\")\n", + "\n", + " if len(list(fepoutFiles))>0:\n", + " print(\"Reading fepout files\")\n", + " fig, ax = plt.subplots()\n", + "\n", + " u_nk = namd.extract_u_nk(fepoutFiles, temperature)\n", + " u_nk = u_nk.sort_index(axis=0, level=1).sort_index(axis=1)\n", + " safep.plot_samples(ax, u_nk, color='blue', label='Raw Data')\n", + "\n", + " if detectEQ:\n", + " print(\"Detecting equilibrium\")\n", + " u_nk = safep.detect_equilibrium_u_nk(u_nk)\n", + " safep.plot_samples(ax, u_nk, color='orange', label='Equilibrium-Detected')\n", + "\n", + " plt.savefig(f\"./{str(replica)}_FEP_number_of_samples.pdf\")\n", + " plt.show()\n", + " safep.save_UNK(u_nk, unkpath)\n", + " else:\n", + " print(f\"WARNING: no fepout files found for {replica}. Skipping.\")\n", + " \n", + " if u_nk is not None:\n", + " fepruns[str(replica)] = FepRun(u_nk, None, None, None, None, None, None, None, next(itcolors))\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e81f216", + "metadata": {}, + "outputs": [], + "source": [ + "for key, feprun in fepruns.items():\n", + " u_nk = feprun.u_nk\n", + " feprun.perWindow, feprun.cumulative = safep.do_estimation(u_nk) #Run the BAR estimator on the fep data\n", + " feprun.forward, feprun.forward_error, feprun.backward, feprun.backward_error = safep.do_convergence(u_nk) #Used later in the convergence plot'\n", + " feprun.per_lambda_convergence = safep.do_per_lambda_convergence(u_nk)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "caae40d3", + "metadata": {}, + "source": [ + "# Plot data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd69e9c8-94d8-4a2a-9ec8-a24f78059431", + "metadata": {}, + "outputs": [], + "source": [ + "toprint = \"\"\n", + "dGs = []\n", + "errors = []\n", + "for key, feprun in fepruns.items():\n", + " cumulative = feprun.cumulative\n", + " dG = np.round(cumulative.BAR.f.iloc[-1]*RT, 1)\n", + " error = np.round(cumulative.BAR.errors.iloc[-1]*RT, 1)\n", + " dGs.append(dG)\n", + " errors.append(error)\n", + "\n", + " changeAndError = f'{key}: \\u0394G = {dG}\\u00B1{error} kcal/mol'\n", + " toprint += '{}
'.format(changeAndError)\n", + "\n", + "toprint += '{}
'.format('__________________')\n", + "mean = np.average(dGs)\n", + "sterr = np.round(np.std(dGs),1)\n", + "toprint += '{}
'.format(f'mean: {mean}')\n", + "toprint += '{}
'.format(f'sterr: {sterr}')\n", + "Markdown(toprint)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1fde633", + "metadata": {}, + "outputs": [], + "source": [ + "def do_agg_data(dataax, plotax):\n", + " agg_data = []\n", + " lines = dataax.lines\n", + " for line in lines:\n", + " agg_data.append(line.get_ydata())\n", + " flat = np.array(agg_data).flatten()\n", + " kernel = sp.stats.gaussian_kde(flat)\n", + " pdfX = np.linspace(-1, 1, 1000)\n", + " pdfY = kernel(pdfX)\n", + " std = np.std(flat)\n", + " mean = np.average(flat)\n", + " temp = pd.Series(pdfY, index=pdfX)\n", + " mode = temp.idxmax()\n", + "\n", + " textstr = r\"$\\rm mode=$\"+f\"{np.round(mode,2)}\"+\"\\n\"+fr\"$\\mu$={np.round(mean,2)}\"+\"\\n\"+fr\"$\\sigma$={np.round(std,2)}\"\n", + " props = dict(boxstyle='square', facecolor='white', alpha=1)\n", + " plotax.text(0.175, 0.95, textstr, transform=plotax.transAxes, fontsize=14,\n", + " verticalalignment='top', bbox=props)\n", + "\n", + " return plotax" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "282938bb-988e-4e00-a2e6-ab018722569e", + "metadata": {}, + "outputs": [], + "source": [ + "fig = None\n", + "for key, feprun in fepruns.items():\n", + " if fig is None:\n", + " fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n", + " else:\n", + " fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=fig, axes=axes, hysttype='lines', label=key, color=feprun.color)\n", + " #fig.suptitle(changeAndError)\n", + "\n", + "# hack to get aggregate data:\n", + "axes[3] = do_agg_data(axes[2], axes[3])\n", + "\n", + "\n", + "axes[0].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", + "axes[0].legend()\n", + "plt.savefig(dataroot.joinpath('FEP_general_figures.pdf'))" + ] + }, + { + "cell_type": "markdown", + "id": "c7f90d8c", + "metadata": {}, + "source": [ + "# Plot the estimated total change in free energy as a function of simulation time; contiguous subsets starting at t=0 (\"Forward\") and t=end (\"Reverse\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70523392-e935-4ff4-b6e9-cf679c787bcc", + "metadata": {}, + "outputs": [], + "source": [ + "fig, convAx = plt.subplots(1,1)\n", + "\n", + "for key, feprun in fepruns.items():\n", + " convAx = safep.convergence_plot(convAx, \n", + " feprun.forward*RT, \n", + " feprun.forward_error*RT, \n", + " feprun.backward*RT,\n", + " feprun.backward_error*RT,\n", + " fwdColor=feprun.color,\n", + " bwdColor=feprun.color,\n", + " errorbars=False\n", + " )\n", + " convAx.get_legend().remove()\n", + "\n", + "forward_line, = convAx.plot([],[],linestyle='-', color='black', label='Forward Time Sampling')\n", + "backward_line, = convAx.plot([],[],linestyle='--', color='black', label='Backward Time Sampling')\n", + "convAx.legend(handles=[forward_line, backward_line])\n", + "ymin = np.min(dGs)-1\n", + "ymax = np.max(dGs)+1\n", + "convAx.set_ylim((ymin,ymax))\n", + "plt.savefig(dataroot.joinpath('FEP_convergence.pdf'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1bf6ac36-f102-4da3-82d2-36a7741584e5", + "metadata": {}, + "outputs": [], + "source": [ + "import scipy as sp\n", + "fig, (Dax, pdfAx) = plt.subplots(1,2, gridspec_kw={'width_ratios': [2, 1]}, sharey='row', figsize=(10,5))\n", + "\n", + "for key, feprun in fepruns.items():\n", + " deltas = feprun.per_lambda_convergence.BAR.df*RT\n", + " lambdas = feprun.per_lambda_convergence.index\n", + " Dax.errorbar(lambdas, deltas, color=feprun.color, label=key)\n", + " Dax.set_xlabel(r\"$\\lambda$\")\n", + " Dax.set_ylabel(r\"$\\delta_\\mathrm{50\\%}$ (kcal/mol)\")\n", + "\n", + " kernel = sp.stats.gaussian_kde(deltas)\n", + " pdfX = np.linspace(-0.3, 0.3, 1000)\n", + " pdfY = kernel(pdfX)\n", + " pdfAx.plot(pdfY, pdfX, label='KDE', color=feprun.color)\n", + "\n", + "Dax.legend()\n", + "pdfAx.set_xlabel(\"KDE\")\n", + "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b237a8ee-f33e-4c05-b4ed-856bb4f34ca2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6ab4621", + "metadata": {}, + "outputs": [], + "source": [ + "genfig = None\n", + "for key, feprun in fepruns.items():\n", + " if genfig is None:\n", + " genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n", + " else:\n", + " genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=genfig, axes=genaxes, hysttype='lines', label=key, color=feprun.color)\n", + "plt.delaxes(genaxes[0])\n", + "plt.delaxes(genaxes[1])\n", + "\n", + "genaxes[3] = do_agg_data(axes[2], axes[3])\n", + "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", + "\n", + "for txt in genfig.texts:\n", + " print(1)\n", + " txt.set_visible(False)\n", + " txt.set_text(\"\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b62e0eaa", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5b0435ef", + "metadata": {}, + "outputs": [], + "source": [ + "genfig, [hystax, pdfax] = plt.subplots(1,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]})\n", + "for key, feprun in fepruns.items():\n", + " hystax, pdfax = safep.plot_hysteresis(hystax, pdfax, feprun.perWindow, hysttype='lines', label=key, color=feprun.color)\n", + "\n", + "pdfax = do_agg_data(hystax, pdfax)\n", + "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", + "\n", + "for txt in genfig.texts:\n", + " print(1)\n", + " txt.set_visible(False)\n", + " txt.set_text(\"\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96418460", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From bb887827a469f1d7f00d3649f015f2b7738e06ea Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Fri, 9 Feb 2024 20:34:26 -0500 Subject: [PATCH 04/14] whitespace and comments --- safep/plotting.py | 131 +++++++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 48 deletions(-) diff --git a/safep/plotting.py b/safep/plotting.py index a5d354c..def93ee 100644 --- a/safep/plotting.py +++ b/safep/plotting.py @@ -134,14 +134,14 @@ def fb_discrepancy_hist(dG_f, dG_b): return plt.gca() -def plot_general(cumulative, - cumulativeYlim, - perWindow, - perWindowYlim, - RT, - width=8, - height=4, - PDFtype='KDE', +def plot_general(cumulative, + cumulativeYlim, + perWindow, + perWindowYlim, + RT, + width=8, + height=4, + PDFtype='KDE', fontsize=12, fig=None, axes=None, @@ -149,76 +149,111 @@ def plot_general(cumulative, label=None, color='blue', errorbars=True): + ''' + Plot the general analysis of cumulative and per-window changes in delta-G. + + Arguments: + - cumulative: A pandas DataFrame containing the cumulative changes in delta-G. + - cumulativeYlim: The y-axis limits for the cumulative plot. + - perWindow: A pandas DataFrame containing the per-window changes in delta-G. + - perWindowYlim: The y-axis limits for the per-window plot. + - RT: The gas constant times the temperature. + - width: The width of the figure. + - height: The height of the figure. + - PDFtype: The type of probability density function to plot (either 'KDE' or 'Histogram'). + - fontsize: The font size of the plot labels. + - fig: The figure object to use for plotting. + - axes: The axes objects to use for plotting. + - hysttype: The type of hysteresis plot to use (either 'classic' or 'lines'). + - label: The label for the plot. + - color: The color of the plot. + - errorbars: Whether to include error bars in the per-window plot. + + Returns: + - fig: The figure object. + - axes: The axes objects. + ''' if fig is None and axes is None: fig, axes = plt.subplots(3,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]}) - ((cumAx, del1),( eachAx, del2), (hystAx, pdfAx)) = axes + ((cumul_ax, del1),( each_ax, del2), (hyst_ax, pdf_ax)) = axes fig.delaxes(del1) fig.delaxes(del2) else: - cumAx, eachAx, hystAx, pdfAx = axes + cumul_ax, each_ax, hyst_ax, pdf_ax = axes # Cumulative change in kcal/mol - cumAx.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors, marker=None, linewidth=1, label=label, color=color) - cumAx.set(ylabel=r'Cumulative $\mathrm{\Delta} G_{\lambda}$'+'\n(kcal/mol)', ylim=cumulativeYlim) + cumul_ax.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors, marker=None, linewidth=1, label=label, color=color) + cumul_ax.set(ylabel=r'Cumulative $\mathrm{\Delta} G_{\lambda}$'+'\n(kcal/mol)', ylim=cumulativeYlim) # Per-window change in kcal/mol if errorbars: - eachAx.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf, marker=None, linewidth=1, color=color) - eachAx.errorbar(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color) - eachAx.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color) - - eachAx.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=perWindowYlim) + each_ax.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf, marker=None, linewidth=1, color=color) + each_ax.errorbar(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color) + each_ax.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color) + + each_ax.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=perWindowYlim) #Hysteresis Plots - diff = perWindow.EXP['difference'] + hyst_ax, pdf_ax = plot_hysteresis(hyst_ax, pdf_ax, perWindow, hysttype, color, fontsize, PDFtype) + + fig.set_figwidth(width) + fig.set_figheight(height*3) + fig.tight_layout() + + for ax in [cumul_ax,each_ax,hyst_ax,pdf_ax]: + ax.set_ylabel(ax.get_ylabel(), fontsize=fontsize) + + return fig, [cumul_ax,each_ax,hyst_ax,pdf_ax] + + +def plot_hysteresis(hyst_ax, pdf_ax, per_window, hysttype, color='blue', fontsize=12, pdf_type='KDE'): + diff = per_window.EXP['difference'] assert hysttype in ['classic', 'lines'], f"ERROR: I don't know how to plot hysttype={hysttype}" if hysttype == 'classic': - hystAx.vlines(perWindow.index, np.zeros(len(perWindow)), diff, label="fwd - bwd", linewidth=2, color=color) + hyst_ax.vlines(per_window.index, np.zeros(len(per_window)), diff, label="fwd - bwd", linewidth=2, color=color) elif hysttype == 'lines': - hystAx.plot(perWindow.index, diff, label="fwd - bwd", linewidth=1, color=color) + hyst_ax.plot(per_window.index, diff, label="fwd - bwd", linewidth=1, color=color) - hystAx.set(ylabel=r'$\delta_\lambda$ (kcal/mol)', ylim=(-1,1)) - hystAx.set_xlabel(xlabel=r'$\lambda$', fontsize=fontsize) - - if PDFtype=='KDE': + ytxt = r'$\delta_\lambda$ (kcal/mol)' + hyst_ax.set_ylabel(ylabel=ytxt, fontsize=fontsize) + hyst_ax.set_ylim((-1,1)) + + xtxt = r'$\lambda$' + hyst_ax.set_xlabel(xlabel=xtxt, fontsize=fontsize) + + if pdf_type=='KDE': kernel = sp.stats.gaussian_kde(diff) - pdfX = np.linspace(-1, 1, 1000) - pdfY = kernel(pdfX) - pdfAx.plot(pdfY, pdfX, label='KDE', color=color) - elif PDFtype=='Histogram': - pdfY, pdfX = np.histogram(diff, density=True) - pdfX = pdfX[:-1]+(pdfX[1]-pdfX[0])/2 - pdfAx.plot(pdfY, pdfX, label="Estimated Distribution", color=color) + pdf_x = np.linspace(-1, 1, 1000) + pdf_y = kernel(pdf_x) + pdf_ax.plot(pdf_y, pdf_x, label='KDE', color=color) + elif pdf_type=='Histogram': + pdf_y, pdf_x = np.histogram(diff, density=True) + pdf_x = pdf_x[:-1]+(pdf_x[1]-pdf_x[0])/2 + pdf_ax.plot(pdf_y, pdf_x, label="Estimated Distribution", color=color) else: - raise(f"Error: PDFtype {PDFtype} not recognized") - - pdfAx.set_xlabel(PDFtype, fontsize=fontsize) + raise f"Error: PDFtype {pdf_type} not recognized" + + pdf_ax.set_xlabel(pdf_type, fontsize=fontsize) std = np.std(diff) mean = np.average(diff) - temp = pd.Series(pdfY, index=pdfX) + temp = pd.Series(pdf_y, index=pdf_x) mode = temp.idxmax() - - textstr = r"$\rm mode=$"+f"{np.round(mode,2)}"+"\n"+fr"$\mu$={np.round(mean,2)}"+"\n"+fr"$\sigma$={np.round(std,2)}" + + textstr = fr"$\rm mode=${np.round(mode,2)}"+"\n"+fr"$\mu$={np.round(mean,2)}"+"\n"+fr"$\sigma$={np.round(std,2)}" props = dict(boxstyle='square', facecolor='white', alpha=0.5) - pdfAx.text(0.15, 0.95, textstr, transform=pdfAx.transAxes, fontsize=14, + pdf_ax.text(0.15, 0.95, textstr, transform=pdf_ax.transAxes, fontsize=14, verticalalignment='top', bbox=props) - fig.set_figwidth(width) - fig.set_figheight(height*3) - fig.tight_layout() - - for ax in [cumAx,eachAx,hystAx,pdfAx]: - ax.set_ylabel(ax.get_ylabel(), fontsize=fontsize) + return hyst_ax, pdf_ax - return fig, [cumAx,eachAx,hystAx,pdfAx] -''' -This is a deprecated plotting function -''' def plot_general_legacy(cumulative, cumulativeYlim, perWindow, perWindowYlim, RT, width=8, height=4, PDFtype='KDE', fontsize=12): + ''' + This is a deprecated plotting function + ''' fig, axes = plt.subplots(4,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]}) ((cumAx, del1),( eachAx, del2), (ddGAx, del3), (hystAx, pdfAx)) = axes From aa589f049410ca709778607435168663edd30787 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Fri, 9 Feb 2024 20:35:46 -0500 Subject: [PATCH 05/14] adding EXP estimation to convergence --- safep/processing.py | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/safep/processing.py b/safep/processing.py index 1c6c58a..c9222d6 100644 --- a/safep/processing.py +++ b/safep/processing.py @@ -195,12 +195,13 @@ def alt_convergence(u_nk, nbins): return np.array(forward), np.array(forward_error) -def do_convergence(u_nk, tau=1, num_points=10): +def do_convergence(u_nk, tau=1, num_points=10, estimator='BAR'): """ Convergence calculation. Incrementally adds data from either the start or the end of each windows simulation and calculates the resulting change in free energy. Arguments: u_nk, tau (an error scaling factor), num_points (number of chunks) Returns: forward-sampled estimate (starting from t=start), forward-sampled error, backward-sampled estimate (from t=end), backward-sampled error """ + assert estimator in ['BAR', 'EXP'], "ERROR: I only know BAR and EXP estimators." groups = u_nk.groupby("fep-lambda") forward = [] @@ -210,18 +211,33 @@ def do_convergence(u_nk, tau=1, num_points=10): for i in range(1, num_points + 1): # forward partial = subsample(groups, 0, 100 * i / num_points) - estimate = BAR().fit(partial) - l, l_mid, f, df, ddf, errors = get_BAR(estimate) - - forward.append(f.iloc[-1]) - forward_error.append(errors[-1]) - + if estimator=='BAR': + estimate = BAR().fit(partial) + l, l_mid, f, df, ddf, errors = get_BAR(estimate) + f_append = f.iloc[-1] + err_append = errors[-1] + else: + expl, expmid, dG_fs, dG_bs = get_exponential(partial) + f_append = np.average([dG_fs[-1], dG_bs[-1]]) + err_append = np.std([dG_fs[-1], dG_bs[-1]]) + + forward.append(f_append) + forward_error.append(err_append) + + # backward partial = subsample(groups, 100 * (1 - i / num_points), 100) - estimate = BAR().fit(partial) - l, l_mid, f, df, ddf, errors = get_BAR(estimate) + if estimator=='BAR': + estimate = BAR().fit(partial) + l, l_mid, f, df, ddf, errors = get_BAR(estimate) + f_append = f.iloc[-1] + err_append = errors[-1] + else: + expl, expmid, dG_fs, dG_bs = get_exponential(partial) + f_append = np.average([dG_fs[-1], dG_bs[-1]]) + err_append = np.std([dG_fs[-1], dG_bs[-1]]) - backward.append(f.iloc[-1]) - backward_error.append(errors[-1]) + backward.append(f_append) + backward_error.append(err_append) return ( np.array(forward), From 900e64f314aab95b64345b16f70e675b344ad1e3 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Wed, 21 Feb 2024 10:37:18 -0500 Subject: [PATCH 06/14] remove chuff --- .../BAR_Estimator_Multiple_Replicas.ipynb | 87 +++---------------- 1 file changed, 12 insertions(+), 75 deletions(-) diff --git a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb index 5741f82..310dbfe 100644 --- a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb +++ b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb @@ -33,14 +33,14 @@ "import pandas as pd\n", "@dataclass\n", "class FepRun:\n", - " u_nk: pd.DataFrame()\n", - " perWindow: pd.DataFrame()\n", - " cumulative: pd.DataFrame()\n", - " forward: pd.DataFrame()\n", - " forward_error: pd.DataFrame()\n", - " backward: pd.DataFrame()\n", - " backward_error: pd.DataFrame()\n", - " per_lambda_convergence: pd.DataFrame()\n", + " u_nk: pd.DataFrame\n", + " perWindow: pd.DataFrame\n", + " cumulative: pd.DataFrame\n", + " forward: pd.DataFrame\n", + " forward_error: pd.DataFrame\n", + " backward: pd.DataFrame\n", + " backward_error: pd.DataFrame\n", + " per_lambda_convergence: pd.DataFrame\n", " color: str" ] }, @@ -62,8 +62,8 @@ "outputs": [], "source": [ "dataroot = Path('.')\n", - "replicas = dataroot.glob('new_POPC*/')\n", - "filename_regex='outputs/*.fepout'\n", + "replicas = dataroot.glob('Sample_Data/')\n", + "filename_regex='*.fepout'\n", "\n", "temperature = 303.15\n", "RT = 0.00198720650096 * temperature\n", @@ -289,41 +289,6 @@ "plt.savefig(dataroot.joinpath('FEP_convergence.pdf'))" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "1bf6ac36-f102-4da3-82d2-36a7741584e5", - "metadata": {}, - "outputs": [], - "source": [ - "import scipy as sp\n", - "fig, (Dax, pdfAx) = plt.subplots(1,2, gridspec_kw={'width_ratios': [2, 1]}, sharey='row', figsize=(10,5))\n", - "\n", - "for key, feprun in fepruns.items():\n", - " deltas = feprun.per_lambda_convergence.BAR.df*RT\n", - " lambdas = feprun.per_lambda_convergence.index\n", - " Dax.errorbar(lambdas, deltas, color=feprun.color, label=key)\n", - " Dax.set_xlabel(r\"$\\lambda$\")\n", - " Dax.set_ylabel(r\"$\\delta_\\mathrm{50\\%}$ (kcal/mol)\")\n", - "\n", - " kernel = sp.stats.gaussian_kde(deltas)\n", - " pdfX = np.linspace(-0.3, 0.3, 1000)\n", - " pdfY = kernel(pdfX)\n", - " pdfAx.plot(pdfY, pdfX, label='KDE', color=feprun.color)\n", - "\n", - "Dax.legend()\n", - "pdfAx.set_xlabel(\"KDE\")\n", - "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b237a8ee-f33e-4c05-b4ed-856bb4f34ca2", - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "code", "execution_count": null, @@ -347,36 +312,8 @@ " print(1)\n", " txt.set_visible(False)\n", " txt.set_text(\"\")\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b62e0eaa", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5b0435ef", - "metadata": {}, - "outputs": [], - "source": [ - "genfig, [hystax, pdfax] = plt.subplots(1,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]})\n", - "for key, feprun in fepruns.items():\n", - " hystax, pdfax = safep.plot_hysteresis(hystax, pdfax, feprun.perWindow, hysttype='lines', label=key, color=feprun.color)\n", - "\n", - "pdfax = do_agg_data(hystax, pdfax)\n", - "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", - "\n", - "for txt in genfig.texts:\n", - " print(1)\n", - " txt.set_visible(False)\n", - " txt.set_text(\"\")\n", - "plt.show()" + "plt.show()\n", + "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))" ] }, { From 507928b172e87a642094c0748a7c106aa282ee52 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Wed, 21 Feb 2024 10:37:39 -0500 Subject: [PATCH 07/14] refine error estimation for N<3 --- Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb index 310dbfe..98f8ee6 100644 --- a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb +++ b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb @@ -192,7 +192,12 @@ "\n", "toprint += '{}
'.format('__________________')\n", "mean = np.average(dGs)\n", - "sterr = np.round(np.std(dGs),1)\n", + "\n", + "#If there are only a few replicas, the MBAR estimated error will be more reliable, albeit underestimated\n", + "if len(dGs)<3:\n", + " sterr = np.sqrt(np.sum(np.square(errors)))\n", + "else:\n", + " sterr = np.round(np.std(dGs),1)\n", "toprint += '{}
'.format(f'mean: {mean}')\n", "toprint += '{}
'.format(f'sterr: {sterr}')\n", "Markdown(toprint)" From 2c7ddd49ef19fc053ea49d5ffaab24a0c723b0f3 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Wed, 21 Feb 2024 10:39:27 -0500 Subject: [PATCH 08/14] replacing old basic BAR with new, fancier BAR --- Sample_Notebooks/BAR_Estimator_Basic.ipynb | 250 ++++++++++--- .../BAR_Estimator_Multiple_Replicas.ipynb | 354 ------------------ 2 files changed, 206 insertions(+), 398 deletions(-) delete mode 100644 Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb diff --git a/Sample_Notebooks/BAR_Estimator_Basic.ipynb b/Sample_Notebooks/BAR_Estimator_Basic.ipynb index b638a7f..98f8ee6 100644 --- a/Sample_Notebooks/BAR_Estimator_Basic.ipynb +++ b/Sample_Notebooks/BAR_Estimator_Basic.ipynb @@ -15,12 +15,35 @@ "import os\n", "from alchemlyb.parsing import namd\n", "from IPython.display import display, Markdown\n", - "\n", + "from pathlib import Path\n", + "from dataclasses import dataclass\n", + "import scipy as sp\n", "from alchemlyb.estimators import BAR\n", "import warnings\n", "warnings.simplefilter(action='ignore', category=FutureWarning)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf843fad", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "@dataclass\n", + "class FepRun:\n", + " u_nk: pd.DataFrame\n", + " perWindow: pd.DataFrame\n", + " cumulative: pd.DataFrame\n", + " forward: pd.DataFrame\n", + " forward_error: pd.DataFrame\n", + " backward: pd.DataFrame\n", + " backward_error: pd.DataFrame\n", + " per_lambda_convergence: pd.DataFrame\n", + " color: str" + ] + }, { "cell_type": "markdown", "id": "a9d06104", @@ -38,19 +61,24 @@ "metadata": {}, "outputs": [], "source": [ - "path='/path/to/data'\n", - "filename='*.fepout'\n", + "dataroot = Path('.')\n", + "replicas = dataroot.glob('Sample_Data/')\n", + "filename_regex='*.fepout'\n", "\n", "temperature = 303.15\n", "RT = 0.00198720650096 * temperature\n", - "decorrelate = True #Flag for decorrelation of samples\n", - "detectEQ = True #Flag for automated equilibrium detection\n", - "\n", - "fepoutFiles = glob(path+filename)\n", - "totalSize = 0\n", - "for file in fepoutFiles:\n", - " totalSize += os.path.getsize(file)\n", - "print(f\"Will process {len(fepoutFiles)} fepout files.\\nTotal size:{np.round(totalSize/10**9, 2)}GB\")" + "detectEQ = True #Flag for automated equilibrium detection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6864313", + "metadata": {}, + "outputs": [], + "source": [ + "colors = ['blue', 'red', 'green', 'purple', 'orange', 'violet', 'cyan']\n", + "itcolors = iter(colors)" ] }, { @@ -64,6 +92,14 @@ "Note: alchemlyb operates in units of kT by default. We multiply by RT to convert to units of kcal/mol." ] }, + { + "cell_type": "markdown", + "id": "80b1034d", + "metadata": {}, + "source": [ + "# Read and plot number of samples after detecting EQ" + ] + }, { "cell_type": "code", "execution_count": null, @@ -71,20 +107,44 @@ "metadata": {}, "outputs": [], "source": [ - "fig, ax = plt.subplots()\n", - "u_nk = namd.extract_u_nk(fepoutFiles, temperature)\n", - "safep.plot_samples(ax, u_nk, color='blue', label='Raw Data')\n", - "\n", - "if detectEQ:\n", - " print(\"Detecting equilibrium\")\n", - " u_nk = safep.detect_equilibrium_u_nk(u_nk)\n", - " safep.plot_samples(ax, u_nk, color='orange', label='Equilibrium-Detected')\n", - "if decorrelate and not detectEQ:\n", - " print(\"Decorrelating\")\n", - " u_nk = safep.decorrelate_u_nk(u_nk)\n", - " safep.plot_samples(ax, u_nk, color='green', label='Decorrelated')\n", + "fepruns = {}\n", + "for replica in replicas:\n", + " print(f\"Reading {replica}\")\n", + " unkpath = replica.joinpath('decorrelated.csv')\n", + " u_nk = None\n", + " if unkpath.is_file():\n", + " print(f\"Found existing dataframe. Reading.\")\n", + " u_nk = safep.read_UNK(unkpath)\n", + " else:\n", + " print(f\"Didn't find existing dataframe at {unkpath}. Checking for raw fepout files.\")\n", + " fepoutFiles = list(replica.glob(filename_regex))\n", + " totalSize = 0\n", + " for file in fepoutFiles:\n", + " totalSize += os.path.getsize(file)\n", + " print(f\"Will process {len(fepoutFiles)} fepout files.\\nTotal size:{np.round(totalSize/10**9, 2)}GB\")\n", + "\n", + " if len(list(fepoutFiles))>0:\n", + " print(\"Reading fepout files\")\n", + " fig, ax = plt.subplots()\n", + "\n", + " u_nk = namd.extract_u_nk(fepoutFiles, temperature)\n", + " u_nk = u_nk.sort_index(axis=0, level=1).sort_index(axis=1)\n", + " safep.plot_samples(ax, u_nk, color='blue', label='Raw Data')\n", + "\n", + " if detectEQ:\n", + " print(\"Detecting equilibrium\")\n", + " u_nk = safep.detect_equilibrium_u_nk(u_nk)\n", + " safep.plot_samples(ax, u_nk, color='orange', label='Equilibrium-Detected')\n", + "\n", + " plt.savefig(f\"./{str(replica)}_FEP_number_of_samples.pdf\")\n", + " plt.show()\n", + " safep.save_UNK(u_nk, unkpath)\n", + " else:\n", + " print(f\"WARNING: no fepout files found for {replica}. Skipping.\")\n", " \n", - "plt.savefig(f\"{path}FEP_number_of_samples.pdf\")" + " if u_nk is not None:\n", + " fepruns[str(replica)] = FepRun(u_nk, None, None, None, None, None, None, None, next(itcolors))\n", + " " ] }, { @@ -94,9 +154,20 @@ "metadata": {}, "outputs": [], "source": [ - "perWindow, cumulative = safep.do_estimation(u_nk) #Run the BAR estimator on the fep data\n", - "forward, forward_error, backward, backward_error = safep.do_convergence(u_nk) #Used later in the convergence plot'\n", - "per_lambda_convergence = safep.do_per_lambda_convergence(u_nk)" + "for key, feprun in fepruns.items():\n", + " u_nk = feprun.u_nk\n", + " feprun.perWindow, feprun.cumulative = safep.do_estimation(u_nk) #Run the BAR estimator on the fep data\n", + " feprun.forward, feprun.forward_error, feprun.backward, feprun.backward_error = safep.do_convergence(u_nk) #Used later in the convergence plot'\n", + " feprun.per_lambda_convergence = safep.do_per_lambda_convergence(u_nk)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "caae40d3", + "metadata": {}, + "source": [ + "# Plot data" ] }, { @@ -106,11 +177,59 @@ "metadata": {}, "outputs": [], "source": [ - "dG = np.round(cumulative.BAR.f.iloc[-1]*RT, 1)\n", - "error = np.round(cumulative.BAR.errors.iloc[-1]*RT, 1)\n", + "toprint = \"\"\n", + "dGs = []\n", + "errors = []\n", + "for key, feprun in fepruns.items():\n", + " cumulative = feprun.cumulative\n", + " dG = np.round(cumulative.BAR.f.iloc[-1]*RT, 1)\n", + " error = np.round(cumulative.BAR.errors.iloc[-1]*RT, 1)\n", + " dGs.append(dG)\n", + " errors.append(error)\n", + "\n", + " changeAndError = f'{key}: \\u0394G = {dG}\\u00B1{error} kcal/mol'\n", + " toprint += '{}
'.format(changeAndError)\n", + "\n", + "toprint += '{}
'.format('__________________')\n", + "mean = np.average(dGs)\n", + "\n", + "#If there are only a few replicas, the MBAR estimated error will be more reliable, albeit underestimated\n", + "if len(dGs)<3:\n", + " sterr = np.sqrt(np.sum(np.square(errors)))\n", + "else:\n", + " sterr = np.round(np.std(dGs),1)\n", + "toprint += '{}
'.format(f'mean: {mean}')\n", + "toprint += '{}
'.format(f'sterr: {sterr}')\n", + "Markdown(toprint)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1fde633", + "metadata": {}, + "outputs": [], + "source": [ + "def do_agg_data(dataax, plotax):\n", + " agg_data = []\n", + " lines = dataax.lines\n", + " for line in lines:\n", + " agg_data.append(line.get_ydata())\n", + " flat = np.array(agg_data).flatten()\n", + " kernel = sp.stats.gaussian_kde(flat)\n", + " pdfX = np.linspace(-1, 1, 1000)\n", + " pdfY = kernel(pdfX)\n", + " std = np.std(flat)\n", + " mean = np.average(flat)\n", + " temp = pd.Series(pdfY, index=pdfX)\n", + " mode = temp.idxmax()\n", + "\n", + " textstr = r\"$\\rm mode=$\"+f\"{np.round(mode,2)}\"+\"\\n\"+fr\"$\\mu$={np.round(mean,2)}\"+\"\\n\"+fr\"$\\sigma$={np.round(std,2)}\"\n", + " props = dict(boxstyle='square', facecolor='white', alpha=1)\n", + " plotax.text(0.175, 0.95, textstr, transform=plotax.transAxes, fontsize=14,\n", + " verticalalignment='top', bbox=props)\n", "\n", - "changeAndError = f'\\u0394G = {dG}\\u00B1{error} kcal/mol'\n", - "Markdown('{}
'.format(changeAndError))" + " return plotax" ] }, { @@ -120,9 +239,21 @@ "metadata": {}, "outputs": [], "source": [ - "fig, axes = safep.plot_general(cumulative, None, perWindow, None, RT)\n", - "fig.suptitle(changeAndError)\n", - "plt.savefig(f'{path}FEP_general_figures.pdf')" + "fig = None\n", + "for key, feprun in fepruns.items():\n", + " if fig is None:\n", + " fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n", + " else:\n", + " fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=fig, axes=axes, hysttype='lines', label=key, color=feprun.color)\n", + " #fig.suptitle(changeAndError)\n", + "\n", + "# hack to get aggregate data:\n", + "axes[3] = do_agg_data(axes[2], axes[3])\n", + "\n", + "\n", + "axes[0].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", + "axes[0].legend()\n", + "plt.savefig(dataroot.joinpath('FEP_general_figures.pdf'))" ] }, { @@ -141,28 +272,59 @@ "outputs": [], "source": [ "fig, convAx = plt.subplots(1,1)\n", - "convAx = safep.convergence_plot(convAx, forward*RT, forward_error*RT, backward*RT, backward_error*RT)\n", - "plt.savefig(f'{path}FEP_convergence.pdf')" + "\n", + "for key, feprun in fepruns.items():\n", + " convAx = safep.convergence_plot(convAx, \n", + " feprun.forward*RT, \n", + " feprun.forward_error*RT, \n", + " feprun.backward*RT,\n", + " feprun.backward_error*RT,\n", + " fwdColor=feprun.color,\n", + " bwdColor=feprun.color,\n", + " errorbars=False\n", + " )\n", + " convAx.get_legend().remove()\n", + "\n", + "forward_line, = convAx.plot([],[],linestyle='-', color='black', label='Forward Time Sampling')\n", + "backward_line, = convAx.plot([],[],linestyle='--', color='black', label='Backward Time Sampling')\n", + "convAx.legend(handles=[forward_line, backward_line])\n", + "ymin = np.min(dGs)-1\n", + "ymax = np.max(dGs)+1\n", + "convAx.set_ylim((ymin,ymax))\n", + "plt.savefig(dataroot.joinpath('FEP_convergence.pdf'))" ] }, { "cell_type": "code", "execution_count": null, - "id": "1bf6ac36-f102-4da3-82d2-36a7741584e5", + "id": "e6ab4621", "metadata": {}, "outputs": [], "source": [ - "fig, ax = plt.subplots()\n", - "ax.errorbar(per_lambda_convergence.index, per_lambda_convergence.BAR.df*RT)\n", - "ax.set_xlabel(r\"$\\lambda$\")\n", - "ax.set_ylabel(r\"$D_{last-first}$ (kcal/mol)\")\n", - "plt.savefig(f\"{path}FEP_perLambda_convergence.pdf\")" + "genfig = None\n", + "for key, feprun in fepruns.items():\n", + " if genfig is None:\n", + " genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n", + " else:\n", + " genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=genfig, axes=genaxes, hysttype='lines', label=key, color=feprun.color)\n", + "plt.delaxes(genaxes[0])\n", + "plt.delaxes(genaxes[1])\n", + "\n", + "genaxes[3] = do_agg_data(axes[2], axes[3])\n", + "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", + "\n", + "for txt in genfig.texts:\n", + " print(1)\n", + " txt.set_visible(False)\n", + " txt.set_text(\"\")\n", + "plt.show()\n", + "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))" ] }, { "cell_type": "code", "execution_count": null, - "id": "b237a8ee-f33e-4c05-b4ed-856bb4f34ca2", + "id": "96418460", "metadata": {}, "outputs": [], "source": [] @@ -184,7 +346,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb deleted file mode 100644 index 98f8ee6..0000000 --- a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb +++ /dev/null @@ -1,354 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "708b4d0d", - "metadata": {}, - "outputs": [], - "source": [ - "import safep\n", - "import alchemlyb\n", - "from glob import glob\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "import os\n", - "from alchemlyb.parsing import namd\n", - "from IPython.display import display, Markdown\n", - "from pathlib import Path\n", - "from dataclasses import dataclass\n", - "import scipy as sp\n", - "from alchemlyb.estimators import BAR\n", - "import warnings\n", - "warnings.simplefilter(action='ignore', category=FutureWarning)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf843fad", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "@dataclass\n", - "class FepRun:\n", - " u_nk: pd.DataFrame\n", - " perWindow: pd.DataFrame\n", - " cumulative: pd.DataFrame\n", - " forward: pd.DataFrame\n", - " forward_error: pd.DataFrame\n", - " backward: pd.DataFrame\n", - " backward_error: pd.DataFrame\n", - " per_lambda_convergence: pd.DataFrame\n", - " color: str" - ] - }, - { - "cell_type": "markdown", - "id": "a9d06104", - "metadata": { - "tags": [] - }, - "source": [ - "# User parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e67078a6", - "metadata": {}, - "outputs": [], - "source": [ - "dataroot = Path('.')\n", - "replicas = dataroot.glob('Sample_Data/')\n", - "filename_regex='*.fepout'\n", - "\n", - "temperature = 303.15\n", - "RT = 0.00198720650096 * temperature\n", - "detectEQ = True #Flag for automated equilibrium detection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a6864313", - "metadata": {}, - "outputs": [], - "source": [ - "colors = ['blue', 'red', 'green', 'purple', 'orange', 'violet', 'cyan']\n", - "itcolors = iter(colors)" - ] - }, - { - "cell_type": "markdown", - "id": "30721156", - "metadata": { - "tags": [] - }, - "source": [ - "# Extract key features from the MBAR fitting and get ΔG\n", - "Note: alchemlyb operates in units of kT by default. We multiply by RT to convert to units of kcal/mol." - ] - }, - { - "cell_type": "markdown", - "id": "80b1034d", - "metadata": {}, - "source": [ - "# Read and plot number of samples after detecting EQ" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c747b75-8fa3-48f9-9ab3-ac4a12982c01", - "metadata": {}, - "outputs": [], - "source": [ - "fepruns = {}\n", - "for replica in replicas:\n", - " print(f\"Reading {replica}\")\n", - " unkpath = replica.joinpath('decorrelated.csv')\n", - " u_nk = None\n", - " if unkpath.is_file():\n", - " print(f\"Found existing dataframe. Reading.\")\n", - " u_nk = safep.read_UNK(unkpath)\n", - " else:\n", - " print(f\"Didn't find existing dataframe at {unkpath}. Checking for raw fepout files.\")\n", - " fepoutFiles = list(replica.glob(filename_regex))\n", - " totalSize = 0\n", - " for file in fepoutFiles:\n", - " totalSize += os.path.getsize(file)\n", - " print(f\"Will process {len(fepoutFiles)} fepout files.\\nTotal size:{np.round(totalSize/10**9, 2)}GB\")\n", - "\n", - " if len(list(fepoutFiles))>0:\n", - " print(\"Reading fepout files\")\n", - " fig, ax = plt.subplots()\n", - "\n", - " u_nk = namd.extract_u_nk(fepoutFiles, temperature)\n", - " u_nk = u_nk.sort_index(axis=0, level=1).sort_index(axis=1)\n", - " safep.plot_samples(ax, u_nk, color='blue', label='Raw Data')\n", - "\n", - " if detectEQ:\n", - " print(\"Detecting equilibrium\")\n", - " u_nk = safep.detect_equilibrium_u_nk(u_nk)\n", - " safep.plot_samples(ax, u_nk, color='orange', label='Equilibrium-Detected')\n", - "\n", - " plt.savefig(f\"./{str(replica)}_FEP_number_of_samples.pdf\")\n", - " plt.show()\n", - " safep.save_UNK(u_nk, unkpath)\n", - " else:\n", - " print(f\"WARNING: no fepout files found for {replica}. Skipping.\")\n", - " \n", - " if u_nk is not None:\n", - " fepruns[str(replica)] = FepRun(u_nk, None, None, None, None, None, None, None, next(itcolors))\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e81f216", - "metadata": {}, - "outputs": [], - "source": [ - "for key, feprun in fepruns.items():\n", - " u_nk = feprun.u_nk\n", - " feprun.perWindow, feprun.cumulative = safep.do_estimation(u_nk) #Run the BAR estimator on the fep data\n", - " feprun.forward, feprun.forward_error, feprun.backward, feprun.backward_error = safep.do_convergence(u_nk) #Used later in the convergence plot'\n", - " feprun.per_lambda_convergence = safep.do_per_lambda_convergence(u_nk)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "caae40d3", - "metadata": {}, - "source": [ - "# Plot data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fd69e9c8-94d8-4a2a-9ec8-a24f78059431", - "metadata": {}, - "outputs": [], - "source": [ - "toprint = \"\"\n", - "dGs = []\n", - "errors = []\n", - "for key, feprun in fepruns.items():\n", - " cumulative = feprun.cumulative\n", - " dG = np.round(cumulative.BAR.f.iloc[-1]*RT, 1)\n", - " error = np.round(cumulative.BAR.errors.iloc[-1]*RT, 1)\n", - " dGs.append(dG)\n", - " errors.append(error)\n", - "\n", - " changeAndError = f'{key}: \\u0394G = {dG}\\u00B1{error} kcal/mol'\n", - " toprint += '{}
'.format(changeAndError)\n", - "\n", - "toprint += '{}
'.format('__________________')\n", - "mean = np.average(dGs)\n", - "\n", - "#If there are only a few replicas, the MBAR estimated error will be more reliable, albeit underestimated\n", - "if len(dGs)<3:\n", - " sterr = np.sqrt(np.sum(np.square(errors)))\n", - "else:\n", - " sterr = np.round(np.std(dGs),1)\n", - "toprint += '{}
'.format(f'mean: {mean}')\n", - "toprint += '{}
'.format(f'sterr: {sterr}')\n", - "Markdown(toprint)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1fde633", - "metadata": {}, - "outputs": [], - "source": [ - "def do_agg_data(dataax, plotax):\n", - " agg_data = []\n", - " lines = dataax.lines\n", - " for line in lines:\n", - " agg_data.append(line.get_ydata())\n", - " flat = np.array(agg_data).flatten()\n", - " kernel = sp.stats.gaussian_kde(flat)\n", - " pdfX = np.linspace(-1, 1, 1000)\n", - " pdfY = kernel(pdfX)\n", - " std = np.std(flat)\n", - " mean = np.average(flat)\n", - " temp = pd.Series(pdfY, index=pdfX)\n", - " mode = temp.idxmax()\n", - "\n", - " textstr = r\"$\\rm mode=$\"+f\"{np.round(mode,2)}\"+\"\\n\"+fr\"$\\mu$={np.round(mean,2)}\"+\"\\n\"+fr\"$\\sigma$={np.round(std,2)}\"\n", - " props = dict(boxstyle='square', facecolor='white', alpha=1)\n", - " plotax.text(0.175, 0.95, textstr, transform=plotax.transAxes, fontsize=14,\n", - " verticalalignment='top', bbox=props)\n", - "\n", - " return plotax" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "282938bb-988e-4e00-a2e6-ab018722569e", - "metadata": {}, - "outputs": [], - "source": [ - "fig = None\n", - "for key, feprun in fepruns.items():\n", - " if fig is None:\n", - " fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n", - " else:\n", - " fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=fig, axes=axes, hysttype='lines', label=key, color=feprun.color)\n", - " #fig.suptitle(changeAndError)\n", - "\n", - "# hack to get aggregate data:\n", - "axes[3] = do_agg_data(axes[2], axes[3])\n", - "\n", - "\n", - "axes[0].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", - "axes[0].legend()\n", - "plt.savefig(dataroot.joinpath('FEP_general_figures.pdf'))" - ] - }, - { - "cell_type": "markdown", - "id": "c7f90d8c", - "metadata": {}, - "source": [ - "# Plot the estimated total change in free energy as a function of simulation time; contiguous subsets starting at t=0 (\"Forward\") and t=end (\"Reverse\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "70523392-e935-4ff4-b6e9-cf679c787bcc", - "metadata": {}, - "outputs": [], - "source": [ - "fig, convAx = plt.subplots(1,1)\n", - "\n", - "for key, feprun in fepruns.items():\n", - " convAx = safep.convergence_plot(convAx, \n", - " feprun.forward*RT, \n", - " feprun.forward_error*RT, \n", - " feprun.backward*RT,\n", - " feprun.backward_error*RT,\n", - " fwdColor=feprun.color,\n", - " bwdColor=feprun.color,\n", - " errorbars=False\n", - " )\n", - " convAx.get_legend().remove()\n", - "\n", - "forward_line, = convAx.plot([],[],linestyle='-', color='black', label='Forward Time Sampling')\n", - "backward_line, = convAx.plot([],[],linestyle='--', color='black', label='Backward Time Sampling')\n", - "convAx.legend(handles=[forward_line, backward_line])\n", - "ymin = np.min(dGs)-1\n", - "ymax = np.max(dGs)+1\n", - "convAx.set_ylim((ymin,ymax))\n", - "plt.savefig(dataroot.joinpath('FEP_convergence.pdf'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e6ab4621", - "metadata": {}, - "outputs": [], - "source": [ - "genfig = None\n", - "for key, feprun in fepruns.items():\n", - " if genfig is None:\n", - " genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n", - " else:\n", - " genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=genfig, axes=genaxes, hysttype='lines', label=key, color=feprun.color)\n", - "plt.delaxes(genaxes[0])\n", - "plt.delaxes(genaxes[1])\n", - "\n", - "genaxes[3] = do_agg_data(axes[2], axes[3])\n", - "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", - "\n", - "for txt in genfig.texts:\n", - " print(1)\n", - " txt.set_visible(False)\n", - " txt.set_text(\"\")\n", - "plt.show()\n", - "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "96418460", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 6e7b1b80bd9400287a09f17a2720d56c61aedaf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20H=C3=A9nin?= Date: Thu, 22 Feb 2024 23:09:52 +0100 Subject: [PATCH 09/14] Variable name changes, test replica code --- Sample_Notebooks/BAR_Estimator_Basic.ipynb | 9 +++++---- Sample_Notebooks/Replica1 | 1 + Sample_Notebooks/Replica2 | 1 + Sample_Notebooks/Replica3 | 1 + 4 files changed, 8 insertions(+), 4 deletions(-) create mode 120000 Sample_Notebooks/Replica1 create mode 120000 Sample_Notebooks/Replica2 create mode 120000 Sample_Notebooks/Replica3 diff --git a/Sample_Notebooks/BAR_Estimator_Basic.ipynb b/Sample_Notebooks/BAR_Estimator_Basic.ipynb index 98f8ee6..f4bf77f 100644 --- a/Sample_Notebooks/BAR_Estimator_Basic.ipynb +++ b/Sample_Notebooks/BAR_Estimator_Basic.ipynb @@ -62,8 +62,9 @@ "outputs": [], "source": [ "dataroot = Path('.')\n", - "replicas = dataroot.glob('Sample_Data/')\n", - "filename_regex='*.fepout'\n", + "replica_pattern='Replica?'\n", + "replicas = dataroot.glob(replica_pattern)\n", + "filename_pattern='*.fepout'\n", "\n", "temperature = 303.15\n", "RT = 0.00198720650096 * temperature\n", @@ -117,7 +118,7 @@ " u_nk = safep.read_UNK(unkpath)\n", " else:\n", " print(f\"Didn't find existing dataframe at {unkpath}. Checking for raw fepout files.\")\n", - " fepoutFiles = list(replica.glob(filename_regex))\n", + " fepoutFiles = list(replica.glob(filename_pattern))\n", " totalSize = 0\n", " for file in fepoutFiles:\n", " totalSize += os.path.getsize(file)\n", @@ -346,7 +347,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.0" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/Sample_Notebooks/Replica1 b/Sample_Notebooks/Replica1 new file mode 120000 index 0000000..df9f7a8 --- /dev/null +++ b/Sample_Notebooks/Replica1 @@ -0,0 +1 @@ +Sample_Data \ No newline at end of file diff --git a/Sample_Notebooks/Replica2 b/Sample_Notebooks/Replica2 new file mode 120000 index 0000000..df9f7a8 --- /dev/null +++ b/Sample_Notebooks/Replica2 @@ -0,0 +1 @@ +Sample_Data \ No newline at end of file diff --git a/Sample_Notebooks/Replica3 b/Sample_Notebooks/Replica3 new file mode 120000 index 0000000..df9f7a8 --- /dev/null +++ b/Sample_Notebooks/Replica3 @@ -0,0 +1 @@ +Sample_Data \ No newline at end of file From a51e13189f1ddd622ac055ec1ddc5ca9da940059 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Sat, 2 Mar 2024 11:29:05 -0500 Subject: [PATCH 10/14] removing redundant definition --- safep/plotting.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/safep/plotting.py b/safep/plotting.py index def93ee..80aceee 100644 --- a/safep/plotting.py +++ b/safep/plotting.py @@ -18,27 +18,27 @@ def plot_samples(theax, u_nk, color='blue', label='Raw Data'): return theax -def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D55E00', lgndF=None, lgndB=None): - ''' - A revised convergence plot. Plays nicely with other plotting functions and is more customizable. - Arguments: theax[is] on which to plot, fs (forward estimates), ferr (forward errors), bs (backward-sampled estimates), berr (backward-sampled errors), fwdColor, bwdColor, lgndF legend forward color, lgndB legend backward color - Returns: theax[is] - ''' - if not lgndF: - lgndF=fwdColor - lgndB=bwdColor +# def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D55E00', lgndF=None, lgndB=None): +# ''' +# A revised convergence plot. Plays nicely with other plotting functions and is more customizable. +# Arguments: theax[is] on which to plot, fs (forward estimates), ferr (forward errors), bs (backward-sampled estimates), berr (backward-sampled errors), fwdColor, bwdColor, lgndF legend forward color, lgndB legend backward color +# Returns: theax[is] +# ''' +# if not lgndF: +# lgndF=fwdColor +# lgndB=bwdColor - theax.errorbar(np.arange(len(fs))/len(fs)+0.1, fs, yerr=ferr, marker='o', linewidth=1, color=fwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=fwdColor, ms=5) - theax.errorbar(np.arange(len(bs))/len(fs)+0.1, bs, yerr=berr, marker='o', linewidth=1, color=bwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=bwdColor, ms=5, linestyle='--') +# theax.errorbar(np.arange(len(fs))/len(fs)+0.1, fs, yerr=ferr, marker='o', linewidth=1, color=fwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=fwdColor, ms=5) +# theax.errorbar(np.arange(len(bs))/len(fs)+0.1, bs, yerr=berr, marker='o', linewidth=1, color=bwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=bwdColor, ms=5, linestyle='--') - theax.xaxis.set_ticks([0, 0.2, 0.4, 0.6, 0.8, 1]) +# theax.xaxis.set_ticks([0, 0.2, 0.4, 0.6, 0.8, 1]) - finalMean = fs[-1] - theax.axhline(y= finalMean, linestyle='-.', color='gray') - theax.plot(0, finalMean, linewidth=1, color=lgndF, label='Forward Time Sampling') - theax.plot(0, finalMean, linewidth=1, color=lgndB, linestyle='--', label='Backward Time Sampling') +# finalMean = fs[-1] +# theax.axhline(y= finalMean, linestyle='-.', color='gray') +# theax.plot(0, finalMean, linewidth=1, color=lgndF, label='Forward Time Sampling') +# theax.plot(0, finalMean, linewidth=1, color=lgndB, linestyle='--', label='Backward Time Sampling') - return theax +# return theax def do_conv_plot(ax, X, fs, ferr, fwdColor, label=None): ''' From 119876e139d9da45fd3173c3fc241eeb7ff37308 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Thu, 13 Jun 2024 09:04:58 -0400 Subject: [PATCH 11/14] add labels to per_window plot --- safep/plotting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/safep/plotting.py b/safep/plotting.py index e377f19..5ceb74a 100644 --- a/safep/plotting.py +++ b/safep/plotting.py @@ -187,9 +187,9 @@ def plot_general(cumulative, # Per-window change in kcal/mol if errorbars: - each_ax.errorbar(per_window.index, per_window.BAR.df*RT, yerr=per_window.BAR.ddf, marker=None, linewidth=1, color=color) - each_ax.errorbar(per_window.index, -per_window.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color) - each_ax.plot(per_window.index, per_window.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color) + each_ax.errorbar(per_window.index, per_window.BAR.df*RT, yerr=per_window.BAR.ddf, marker=None, linewidth=1, color=color, label="forward EXP") + each_ax.errorbar(per_window.index, -per_window.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color, label="backward EXP") + each_ax.plot(per_window.index, per_window.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color, label="BAR") each_ax.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=per_window_ylim) From 8738f587fb327851863777ebc6f7968e87d0f841 Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Thu, 13 Jun 2024 09:08:33 -0400 Subject: [PATCH 12/14] add legend to per_window general plot (second row) to clarify solid and dashed lines --- Sample_Notebooks/BAR_Estimator_Basic.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sample_Notebooks/BAR_Estimator_Basic.ipynb b/Sample_Notebooks/BAR_Estimator_Basic.ipynb index f4bf77f..a9f700a 100644 --- a/Sample_Notebooks/BAR_Estimator_Basic.ipynb +++ b/Sample_Notebooks/BAR_Estimator_Basic.ipynb @@ -244,6 +244,7 @@ "for key, feprun in fepruns.items():\n", " if fig is None:\n", " fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n", + " axes[1].legend()\n", " else:\n", " fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=fig, axes=axes, hysttype='lines', label=key, color=feprun.color)\n", " #fig.suptitle(changeAndError)\n", @@ -251,7 +252,6 @@ "# hack to get aggregate data:\n", "axes[3] = do_agg_data(axes[2], axes[3])\n", "\n", - "\n", "axes[0].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n", "axes[0].legend()\n", "plt.savefig(dataroot.joinpath('FEP_general_figures.pdf'))" @@ -347,7 +347,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.12.0" } }, "nbformat": 4, From e8dc613614b4a446727e10aba3df019787a9e2ad Mon Sep 17 00:00:00 2001 From: EzryStIago Date: Thu, 13 Jun 2024 09:10:59 -0400 Subject: [PATCH 13/14] make EXP curves in general plot alpha=0.5 --- safep/plotting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/safep/plotting.py b/safep/plotting.py index 5ceb74a..a3471cf 100644 --- a/safep/plotting.py +++ b/safep/plotting.py @@ -187,9 +187,9 @@ def plot_general(cumulative, # Per-window change in kcal/mol if errorbars: - each_ax.errorbar(per_window.index, per_window.BAR.df*RT, yerr=per_window.BAR.ddf, marker=None, linewidth=1, color=color, label="forward EXP") + each_ax.errorbar(per_window.index, per_window.BAR.df*RT, yerr=per_window.BAR.ddf, marker=None, linewidth=1, alpha=0.5, color=color, label="forward EXP") each_ax.errorbar(per_window.index, -per_window.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color, label="backward EXP") - each_ax.plot(per_window.index, per_window.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color, label="BAR") + each_ax.plot(per_window.index, per_window.EXP.dG_f*RT, marker=None, linewidth=1, color=color, label="BAR") each_ax.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=per_window_ylim) From 6f75ccc82a9f43ee0d297c94e6dec022376e07d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20H=C3=A9nin?= Date: Wed, 19 Jun 2024 13:37:45 +0200 Subject: [PATCH 14/14] Fix parameter names --- Sample_Notebooks/BAR_Estimator_Basic.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Sample_Notebooks/BAR_Estimator_Basic.ipynb b/Sample_Notebooks/BAR_Estimator_Basic.ipynb index a9f700a..7896a58 100644 --- a/Sample_Notebooks/BAR_Estimator_Basic.ipynb +++ b/Sample_Notebooks/BAR_Estimator_Basic.ipynb @@ -280,8 +280,8 @@ " feprun.forward_error*RT, \n", " feprun.backward*RT,\n", " feprun.backward_error*RT,\n", - " fwdColor=feprun.color,\n", - " bwdColor=feprun.color,\n", + " fwd_color=feprun.color,\n", + " bwd_color=feprun.color,\n", " errorbars=False\n", " )\n", " convAx.get_legend().remove()\n", @@ -347,7 +347,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.0" + "version": "3.12.3" } }, "nbformat": 4,