From 150d6094f8dd9205433d720405289630d411fbed Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Wed, 24 Jan 2024 17:07:17 -0500
Subject: [PATCH 01/14] convergence plot can now plot multiple replicas

---
 safep/plotting.py | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/safep/plotting.py b/safep/plotting.py
index 1ef1762..0076a41 100644
--- a/safep/plotting.py
+++ b/safep/plotting.py
@@ -50,10 +50,9 @@ def do_conv_plot(ax, X, fs, ferr, fwdColor, label=None):
     return ax
 
 
-def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D55E00', lgndF=None, lgndB=None, fontsize=12):
+def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D55E00', lgndF=None, lgndB=None, fontsize=12, errorbars=True):
     '''
-    Convergence plot. Does the convergence calculation and plotting.
-    Arguments: u_nk, tau (an error tuning factor), units (kT or kcal/mol), RT
+    Convergence plot. Does the convergence plotting.
     Returns: a pyplot
     '''
     if not lgndF:
@@ -63,9 +62,35 @@ def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D
         
     lower = fs[-1]-ferr[-1]
     upper = fs[-1]+ferr[-1]
-    theax.fill_between([0,1],[lower, lower], [upper, upper], color=bwdColor, alpha=0.25)
-    theax.errorbar(np.arange(len(fs))/len(fs)+0.1, fs, yerr=ferr, marker='o', linewidth=1, color=fwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=fwdColor, ms=5)
-    theax.errorbar(np.arange(len(bs))/len(fs)+0.1, bs, yerr=berr, marker='o', linewidth=1, color=bwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=bwdColor, ms=5, linestyle='--')
+    theax.fill_between([0,1],[lower, lower], [upper, upper], color='gray', alpha=0.1)
+
+    fwdparams = {'marker':'o',
+                'linewidth':1,
+                'color':fwdColor,
+                'markerfacecolor':'white',
+                'markeredgewidth':1,
+                'markeredgecolor':fwdColor,
+                'ms':5,
+                }
+    bwdparams = {'marker':'o',
+                'linewidth':1,
+                'color':bwdColor,
+                'markerfacecolor':'white',
+                'markeredgewidth':1,
+                'markeredgecolor':bwdColor,
+                'ms':5,
+                'linestyle':'--'}
+
+    xfwd = np.arange(len(fs))/len(fs)+0.1
+    xbwd = np.arange(len(bs))/len(fs)+0.1
+
+    if errorbars:
+        theax.errorbar(xfwd, fs, yerr=ferr, **fwdparams)
+        theax.errorbar(xbwd, bs, yerr=berr, **bwdparams)
+    else:
+        theax.errorbar(xfwd, fs, **fwdparams)
+        theax.errorbar(xbwd, bs, **bwdparams)
+
 
     theax.xaxis.set_ticks([0, 0.2, 0.4, 0.6, 0.8, 1])
     

From 6404a997bda2844bd2e354ce1049d81051c10845 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Wed, 24 Jan 2024 17:07:35 -0500
Subject: [PATCH 02/14] general plots now support multiple replicas.

---
 safep/plotting.py | 50 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 13 deletions(-)

diff --git a/safep/plotting.py b/safep/plotting.py
index 0076a41..a5d354c 100644
--- a/safep/plotting.py
+++ b/safep/plotting.py
@@ -134,26 +134,50 @@ def fb_discrepancy_hist(dG_f, dG_b):
     return plt.gca()
 
 
-def plot_general(cumulative, cumulativeYlim, perWindow, perWindowYlim, RT, width=8, height=4, PDFtype='KDE', fontsize=12):
-    fig, axes = plt.subplots(3,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]})
-    ((cumAx, del1),( eachAx, del2), (hystAx, pdfAx)) = axes
-
-    fig.delaxes(del1)
-    fig.delaxes(del2)
+def plot_general(cumulative, 
+                 cumulativeYlim, 
+                 perWindow, 
+                 perWindowYlim, 
+                 RT, 
+                 width=8, 
+                 height=4, 
+                 PDFtype='KDE', 
+                 fontsize=12,
+                 fig=None,
+                 axes=None,
+                 hysttype='classic',
+                 label=None,
+                 color='blue',
+                 errorbars=True):
+    if fig is None and axes is None:
+        fig, axes = plt.subplots(3,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]})
+        ((cumAx, del1),( eachAx, del2), (hystAx, pdfAx)) = axes
+
+        fig.delaxes(del1)
+        fig.delaxes(del2)
+    else:
+        cumAx, eachAx, hystAx, pdfAx = axes
 
     # Cumulative change in kcal/mol
-    cumAx.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors, marker=None, linewidth=1)
+    cumAx.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors, marker=None, linewidth=1, label=label, color=color)
     cumAx.set(ylabel=r'Cumulative $\mathrm{\Delta} G_{\lambda}$'+'\n(kcal/mol)', ylim=cumulativeYlim)
 
     # Per-window change in kcal/mol
-    eachAx.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf, marker=None, linewidth=1)
-    eachAx.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5)
-    eachAx.errorbar(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5)
+    if errorbars:
+        eachAx.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf, marker=None, linewidth=1, color=color)
+        eachAx.errorbar(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color)
+    eachAx.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color)
+        
     eachAx.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=perWindowYlim)
 
     #Hysteresis Plots
     diff = perWindow.EXP['difference']
-    hystAx.vlines(perWindow.index, np.zeros(len(perWindow)), diff, label="fwd - bwd", linewidth=2)
+    assert hysttype in ['classic', 'lines'], f"ERROR: I don't know how to plot hysttype={hysttype}"
+    if hysttype == 'classic':
+        hystAx.vlines(perWindow.index, np.zeros(len(perWindow)), diff, label="fwd - bwd", linewidth=2, color=color)
+    elif hysttype == 'lines':
+        hystAx.plot(perWindow.index, diff, label="fwd - bwd", linewidth=1, color=color)
+
     hystAx.set(ylabel=r'$\delta_\lambda$ (kcal/mol)', ylim=(-1,1))
     hystAx.set_xlabel(xlabel=r'$\lambda$', fontsize=fontsize)
     
@@ -161,11 +185,11 @@ def plot_general(cumulative, cumulativeYlim, perWindow, perWindowYlim, RT, width
         kernel = sp.stats.gaussian_kde(diff)
         pdfX = np.linspace(-1, 1, 1000)
         pdfY = kernel(pdfX)
-        pdfAx.plot(pdfY, pdfX, label='KDE')
+        pdfAx.plot(pdfY, pdfX, label='KDE', color=color)
     elif PDFtype=='Histogram':
         pdfY, pdfX = np.histogram(diff, density=True)
         pdfX = pdfX[:-1]+(pdfX[1]-pdfX[0])/2
-        pdfAx.plot(pdfY, pdfX,  label="Estimated Distribution")
+        pdfAx.plot(pdfY, pdfX,  label="Estimated Distribution", color=color)
     else:
         raise(f"Error: PDFtype {PDFtype} not recognized")
     

From e48919f69b5db347dd1133e42f7096db7f08dd89 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Fri, 9 Feb 2024 19:37:53 -0500
Subject: [PATCH 03/14] Draft notebook with replicas included

---
 .../BAR_Estimator_Multiple_Replicas.ipynb     | 412 ++++++++++++++++++
 1 file changed, 412 insertions(+)
 create mode 100644 Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb

diff --git a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
new file mode 100644
index 0000000..5741f82
--- /dev/null
+++ b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
@@ -0,0 +1,412 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "708b4d0d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import safep\n",
+    "import alchemlyb\n",
+    "from glob import glob\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import os\n",
+    "from alchemlyb.parsing import namd\n",
+    "from IPython.display import display, Markdown\n",
+    "from pathlib import Path\n",
+    "from dataclasses import dataclass\n",
+    "import scipy as sp\n",
+    "from alchemlyb.estimators import BAR\n",
+    "import warnings\n",
+    "warnings.simplefilter(action='ignore', category=FutureWarning)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf843fad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "@dataclass\n",
+    "class FepRun:\n",
+    "    u_nk:           pd.DataFrame()\n",
+    "    perWindow:      pd.DataFrame()\n",
+    "    cumulative:     pd.DataFrame()\n",
+    "    forward:        pd.DataFrame()\n",
+    "    forward_error:  pd.DataFrame()\n",
+    "    backward:       pd.DataFrame()\n",
+    "    backward_error: pd.DataFrame()\n",
+    "    per_lambda_convergence: pd.DataFrame()\n",
+    "    color: str"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a9d06104",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# User parameters"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e67078a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataroot = Path('.')\n",
+    "replicas = dataroot.glob('new_POPC*/')\n",
+    "filename_regex='outputs/*.fepout'\n",
+    "\n",
+    "temperature = 303.15\n",
+    "RT = 0.00198720650096 * temperature\n",
+    "detectEQ = True #Flag for automated equilibrium detection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6864313",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "colors = ['blue', 'red', 'green', 'purple', 'orange', 'violet', 'cyan']\n",
+    "itcolors = iter(colors)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "30721156",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Extract key features from the MBAR fitting and get ΔG\n",
+    "Note: alchemlyb operates in units of kT by default. We multiply by RT to convert to units of kcal/mol."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "80b1034d",
+   "metadata": {},
+   "source": [
+    "# Read and plot number of samples after detecting EQ"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7c747b75-8fa3-48f9-9ab3-ac4a12982c01",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fepruns = {}\n",
+    "for replica in replicas:\n",
+    "    print(f\"Reading {replica}\")\n",
+    "    unkpath = replica.joinpath('decorrelated.csv')\n",
+    "    u_nk = None\n",
+    "    if unkpath.is_file():\n",
+    "        print(f\"Found existing dataframe. Reading.\")\n",
+    "        u_nk = safep.read_UNK(unkpath)\n",
+    "    else:\n",
+    "        print(f\"Didn't find existing dataframe at {unkpath}. Checking for raw fepout files.\")\n",
+    "        fepoutFiles = list(replica.glob(filename_regex))\n",
+    "        totalSize = 0\n",
+    "        for file in fepoutFiles:\n",
+    "            totalSize += os.path.getsize(file)\n",
+    "        print(f\"Will process {len(fepoutFiles)} fepout files.\\nTotal size:{np.round(totalSize/10**9, 2)}GB\")\n",
+    "\n",
+    "        if len(list(fepoutFiles))>0:\n",
+    "            print(\"Reading fepout files\")\n",
+    "            fig, ax = plt.subplots()\n",
+    "\n",
+    "            u_nk = namd.extract_u_nk(fepoutFiles, temperature)\n",
+    "            u_nk = u_nk.sort_index(axis=0, level=1).sort_index(axis=1)\n",
+    "            safep.plot_samples(ax, u_nk, color='blue', label='Raw Data')\n",
+    "\n",
+    "            if detectEQ:\n",
+    "                print(\"Detecting equilibrium\")\n",
+    "                u_nk = safep.detect_equilibrium_u_nk(u_nk)\n",
+    "                safep.plot_samples(ax, u_nk, color='orange', label='Equilibrium-Detected')\n",
+    "\n",
+    "            plt.savefig(f\"./{str(replica)}_FEP_number_of_samples.pdf\")\n",
+    "            plt.show()\n",
+    "            safep.save_UNK(u_nk, unkpath)\n",
+    "        else:\n",
+    "            print(f\"WARNING: no fepout files found for {replica}. Skipping.\")\n",
+    "    \n",
+    "    if u_nk is not None:\n",
+    "        fepruns[str(replica)] = FepRun(u_nk, None, None, None, None, None, None, None, next(itcolors))\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e81f216",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for key, feprun in fepruns.items():\n",
+    "    u_nk = feprun.u_nk\n",
+    "    feprun.perWindow, feprun.cumulative = safep.do_estimation(u_nk) #Run the BAR estimator on the fep data\n",
+    "    feprun.forward, feprun.forward_error, feprun.backward, feprun.backward_error = safep.do_convergence(u_nk) #Used later in the convergence plot'\n",
+    "    feprun.per_lambda_convergence = safep.do_per_lambda_convergence(u_nk)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "caae40d3",
+   "metadata": {},
+   "source": [
+    "# Plot data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fd69e9c8-94d8-4a2a-9ec8-a24f78059431",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "toprint = \"\"\n",
+    "dGs = []\n",
+    "errors = []\n",
+    "for key, feprun in fepruns.items():\n",
+    "    cumulative = feprun.cumulative\n",
+    "    dG = np.round(cumulative.BAR.f.iloc[-1]*RT, 1)\n",
+    "    error = np.round(cumulative.BAR.errors.iloc[-1]*RT, 1)\n",
+    "    dGs.append(dG)\n",
+    "    errors.append(error)\n",
+    "\n",
+    "    changeAndError = f'{key}: \\u0394G = {dG}\\u00B1{error} kcal/mol'\n",
+    "    toprint += '<font size=5>{}</font><br/>'.format(changeAndError)\n",
+    "\n",
+    "toprint += '<font size=5>{}</font><br/>'.format('__________________')\n",
+    "mean = np.average(dGs)\n",
+    "sterr = np.round(np.std(dGs),1)\n",
+    "toprint += '<font size=5>{}</font><br/>'.format(f'mean: {mean}')\n",
+    "toprint += '<font size=5>{}</font><br/>'.format(f'sterr: {sterr}')\n",
+    "Markdown(toprint)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1fde633",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def do_agg_data(dataax, plotax):\n",
+    "    agg_data = []\n",
+    "    lines = dataax.lines\n",
+    "    for line in lines:\n",
+    "        agg_data.append(line.get_ydata())\n",
+    "    flat = np.array(agg_data).flatten()\n",
+    "    kernel = sp.stats.gaussian_kde(flat)\n",
+    "    pdfX = np.linspace(-1, 1, 1000)\n",
+    "    pdfY = kernel(pdfX)\n",
+    "    std = np.std(flat)\n",
+    "    mean = np.average(flat)\n",
+    "    temp = pd.Series(pdfY, index=pdfX)\n",
+    "    mode = temp.idxmax()\n",
+    "\n",
+    "    textstr = r\"$\\rm mode=$\"+f\"{np.round(mode,2)}\"+\"\\n\"+fr\"$\\mu$={np.round(mean,2)}\"+\"\\n\"+fr\"$\\sigma$={np.round(std,2)}\"\n",
+    "    props = dict(boxstyle='square', facecolor='white', alpha=1)\n",
+    "    plotax.text(0.175, 0.95, textstr, transform=plotax.transAxes, fontsize=14,\n",
+    "            verticalalignment='top', bbox=props)\n",
+    "\n",
+    "    return plotax"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "282938bb-988e-4e00-a2e6-ab018722569e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig = None\n",
+    "for key, feprun in fepruns.items():\n",
+    "    if fig is None:\n",
+    "        fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n",
+    "    else:\n",
+    "        fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=fig, axes=axes, hysttype='lines', label=key, color=feprun.color)\n",
+    "    #fig.suptitle(changeAndError)\n",
+    "\n",
+    "# hack to get aggregate data:\n",
+    "axes[3] = do_agg_data(axes[2], axes[3])\n",
+    "\n",
+    "\n",
+    "axes[0].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
+    "axes[0].legend()\n",
+    "plt.savefig(dataroot.joinpath('FEP_general_figures.pdf'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c7f90d8c",
+   "metadata": {},
+   "source": [
+    "# Plot the estimated total change in free energy as a function of simulation time; contiguous subsets starting at t=0 (\"Forward\") and t=end (\"Reverse\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "70523392-e935-4ff4-b6e9-cf679c787bcc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fig, convAx = plt.subplots(1,1)\n",
+    "\n",
+    "for key, feprun in fepruns.items():\n",
+    "    convAx = safep.convergence_plot(convAx, \n",
+    "                                    feprun.forward*RT, \n",
+    "                                    feprun.forward_error*RT, \n",
+    "                                    feprun.backward*RT,\n",
+    "                                    feprun.backward_error*RT,\n",
+    "                                    fwdColor=feprun.color,\n",
+    "                                    bwdColor=feprun.color,\n",
+    "                                    errorbars=False\n",
+    "                                    )\n",
+    "    convAx.get_legend().remove()\n",
+    "\n",
+    "forward_line, = convAx.plot([],[],linestyle='-', color='black', label='Forward Time Sampling')\n",
+    "backward_line, = convAx.plot([],[],linestyle='--', color='black', label='Backward Time Sampling')\n",
+    "convAx.legend(handles=[forward_line, backward_line])\n",
+    "ymin = np.min(dGs)-1\n",
+    "ymax = np.max(dGs)+1\n",
+    "convAx.set_ylim((ymin,ymax))\n",
+    "plt.savefig(dataroot.joinpath('FEP_convergence.pdf'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1bf6ac36-f102-4da3-82d2-36a7741584e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import scipy as sp\n",
+    "fig, (Dax, pdfAx) = plt.subplots(1,2, gridspec_kw={'width_ratios': [2, 1]}, sharey='row', figsize=(10,5))\n",
+    "\n",
+    "for key, feprun in fepruns.items():\n",
+    "    deltas = feprun.per_lambda_convergence.BAR.df*RT\n",
+    "    lambdas = feprun.per_lambda_convergence.index\n",
+    "    Dax.errorbar(lambdas, deltas, color=feprun.color, label=key)\n",
+    "    Dax.set_xlabel(r\"$\\lambda$\")\n",
+    "    Dax.set_ylabel(r\"$\\delta_\\mathrm{50\\%}$ (kcal/mol)\")\n",
+    "\n",
+    "    kernel = sp.stats.gaussian_kde(deltas)\n",
+    "    pdfX = np.linspace(-0.3, 0.3, 1000)\n",
+    "    pdfY = kernel(pdfX)\n",
+    "    pdfAx.plot(pdfY, pdfX, label='KDE', color=feprun.color)\n",
+    "\n",
+    "Dax.legend()\n",
+    "pdfAx.set_xlabel(\"KDE\")\n",
+    "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b237a8ee-f33e-4c05-b4ed-856bb4f34ca2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e6ab4621",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "genfig = None\n",
+    "for key, feprun in fepruns.items():\n",
+    "    if genfig is None:\n",
+    "        genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n",
+    "    else:\n",
+    "        genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=genfig, axes=genaxes, hysttype='lines', label=key, color=feprun.color)\n",
+    "plt.delaxes(genaxes[0])\n",
+    "plt.delaxes(genaxes[1])\n",
+    "\n",
+    "genaxes[3] = do_agg_data(axes[2], axes[3])\n",
+    "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
+    "\n",
+    "for txt in genfig.texts:\n",
+    "    print(1)\n",
+    "    txt.set_visible(False)\n",
+    "    txt.set_text(\"\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b62e0eaa",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b0435ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "genfig, [hystax, pdfax] = plt.subplots(1,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]})\n",
+    "for key, feprun in fepruns.items():\n",
+    "    hystax, pdfax = safep.plot_hysteresis(hystax, pdfax, feprun.perWindow, hysttype='lines', label=key, color=feprun.color)\n",
+    "\n",
+    "pdfax = do_agg_data(hystax, pdfax)\n",
+    "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
+    "\n",
+    "for txt in genfig.texts:\n",
+    "    print(1)\n",
+    "    txt.set_visible(False)\n",
+    "    txt.set_text(\"\")\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96418460",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From bb887827a469f1d7f00d3649f015f2b7738e06ea Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Fri, 9 Feb 2024 20:34:26 -0500
Subject: [PATCH 04/14] whitespace and comments

---
 safep/plotting.py | 131 +++++++++++++++++++++++++++++-----------------
 1 file changed, 83 insertions(+), 48 deletions(-)

diff --git a/safep/plotting.py b/safep/plotting.py
index a5d354c..def93ee 100644
--- a/safep/plotting.py
+++ b/safep/plotting.py
@@ -134,14 +134,14 @@ def fb_discrepancy_hist(dG_f, dG_b):
     return plt.gca()
 
 
-def plot_general(cumulative, 
-                 cumulativeYlim, 
-                 perWindow, 
-                 perWindowYlim, 
-                 RT, 
-                 width=8, 
-                 height=4, 
-                 PDFtype='KDE', 
+def plot_general(cumulative,
+                 cumulativeYlim,
+                 perWindow,
+                 perWindowYlim,
+                 RT,
+                 width=8,
+                 height=4,
+                 PDFtype='KDE',
                  fontsize=12,
                  fig=None,
                  axes=None,
@@ -149,76 +149,111 @@ def plot_general(cumulative,
                  label=None,
                  color='blue',
                  errorbars=True):
+    '''
+    Plot the general analysis of cumulative and per-window changes in delta-G.
+
+    Arguments:
+    - cumulative: A pandas DataFrame containing the cumulative changes in delta-G.
+    - cumulativeYlim: The y-axis limits for the cumulative plot.
+    - perWindow: A pandas DataFrame containing the per-window changes in delta-G.
+    - perWindowYlim: The y-axis limits for the per-window plot.
+    - RT: The gas constant times the temperature.
+    - width: The width of the figure.
+    - height: The height of the figure.
+    - PDFtype: The type of probability density function to plot (either 'KDE' or 'Histogram').
+    - fontsize: The font size of the plot labels.
+    - fig: The figure object to use for plotting.
+    - axes: The axes objects to use for plotting.
+    - hysttype: The type of hysteresis plot to use (either 'classic' or 'lines').
+    - label: The label for the plot.
+    - color: The color of the plot.
+    - errorbars: Whether to include error bars in the per-window plot.
+
+    Returns:
+    - fig: The figure object.
+    - axes: The axes objects.
+    '''
     if fig is None and axes is None:
         fig, axes = plt.subplots(3,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]})
-        ((cumAx, del1),( eachAx, del2), (hystAx, pdfAx)) = axes
+        ((cumul_ax, del1),( each_ax, del2), (hyst_ax, pdf_ax)) = axes
 
         fig.delaxes(del1)
         fig.delaxes(del2)
     else:
-        cumAx, eachAx, hystAx, pdfAx = axes
+        cumul_ax, each_ax, hyst_ax, pdf_ax = axes
 
     # Cumulative change in kcal/mol
-    cumAx.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors, marker=None, linewidth=1, label=label, color=color)
-    cumAx.set(ylabel=r'Cumulative $\mathrm{\Delta} G_{\lambda}$'+'\n(kcal/mol)', ylim=cumulativeYlim)
+    cumul_ax.errorbar(cumulative.index, cumulative.BAR.f*RT, yerr=cumulative.BAR.errors, marker=None, linewidth=1, label=label, color=color)
+    cumul_ax.set(ylabel=r'Cumulative $\mathrm{\Delta} G_{\lambda}$'+'\n(kcal/mol)', ylim=cumulativeYlim)
 
     # Per-window change in kcal/mol
     if errorbars:
-        eachAx.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf, marker=None, linewidth=1, color=color)
-        eachAx.errorbar(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color)
-    eachAx.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color)
-        
-    eachAx.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=perWindowYlim)
+        each_ax.errorbar(perWindow.index, perWindow.BAR.df*RT, yerr=perWindow.BAR.ddf, marker=None, linewidth=1, color=color)
+        each_ax.errorbar(perWindow.index, -perWindow.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color)
+    each_ax.plot(perWindow.index, perWindow.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color)
+
+    each_ax.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=perWindowYlim)
 
     #Hysteresis Plots
-    diff = perWindow.EXP['difference']
+    hyst_ax, pdf_ax = plot_hysteresis(hyst_ax, pdf_ax, perWindow, hysttype, color, fontsize, PDFtype)
+
+    fig.set_figwidth(width)
+    fig.set_figheight(height*3)
+    fig.tight_layout()
+
+    for ax in [cumul_ax,each_ax,hyst_ax,pdf_ax]:
+        ax.set_ylabel(ax.get_ylabel(), fontsize=fontsize)
+
+    return fig, [cumul_ax,each_ax,hyst_ax,pdf_ax] 
+
+
+def plot_hysteresis(hyst_ax, pdf_ax, per_window, hysttype, color='blue', fontsize=12, pdf_type='KDE'):
+    diff = per_window.EXP['difference']
     assert hysttype in ['classic', 'lines'], f"ERROR: I don't know how to plot hysttype={hysttype}"
     if hysttype == 'classic':
-        hystAx.vlines(perWindow.index, np.zeros(len(perWindow)), diff, label="fwd - bwd", linewidth=2, color=color)
+        hyst_ax.vlines(per_window.index, np.zeros(len(per_window)), diff, label="fwd - bwd", linewidth=2, color=color)
     elif hysttype == 'lines':
-        hystAx.plot(perWindow.index, diff, label="fwd - bwd", linewidth=1, color=color)
+        hyst_ax.plot(per_window.index, diff, label="fwd - bwd", linewidth=1, color=color)
 
-    hystAx.set(ylabel=r'$\delta_\lambda$ (kcal/mol)', ylim=(-1,1))
-    hystAx.set_xlabel(xlabel=r'$\lambda$', fontsize=fontsize)
-    
-    if PDFtype=='KDE':
+    ytxt = r'$\delta_\lambda$ (kcal/mol)'
+    hyst_ax.set_ylabel(ylabel=ytxt, fontsize=fontsize)
+    hyst_ax.set_ylim((-1,1))
+
+    xtxt = r'$\lambda$'
+    hyst_ax.set_xlabel(xlabel=xtxt, fontsize=fontsize)
+
+    if pdf_type=='KDE':
         kernel = sp.stats.gaussian_kde(diff)
-        pdfX = np.linspace(-1, 1, 1000)
-        pdfY = kernel(pdfX)
-        pdfAx.plot(pdfY, pdfX, label='KDE', color=color)
-    elif PDFtype=='Histogram':
-        pdfY, pdfX = np.histogram(diff, density=True)
-        pdfX = pdfX[:-1]+(pdfX[1]-pdfX[0])/2
-        pdfAx.plot(pdfY, pdfX,  label="Estimated Distribution", color=color)
+        pdf_x = np.linspace(-1, 1, 1000)
+        pdf_y = kernel(pdf_x)
+        pdf_ax.plot(pdf_y, pdf_x, label='KDE', color=color)
+    elif pdf_type=='Histogram':
+        pdf_y, pdf_x = np.histogram(diff, density=True)
+        pdf_x = pdf_x[:-1]+(pdf_x[1]-pdf_x[0])/2
+        pdf_ax.plot(pdf_y, pdf_x,  label="Estimated Distribution", color=color)
     else:
-        raise(f"Error: PDFtype {PDFtype} not recognized")
-    
-    pdfAx.set_xlabel(PDFtype, fontsize=fontsize)
+        raise f"Error: PDFtype {pdf_type} not recognized"
+
+    pdf_ax.set_xlabel(pdf_type, fontsize=fontsize)
 
     std = np.std(diff)
     mean = np.average(diff)
-    temp = pd.Series(pdfY, index=pdfX)
+    temp = pd.Series(pdf_y, index=pdf_x)
     mode = temp.idxmax()
-    
-    textstr = r"$\rm mode=$"+f"{np.round(mode,2)}"+"\n"+fr"$\mu$={np.round(mean,2)}"+"\n"+fr"$\sigma$={np.round(std,2)}"
+
+    textstr = fr"$\rm mode=${np.round(mode,2)}"+"\n"+fr"$\mu$={np.round(mean,2)}"+"\n"+fr"$\sigma$={np.round(std,2)}"
     props = dict(boxstyle='square', facecolor='white', alpha=0.5)
-    pdfAx.text(0.15, 0.95, textstr, transform=pdfAx.transAxes, fontsize=14,
+    pdf_ax.text(0.15, 0.95, textstr, transform=pdf_ax.transAxes, fontsize=14,
             verticalalignment='top', bbox=props)
 
-    fig.set_figwidth(width)
-    fig.set_figheight(height*3)
-    fig.tight_layout()
-    
-    for ax in [cumAx,eachAx,hystAx,pdfAx]:
-        ax.set_ylabel(ax.get_ylabel(), fontsize=fontsize)
+    return hyst_ax, pdf_ax
 
-    return fig, [cumAx,eachAx,hystAx,pdfAx] 
 
 
-'''
-This is a deprecated plotting function
-'''
 def plot_general_legacy(cumulative, cumulativeYlim, perWindow, perWindowYlim, RT, width=8, height=4, PDFtype='KDE', fontsize=12):
+    '''
+    This is a deprecated plotting function
+    '''
     fig, axes = plt.subplots(4,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]})
     ((cumAx, del1),( eachAx, del2), (ddGAx, del3), (hystAx, pdfAx)) = axes
 

From aa589f049410ca709778607435168663edd30787 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Fri, 9 Feb 2024 20:35:46 -0500
Subject: [PATCH 05/14] adding EXP estimation to convergence

---
 safep/processing.py | 38 +++++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/safep/processing.py b/safep/processing.py
index 1c6c58a..c9222d6 100644
--- a/safep/processing.py
+++ b/safep/processing.py
@@ -195,12 +195,13 @@ def alt_convergence(u_nk, nbins):
     return np.array(forward), np.array(forward_error)
 
 
-def do_convergence(u_nk, tau=1, num_points=10):
+def do_convergence(u_nk, tau=1, num_points=10, estimator='BAR'):
     """
     Convergence calculation. Incrementally adds data from either the start or the end of each windows simulation and calculates the resulting change in free energy.
     Arguments: u_nk, tau (an error scaling factor), num_points (number of chunks)
     Returns: forward-sampled estimate (starting from t=start), forward-sampled error, backward-sampled estimate (from t=end), backward-sampled error
     """
+    assert estimator in ['BAR', 'EXP'], "ERROR: I only know BAR and EXP estimators."
     groups = u_nk.groupby("fep-lambda")
 
     forward = []
@@ -210,18 +211,33 @@ def do_convergence(u_nk, tau=1, num_points=10):
     for i in range(1, num_points + 1):
         # forward
         partial = subsample(groups, 0, 100 * i / num_points)
-        estimate = BAR().fit(partial)
-        l, l_mid, f, df, ddf, errors = get_BAR(estimate)
-
-        forward.append(f.iloc[-1])
-        forward_error.append(errors[-1])
-
+        if estimator=='BAR':
+            estimate = BAR().fit(partial)
+            l, l_mid, f, df, ddf, errors = get_BAR(estimate)
+            f_append = f.iloc[-1]
+            err_append = errors[-1]
+        else:
+            expl, expmid, dG_fs, dG_bs = get_exponential(partial)
+            f_append = np.average([dG_fs[-1], dG_bs[-1]])
+            err_append = np.std([dG_fs[-1], dG_bs[-1]])
+
+        forward.append(f_append)
+        forward_error.append(err_append)
+        
+        # backward
         partial = subsample(groups, 100 * (1 - i / num_points), 100)
-        estimate = BAR().fit(partial)
-        l, l_mid, f, df, ddf, errors = get_BAR(estimate)
+        if estimator=='BAR':
+            estimate = BAR().fit(partial)
+            l, l_mid, f, df, ddf, errors = get_BAR(estimate)
+            f_append = f.iloc[-1]
+            err_append = errors[-1]
+        else:
+            expl, expmid, dG_fs, dG_bs = get_exponential(partial)
+            f_append = np.average([dG_fs[-1], dG_bs[-1]])
+            err_append = np.std([dG_fs[-1], dG_bs[-1]])
 
-        backward.append(f.iloc[-1])
-        backward_error.append(errors[-1])
+        backward.append(f_append)
+        backward_error.append(err_append)
 
     return (
         np.array(forward),

From 900e64f314aab95b64345b16f70e675b344ad1e3 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Wed, 21 Feb 2024 10:37:18 -0500
Subject: [PATCH 06/14] remove chuff

---
 .../BAR_Estimator_Multiple_Replicas.ipynb     | 87 +++----------------
 1 file changed, 12 insertions(+), 75 deletions(-)

diff --git a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
index 5741f82..310dbfe 100644
--- a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
+++ b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
@@ -33,14 +33,14 @@
     "import pandas as pd\n",
     "@dataclass\n",
     "class FepRun:\n",
-    "    u_nk:           pd.DataFrame()\n",
-    "    perWindow:      pd.DataFrame()\n",
-    "    cumulative:     pd.DataFrame()\n",
-    "    forward:        pd.DataFrame()\n",
-    "    forward_error:  pd.DataFrame()\n",
-    "    backward:       pd.DataFrame()\n",
-    "    backward_error: pd.DataFrame()\n",
-    "    per_lambda_convergence: pd.DataFrame()\n",
+    "    u_nk:           pd.DataFrame\n",
+    "    perWindow:      pd.DataFrame\n",
+    "    cumulative:     pd.DataFrame\n",
+    "    forward:        pd.DataFrame\n",
+    "    forward_error:  pd.DataFrame\n",
+    "    backward:       pd.DataFrame\n",
+    "    backward_error: pd.DataFrame\n",
+    "    per_lambda_convergence: pd.DataFrame\n",
     "    color: str"
    ]
   },
@@ -62,8 +62,8 @@
    "outputs": [],
    "source": [
     "dataroot = Path('.')\n",
-    "replicas = dataroot.glob('new_POPC*/')\n",
-    "filename_regex='outputs/*.fepout'\n",
+    "replicas = dataroot.glob('Sample_Data/')\n",
+    "filename_regex='*.fepout'\n",
     "\n",
     "temperature = 303.15\n",
     "RT = 0.00198720650096 * temperature\n",
@@ -289,41 +289,6 @@
     "plt.savefig(dataroot.joinpath('FEP_convergence.pdf'))"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1bf6ac36-f102-4da3-82d2-36a7741584e5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import scipy as sp\n",
-    "fig, (Dax, pdfAx) = plt.subplots(1,2, gridspec_kw={'width_ratios': [2, 1]}, sharey='row', figsize=(10,5))\n",
-    "\n",
-    "for key, feprun in fepruns.items():\n",
-    "    deltas = feprun.per_lambda_convergence.BAR.df*RT\n",
-    "    lambdas = feprun.per_lambda_convergence.index\n",
-    "    Dax.errorbar(lambdas, deltas, color=feprun.color, label=key)\n",
-    "    Dax.set_xlabel(r\"$\\lambda$\")\n",
-    "    Dax.set_ylabel(r\"$\\delta_\\mathrm{50\\%}$ (kcal/mol)\")\n",
-    "\n",
-    "    kernel = sp.stats.gaussian_kde(deltas)\n",
-    "    pdfX = np.linspace(-0.3, 0.3, 1000)\n",
-    "    pdfY = kernel(pdfX)\n",
-    "    pdfAx.plot(pdfY, pdfX, label='KDE', color=feprun.color)\n",
-    "\n",
-    "Dax.legend()\n",
-    "pdfAx.set_xlabel(\"KDE\")\n",
-    "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b237a8ee-f33e-4c05-b4ed-856bb4f34ca2",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -347,36 +312,8 @@
     "    print(1)\n",
     "    txt.set_visible(False)\n",
     "    txt.set_text(\"\")\n",
-    "plt.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b62e0eaa",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5b0435ef",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "genfig, [hystax, pdfax] = plt.subplots(1,2, sharex='col', sharey='row', gridspec_kw={'width_ratios': [2, 1]})\n",
-    "for key, feprun in fepruns.items():\n",
-    "    hystax, pdfax = safep.plot_hysteresis(hystax, pdfax, feprun.perWindow, hysttype='lines', label=key, color=feprun.color)\n",
-    "\n",
-    "pdfax = do_agg_data(hystax, pdfax)\n",
-    "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
-    "\n",
-    "for txt in genfig.texts:\n",
-    "    print(1)\n",
-    "    txt.set_visible(False)\n",
-    "    txt.set_text(\"\")\n",
-    "plt.show()"
+    "plt.show()\n",
+    "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))"
    ]
   },
   {

From 507928b172e87a642094c0748a7c106aa282ee52 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Wed, 21 Feb 2024 10:37:39 -0500
Subject: [PATCH 07/14] refine error estimation for N<3

---
 Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
index 310dbfe..98f8ee6 100644
--- a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
+++ b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
@@ -192,7 +192,12 @@
     "\n",
     "toprint += '<font size=5>{}</font><br/>'.format('__________________')\n",
     "mean = np.average(dGs)\n",
-    "sterr = np.round(np.std(dGs),1)\n",
+    "\n",
+    "#If there are only a few replicas, the MBAR estimated error will be more reliable, albeit underestimated\n",
+    "if len(dGs)<3:\n",
+    "    sterr = np.sqrt(np.sum(np.square(errors)))\n",
+    "else:\n",
+    "    sterr = np.round(np.std(dGs),1)\n",
     "toprint += '<font size=5>{}</font><br/>'.format(f'mean: {mean}')\n",
     "toprint += '<font size=5>{}</font><br/>'.format(f'sterr: {sterr}')\n",
     "Markdown(toprint)"

From 2c7ddd49ef19fc053ea49d5ffaab24a0c723b0f3 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Wed, 21 Feb 2024 10:39:27 -0500
Subject: [PATCH 08/14] replacing old basic BAR with new, fancier BAR

---
 Sample_Notebooks/BAR_Estimator_Basic.ipynb    | 250 ++++++++++---
 .../BAR_Estimator_Multiple_Replicas.ipynb     | 354 ------------------
 2 files changed, 206 insertions(+), 398 deletions(-)
 delete mode 100644 Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb

diff --git a/Sample_Notebooks/BAR_Estimator_Basic.ipynb b/Sample_Notebooks/BAR_Estimator_Basic.ipynb
index b638a7f..98f8ee6 100644
--- a/Sample_Notebooks/BAR_Estimator_Basic.ipynb
+++ b/Sample_Notebooks/BAR_Estimator_Basic.ipynb
@@ -15,12 +15,35 @@
     "import os\n",
     "from alchemlyb.parsing import namd\n",
     "from IPython.display import display, Markdown\n",
-    "\n",
+    "from pathlib import Path\n",
+    "from dataclasses import dataclass\n",
+    "import scipy as sp\n",
     "from alchemlyb.estimators import BAR\n",
     "import warnings\n",
     "warnings.simplefilter(action='ignore', category=FutureWarning)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bf843fad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "@dataclass\n",
+    "class FepRun:\n",
+    "    u_nk:           pd.DataFrame\n",
+    "    perWindow:      pd.DataFrame\n",
+    "    cumulative:     pd.DataFrame\n",
+    "    forward:        pd.DataFrame\n",
+    "    forward_error:  pd.DataFrame\n",
+    "    backward:       pd.DataFrame\n",
+    "    backward_error: pd.DataFrame\n",
+    "    per_lambda_convergence: pd.DataFrame\n",
+    "    color: str"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "a9d06104",
@@ -38,19 +61,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "path='/path/to/data'\n",
-    "filename='*.fepout'\n",
+    "dataroot = Path('.')\n",
+    "replicas = dataroot.glob('Sample_Data/')\n",
+    "filename_regex='*.fepout'\n",
     "\n",
     "temperature = 303.15\n",
     "RT = 0.00198720650096 * temperature\n",
-    "decorrelate = True #Flag for decorrelation of samples\n",
-    "detectEQ = True #Flag for automated equilibrium detection\n",
-    "\n",
-    "fepoutFiles = glob(path+filename)\n",
-    "totalSize = 0\n",
-    "for file in fepoutFiles:\n",
-    "    totalSize += os.path.getsize(file)\n",
-    "print(f\"Will process {len(fepoutFiles)} fepout files.\\nTotal size:{np.round(totalSize/10**9, 2)}GB\")"
+    "detectEQ = True #Flag for automated equilibrium detection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6864313",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "colors = ['blue', 'red', 'green', 'purple', 'orange', 'violet', 'cyan']\n",
+    "itcolors = iter(colors)"
    ]
   },
   {
@@ -64,6 +92,14 @@
     "Note: alchemlyb operates in units of kT by default. We multiply by RT to convert to units of kcal/mol."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "80b1034d",
+   "metadata": {},
+   "source": [
+    "# Read and plot number of samples after detecting EQ"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -71,20 +107,44 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fig, ax = plt.subplots()\n",
-    "u_nk = namd.extract_u_nk(fepoutFiles, temperature)\n",
-    "safep.plot_samples(ax, u_nk, color='blue', label='Raw Data')\n",
-    "\n",
-    "if detectEQ:\n",
-    "    print(\"Detecting equilibrium\")\n",
-    "    u_nk = safep.detect_equilibrium_u_nk(u_nk)\n",
-    "    safep.plot_samples(ax, u_nk, color='orange', label='Equilibrium-Detected')\n",
-    "if decorrelate and not detectEQ:\n",
-    "    print(\"Decorrelating\")\n",
-    "    u_nk = safep.decorrelate_u_nk(u_nk)\n",
-    "    safep.plot_samples(ax, u_nk, color='green', label='Decorrelated')\n",
+    "fepruns = {}\n",
+    "for replica in replicas:\n",
+    "    print(f\"Reading {replica}\")\n",
+    "    unkpath = replica.joinpath('decorrelated.csv')\n",
+    "    u_nk = None\n",
+    "    if unkpath.is_file():\n",
+    "        print(f\"Found existing dataframe. Reading.\")\n",
+    "        u_nk = safep.read_UNK(unkpath)\n",
+    "    else:\n",
+    "        print(f\"Didn't find existing dataframe at {unkpath}. Checking for raw fepout files.\")\n",
+    "        fepoutFiles = list(replica.glob(filename_regex))\n",
+    "        totalSize = 0\n",
+    "        for file in fepoutFiles:\n",
+    "            totalSize += os.path.getsize(file)\n",
+    "        print(f\"Will process {len(fepoutFiles)} fepout files.\\nTotal size:{np.round(totalSize/10**9, 2)}GB\")\n",
+    "\n",
+    "        if len(list(fepoutFiles))>0:\n",
+    "            print(\"Reading fepout files\")\n",
+    "            fig, ax = plt.subplots()\n",
+    "\n",
+    "            u_nk = namd.extract_u_nk(fepoutFiles, temperature)\n",
+    "            u_nk = u_nk.sort_index(axis=0, level=1).sort_index(axis=1)\n",
+    "            safep.plot_samples(ax, u_nk, color='blue', label='Raw Data')\n",
+    "\n",
+    "            if detectEQ:\n",
+    "                print(\"Detecting equilibrium\")\n",
+    "                u_nk = safep.detect_equilibrium_u_nk(u_nk)\n",
+    "                safep.plot_samples(ax, u_nk, color='orange', label='Equilibrium-Detected')\n",
+    "\n",
+    "            plt.savefig(f\"./{str(replica)}_FEP_number_of_samples.pdf\")\n",
+    "            plt.show()\n",
+    "            safep.save_UNK(u_nk, unkpath)\n",
+    "        else:\n",
+    "            print(f\"WARNING: no fepout files found for {replica}. Skipping.\")\n",
     "    \n",
-    "plt.savefig(f\"{path}FEP_number_of_samples.pdf\")"
+    "    if u_nk is not None:\n",
+    "        fepruns[str(replica)] = FepRun(u_nk, None, None, None, None, None, None, None, next(itcolors))\n",
+    "        "
    ]
   },
   {
@@ -94,9 +154,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "perWindow, cumulative = safep.do_estimation(u_nk) #Run the BAR estimator on the fep data\n",
-    "forward, forward_error, backward, backward_error = safep.do_convergence(u_nk) #Used later in the convergence plot'\n",
-    "per_lambda_convergence = safep.do_per_lambda_convergence(u_nk)"
+    "for key, feprun in fepruns.items():\n",
+    "    u_nk = feprun.u_nk\n",
+    "    feprun.perWindow, feprun.cumulative = safep.do_estimation(u_nk) #Run the BAR estimator on the fep data\n",
+    "    feprun.forward, feprun.forward_error, feprun.backward, feprun.backward_error = safep.do_convergence(u_nk) #Used later in the convergence plot'\n",
+    "    feprun.per_lambda_convergence = safep.do_per_lambda_convergence(u_nk)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "caae40d3",
+   "metadata": {},
+   "source": [
+    "# Plot data"
    ]
   },
   {
@@ -106,11 +177,59 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dG = np.round(cumulative.BAR.f.iloc[-1]*RT, 1)\n",
-    "error = np.round(cumulative.BAR.errors.iloc[-1]*RT, 1)\n",
+    "toprint = \"\"\n",
+    "dGs = []\n",
+    "errors = []\n",
+    "for key, feprun in fepruns.items():\n",
+    "    cumulative = feprun.cumulative\n",
+    "    dG = np.round(cumulative.BAR.f.iloc[-1]*RT, 1)\n",
+    "    error = np.round(cumulative.BAR.errors.iloc[-1]*RT, 1)\n",
+    "    dGs.append(dG)\n",
+    "    errors.append(error)\n",
+    "\n",
+    "    changeAndError = f'{key}: \\u0394G = {dG}\\u00B1{error} kcal/mol'\n",
+    "    toprint += '<font size=5>{}</font><br/>'.format(changeAndError)\n",
+    "\n",
+    "toprint += '<font size=5>{}</font><br/>'.format('__________________')\n",
+    "mean = np.average(dGs)\n",
+    "\n",
+    "#If there are only a few replicas, the MBAR estimated error will be more reliable, albeit underestimated\n",
+    "if len(dGs)<3:\n",
+    "    sterr = np.sqrt(np.sum(np.square(errors)))\n",
+    "else:\n",
+    "    sterr = np.round(np.std(dGs),1)\n",
+    "toprint += '<font size=5>{}</font><br/>'.format(f'mean: {mean}')\n",
+    "toprint += '<font size=5>{}</font><br/>'.format(f'sterr: {sterr}')\n",
+    "Markdown(toprint)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1fde633",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def do_agg_data(dataax, plotax):\n",
+    "    agg_data = []\n",
+    "    lines = dataax.lines\n",
+    "    for line in lines:\n",
+    "        agg_data.append(line.get_ydata())\n",
+    "    flat = np.array(agg_data).flatten()\n",
+    "    kernel = sp.stats.gaussian_kde(flat)\n",
+    "    pdfX = np.linspace(-1, 1, 1000)\n",
+    "    pdfY = kernel(pdfX)\n",
+    "    std = np.std(flat)\n",
+    "    mean = np.average(flat)\n",
+    "    temp = pd.Series(pdfY, index=pdfX)\n",
+    "    mode = temp.idxmax()\n",
+    "\n",
+    "    textstr = r\"$\\rm mode=$\"+f\"{np.round(mode,2)}\"+\"\\n\"+fr\"$\\mu$={np.round(mean,2)}\"+\"\\n\"+fr\"$\\sigma$={np.round(std,2)}\"\n",
+    "    props = dict(boxstyle='square', facecolor='white', alpha=1)\n",
+    "    plotax.text(0.175, 0.95, textstr, transform=plotax.transAxes, fontsize=14,\n",
+    "            verticalalignment='top', bbox=props)\n",
     "\n",
-    "changeAndError = f'\\u0394G = {dG}\\u00B1{error} kcal/mol'\n",
-    "Markdown('<font size=5>{}</font><br/>'.format(changeAndError))"
+    "    return plotax"
    ]
   },
   {
@@ -120,9 +239,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "fig, axes = safep.plot_general(cumulative, None, perWindow, None, RT)\n",
-    "fig.suptitle(changeAndError)\n",
-    "plt.savefig(f'{path}FEP_general_figures.pdf')"
+    "fig = None\n",
+    "for key, feprun in fepruns.items():\n",
+    "    if fig is None:\n",
+    "        fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n",
+    "    else:\n",
+    "        fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=fig, axes=axes, hysttype='lines', label=key, color=feprun.color)\n",
+    "    #fig.suptitle(changeAndError)\n",
+    "\n",
+    "# hack to get aggregate data:\n",
+    "axes[3] = do_agg_data(axes[2], axes[3])\n",
+    "\n",
+    "\n",
+    "axes[0].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
+    "axes[0].legend()\n",
+    "plt.savefig(dataroot.joinpath('FEP_general_figures.pdf'))"
    ]
   },
   {
@@ -141,28 +272,59 @@
    "outputs": [],
    "source": [
     "fig, convAx = plt.subplots(1,1)\n",
-    "convAx = safep.convergence_plot(convAx, forward*RT, forward_error*RT, backward*RT, backward_error*RT)\n",
-    "plt.savefig(f'{path}FEP_convergence.pdf')"
+    "\n",
+    "for key, feprun in fepruns.items():\n",
+    "    convAx = safep.convergence_plot(convAx, \n",
+    "                                    feprun.forward*RT, \n",
+    "                                    feprun.forward_error*RT, \n",
+    "                                    feprun.backward*RT,\n",
+    "                                    feprun.backward_error*RT,\n",
+    "                                    fwdColor=feprun.color,\n",
+    "                                    bwdColor=feprun.color,\n",
+    "                                    errorbars=False\n",
+    "                                    )\n",
+    "    convAx.get_legend().remove()\n",
+    "\n",
+    "forward_line, = convAx.plot([],[],linestyle='-', color='black', label='Forward Time Sampling')\n",
+    "backward_line, = convAx.plot([],[],linestyle='--', color='black', label='Backward Time Sampling')\n",
+    "convAx.legend(handles=[forward_line, backward_line])\n",
+    "ymin = np.min(dGs)-1\n",
+    "ymax = np.max(dGs)+1\n",
+    "convAx.set_ylim((ymin,ymax))\n",
+    "plt.savefig(dataroot.joinpath('FEP_convergence.pdf'))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "1bf6ac36-f102-4da3-82d2-36a7741584e5",
+   "id": "e6ab4621",
    "metadata": {},
    "outputs": [],
    "source": [
-    "fig, ax = plt.subplots()\n",
-    "ax.errorbar(per_lambda_convergence.index, per_lambda_convergence.BAR.df*RT)\n",
-    "ax.set_xlabel(r\"$\\lambda$\")\n",
-    "ax.set_ylabel(r\"$D_{last-first}$ (kcal/mol)\")\n",
-    "plt.savefig(f\"{path}FEP_perLambda_convergence.pdf\")"
+    "genfig = None\n",
+    "for key, feprun in fepruns.items():\n",
+    "    if genfig is None:\n",
+    "        genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n",
+    "    else:\n",
+    "        genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=genfig, axes=genaxes, hysttype='lines', label=key, color=feprun.color)\n",
+    "plt.delaxes(genaxes[0])\n",
+    "plt.delaxes(genaxes[1])\n",
+    "\n",
+    "genaxes[3] = do_agg_data(axes[2], axes[3])\n",
+    "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
+    "\n",
+    "for txt in genfig.texts:\n",
+    "    print(1)\n",
+    "    txt.set_visible(False)\n",
+    "    txt.set_text(\"\")\n",
+    "plt.show()\n",
+    "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "b237a8ee-f33e-4c05-b4ed-856bb4f34ca2",
+   "id": "96418460",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -184,7 +346,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
+   "version": "3.12.0"
   }
  },
  "nbformat": 4,
diff --git a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb b/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
deleted file mode 100644
index 98f8ee6..0000000
--- a/Sample_Notebooks/BAR_Estimator_Multiple_Replicas.ipynb
+++ /dev/null
@@ -1,354 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "708b4d0d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import safep\n",
-    "import alchemlyb\n",
-    "from glob import glob\n",
-    "import numpy as np\n",
-    "import matplotlib.pyplot as plt\n",
-    "import os\n",
-    "from alchemlyb.parsing import namd\n",
-    "from IPython.display import display, Markdown\n",
-    "from pathlib import Path\n",
-    "from dataclasses import dataclass\n",
-    "import scipy as sp\n",
-    "from alchemlyb.estimators import BAR\n",
-    "import warnings\n",
-    "warnings.simplefilter(action='ignore', category=FutureWarning)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "bf843fad",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "@dataclass\n",
-    "class FepRun:\n",
-    "    u_nk:           pd.DataFrame\n",
-    "    perWindow:      pd.DataFrame\n",
-    "    cumulative:     pd.DataFrame\n",
-    "    forward:        pd.DataFrame\n",
-    "    forward_error:  pd.DataFrame\n",
-    "    backward:       pd.DataFrame\n",
-    "    backward_error: pd.DataFrame\n",
-    "    per_lambda_convergence: pd.DataFrame\n",
-    "    color: str"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a9d06104",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "# User parameters"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e67078a6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataroot = Path('.')\n",
-    "replicas = dataroot.glob('Sample_Data/')\n",
-    "filename_regex='*.fepout'\n",
-    "\n",
-    "temperature = 303.15\n",
-    "RT = 0.00198720650096 * temperature\n",
-    "detectEQ = True #Flag for automated equilibrium detection"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a6864313",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "colors = ['blue', 'red', 'green', 'purple', 'orange', 'violet', 'cyan']\n",
-    "itcolors = iter(colors)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "30721156",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "# Extract key features from the MBAR fitting and get ΔG\n",
-    "Note: alchemlyb operates in units of kT by default. We multiply by RT to convert to units of kcal/mol."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "80b1034d",
-   "metadata": {},
-   "source": [
-    "# Read and plot number of samples after detecting EQ"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7c747b75-8fa3-48f9-9ab3-ac4a12982c01",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fepruns = {}\n",
-    "for replica in replicas:\n",
-    "    print(f\"Reading {replica}\")\n",
-    "    unkpath = replica.joinpath('decorrelated.csv')\n",
-    "    u_nk = None\n",
-    "    if unkpath.is_file():\n",
-    "        print(f\"Found existing dataframe. Reading.\")\n",
-    "        u_nk = safep.read_UNK(unkpath)\n",
-    "    else:\n",
-    "        print(f\"Didn't find existing dataframe at {unkpath}. Checking for raw fepout files.\")\n",
-    "        fepoutFiles = list(replica.glob(filename_regex))\n",
-    "        totalSize = 0\n",
-    "        for file in fepoutFiles:\n",
-    "            totalSize += os.path.getsize(file)\n",
-    "        print(f\"Will process {len(fepoutFiles)} fepout files.\\nTotal size:{np.round(totalSize/10**9, 2)}GB\")\n",
-    "\n",
-    "        if len(list(fepoutFiles))>0:\n",
-    "            print(\"Reading fepout files\")\n",
-    "            fig, ax = plt.subplots()\n",
-    "\n",
-    "            u_nk = namd.extract_u_nk(fepoutFiles, temperature)\n",
-    "            u_nk = u_nk.sort_index(axis=0, level=1).sort_index(axis=1)\n",
-    "            safep.plot_samples(ax, u_nk, color='blue', label='Raw Data')\n",
-    "\n",
-    "            if detectEQ:\n",
-    "                print(\"Detecting equilibrium\")\n",
-    "                u_nk = safep.detect_equilibrium_u_nk(u_nk)\n",
-    "                safep.plot_samples(ax, u_nk, color='orange', label='Equilibrium-Detected')\n",
-    "\n",
-    "            plt.savefig(f\"./{str(replica)}_FEP_number_of_samples.pdf\")\n",
-    "            plt.show()\n",
-    "            safep.save_UNK(u_nk, unkpath)\n",
-    "        else:\n",
-    "            print(f\"WARNING: no fepout files found for {replica}. Skipping.\")\n",
-    "    \n",
-    "    if u_nk is not None:\n",
-    "        fepruns[str(replica)] = FepRun(u_nk, None, None, None, None, None, None, None, next(itcolors))\n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7e81f216",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for key, feprun in fepruns.items():\n",
-    "    u_nk = feprun.u_nk\n",
-    "    feprun.perWindow, feprun.cumulative = safep.do_estimation(u_nk) #Run the BAR estimator on the fep data\n",
-    "    feprun.forward, feprun.forward_error, feprun.backward, feprun.backward_error = safep.do_convergence(u_nk) #Used later in the convergence plot'\n",
-    "    feprun.per_lambda_convergence = safep.do_per_lambda_convergence(u_nk)\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "caae40d3",
-   "metadata": {},
-   "source": [
-    "# Plot data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fd69e9c8-94d8-4a2a-9ec8-a24f78059431",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "toprint = \"\"\n",
-    "dGs = []\n",
-    "errors = []\n",
-    "for key, feprun in fepruns.items():\n",
-    "    cumulative = feprun.cumulative\n",
-    "    dG = np.round(cumulative.BAR.f.iloc[-1]*RT, 1)\n",
-    "    error = np.round(cumulative.BAR.errors.iloc[-1]*RT, 1)\n",
-    "    dGs.append(dG)\n",
-    "    errors.append(error)\n",
-    "\n",
-    "    changeAndError = f'{key}: \\u0394G = {dG}\\u00B1{error} kcal/mol'\n",
-    "    toprint += '<font size=5>{}</font><br/>'.format(changeAndError)\n",
-    "\n",
-    "toprint += '<font size=5>{}</font><br/>'.format('__________________')\n",
-    "mean = np.average(dGs)\n",
-    "\n",
-    "#If there are only a few replicas, the MBAR estimated error will be more reliable, albeit underestimated\n",
-    "if len(dGs)<3:\n",
-    "    sterr = np.sqrt(np.sum(np.square(errors)))\n",
-    "else:\n",
-    "    sterr = np.round(np.std(dGs),1)\n",
-    "toprint += '<font size=5>{}</font><br/>'.format(f'mean: {mean}')\n",
-    "toprint += '<font size=5>{}</font><br/>'.format(f'sterr: {sterr}')\n",
-    "Markdown(toprint)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d1fde633",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def do_agg_data(dataax, plotax):\n",
-    "    agg_data = []\n",
-    "    lines = dataax.lines\n",
-    "    for line in lines:\n",
-    "        agg_data.append(line.get_ydata())\n",
-    "    flat = np.array(agg_data).flatten()\n",
-    "    kernel = sp.stats.gaussian_kde(flat)\n",
-    "    pdfX = np.linspace(-1, 1, 1000)\n",
-    "    pdfY = kernel(pdfX)\n",
-    "    std = np.std(flat)\n",
-    "    mean = np.average(flat)\n",
-    "    temp = pd.Series(pdfY, index=pdfX)\n",
-    "    mode = temp.idxmax()\n",
-    "\n",
-    "    textstr = r\"$\\rm mode=$\"+f\"{np.round(mode,2)}\"+\"\\n\"+fr\"$\\mu$={np.round(mean,2)}\"+\"\\n\"+fr\"$\\sigma$={np.round(std,2)}\"\n",
-    "    props = dict(boxstyle='square', facecolor='white', alpha=1)\n",
-    "    plotax.text(0.175, 0.95, textstr, transform=plotax.transAxes, fontsize=14,\n",
-    "            verticalalignment='top', bbox=props)\n",
-    "\n",
-    "    return plotax"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "282938bb-988e-4e00-a2e6-ab018722569e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fig = None\n",
-    "for key, feprun in fepruns.items():\n",
-    "    if fig is None:\n",
-    "        fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n",
-    "    else:\n",
-    "        fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=fig, axes=axes, hysttype='lines', label=key, color=feprun.color)\n",
-    "    #fig.suptitle(changeAndError)\n",
-    "\n",
-    "# hack to get aggregate data:\n",
-    "axes[3] = do_agg_data(axes[2], axes[3])\n",
-    "\n",
-    "\n",
-    "axes[0].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
-    "axes[0].legend()\n",
-    "plt.savefig(dataroot.joinpath('FEP_general_figures.pdf'))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c7f90d8c",
-   "metadata": {},
-   "source": [
-    "# Plot the estimated total change in free energy as a function of simulation time; contiguous subsets starting at t=0 (\"Forward\") and t=end (\"Reverse\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "70523392-e935-4ff4-b6e9-cf679c787bcc",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fig, convAx = plt.subplots(1,1)\n",
-    "\n",
-    "for key, feprun in fepruns.items():\n",
-    "    convAx = safep.convergence_plot(convAx, \n",
-    "                                    feprun.forward*RT, \n",
-    "                                    feprun.forward_error*RT, \n",
-    "                                    feprun.backward*RT,\n",
-    "                                    feprun.backward_error*RT,\n",
-    "                                    fwdColor=feprun.color,\n",
-    "                                    bwdColor=feprun.color,\n",
-    "                                    errorbars=False\n",
-    "                                    )\n",
-    "    convAx.get_legend().remove()\n",
-    "\n",
-    "forward_line, = convAx.plot([],[],linestyle='-', color='black', label='Forward Time Sampling')\n",
-    "backward_line, = convAx.plot([],[],linestyle='--', color='black', label='Backward Time Sampling')\n",
-    "convAx.legend(handles=[forward_line, backward_line])\n",
-    "ymin = np.min(dGs)-1\n",
-    "ymax = np.max(dGs)+1\n",
-    "convAx.set_ylim((ymin,ymax))\n",
-    "plt.savefig(dataroot.joinpath('FEP_convergence.pdf'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e6ab4621",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "genfig = None\n",
-    "for key, feprun in fepruns.items():\n",
-    "    if genfig is None:\n",
-    "        genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n",
-    "    else:\n",
-    "        genfig, genaxes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=genfig, axes=genaxes, hysttype='lines', label=key, color=feprun.color)\n",
-    "plt.delaxes(genaxes[0])\n",
-    "plt.delaxes(genaxes[1])\n",
-    "\n",
-    "genaxes[3] = do_agg_data(axes[2], axes[3])\n",
-    "genaxes[2].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
-    "\n",
-    "for txt in genfig.texts:\n",
-    "    print(1)\n",
-    "    txt.set_visible(False)\n",
-    "    txt.set_text(\"\")\n",
-    "plt.show()\n",
-    "plt.savefig(dataroot.joinpath('FEP_perLambda_convergence.pdf'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "96418460",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 6e7b1b80bd9400287a09f17a2720d56c61aedaf0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20H=C3=A9nin?= <heninj@gmail.com>
Date: Thu, 22 Feb 2024 23:09:52 +0100
Subject: [PATCH 09/14] Variable name changes, test replica code

---
 Sample_Notebooks/BAR_Estimator_Basic.ipynb | 9 +++++----
 Sample_Notebooks/Replica1                  | 1 +
 Sample_Notebooks/Replica2                  | 1 +
 Sample_Notebooks/Replica3                  | 1 +
 4 files changed, 8 insertions(+), 4 deletions(-)
 create mode 120000 Sample_Notebooks/Replica1
 create mode 120000 Sample_Notebooks/Replica2
 create mode 120000 Sample_Notebooks/Replica3

diff --git a/Sample_Notebooks/BAR_Estimator_Basic.ipynb b/Sample_Notebooks/BAR_Estimator_Basic.ipynb
index 98f8ee6..f4bf77f 100644
--- a/Sample_Notebooks/BAR_Estimator_Basic.ipynb
+++ b/Sample_Notebooks/BAR_Estimator_Basic.ipynb
@@ -62,8 +62,9 @@
    "outputs": [],
    "source": [
     "dataroot = Path('.')\n",
-    "replicas = dataroot.glob('Sample_Data/')\n",
-    "filename_regex='*.fepout'\n",
+    "replica_pattern='Replica?'\n",
+    "replicas = dataroot.glob(replica_pattern)\n",
+    "filename_pattern='*.fepout'\n",
     "\n",
     "temperature = 303.15\n",
     "RT = 0.00198720650096 * temperature\n",
@@ -117,7 +118,7 @@
     "        u_nk = safep.read_UNK(unkpath)\n",
     "    else:\n",
     "        print(f\"Didn't find existing dataframe at {unkpath}. Checking for raw fepout files.\")\n",
-    "        fepoutFiles = list(replica.glob(filename_regex))\n",
+    "        fepoutFiles = list(replica.glob(filename_pattern))\n",
     "        totalSize = 0\n",
     "        for file in fepoutFiles:\n",
     "            totalSize += os.path.getsize(file)\n",
@@ -346,7 +347,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.0"
+   "version": "3.9.16"
   }
  },
  "nbformat": 4,
diff --git a/Sample_Notebooks/Replica1 b/Sample_Notebooks/Replica1
new file mode 120000
index 0000000..df9f7a8
--- /dev/null
+++ b/Sample_Notebooks/Replica1
@@ -0,0 +1 @@
+Sample_Data
\ No newline at end of file
diff --git a/Sample_Notebooks/Replica2 b/Sample_Notebooks/Replica2
new file mode 120000
index 0000000..df9f7a8
--- /dev/null
+++ b/Sample_Notebooks/Replica2
@@ -0,0 +1 @@
+Sample_Data
\ No newline at end of file
diff --git a/Sample_Notebooks/Replica3 b/Sample_Notebooks/Replica3
new file mode 120000
index 0000000..df9f7a8
--- /dev/null
+++ b/Sample_Notebooks/Replica3
@@ -0,0 +1 @@
+Sample_Data
\ No newline at end of file

From a51e13189f1ddd622ac055ec1ddc5ca9da940059 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Sat, 2 Mar 2024 11:29:05 -0500
Subject: [PATCH 10/14] removing redundant definition

---
 safep/plotting.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/safep/plotting.py b/safep/plotting.py
index def93ee..80aceee 100644
--- a/safep/plotting.py
+++ b/safep/plotting.py
@@ -18,27 +18,27 @@ def plot_samples(theax, u_nk, color='blue', label='Raw Data'):
     return theax
 
 
-def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D55E00', lgndF=None, lgndB=None):
-    '''
-    A revised convergence plot. Plays nicely with other plotting functions and is more customizable.
-    Arguments: theax[is] on which to plot, fs (forward estimates), ferr (forward errors), bs (backward-sampled estimates), berr (backward-sampled errors), fwdColor, bwdColor, lgndF legend forward color, lgndB legend backward color
-    Returns: theax[is]
-    '''
-    if not lgndF:
-        lgndF=fwdColor
-        lgndB=bwdColor
+# def convergence_plot(theax, fs, ferr, bs, berr, fwdColor='#0072B2', bwdColor='#D55E00', lgndF=None, lgndB=None):
+#     '''
+#     A revised convergence plot. Plays nicely with other plotting functions and is more customizable.
+#     Arguments: theax[is] on which to plot, fs (forward estimates), ferr (forward errors), bs (backward-sampled estimates), berr (backward-sampled errors), fwdColor, bwdColor, lgndF legend forward color, lgndB legend backward color
+#     Returns: theax[is]
+#     '''
+#     if not lgndF:
+#         lgndF=fwdColor
+#         lgndB=bwdColor
         
-    theax.errorbar(np.arange(len(fs))/len(fs)+0.1, fs, yerr=ferr, marker='o', linewidth=1, color=fwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=fwdColor, ms=5)
-    theax.errorbar(np.arange(len(bs))/len(fs)+0.1, bs, yerr=berr, marker='o', linewidth=1, color=bwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=bwdColor, ms=5, linestyle='--')
+#     theax.errorbar(np.arange(len(fs))/len(fs)+0.1, fs, yerr=ferr, marker='o', linewidth=1, color=fwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=fwdColor, ms=5)
+#     theax.errorbar(np.arange(len(bs))/len(fs)+0.1, bs, yerr=berr, marker='o', linewidth=1, color=bwdColor, markerfacecolor='white', markeredgewidth=1, markeredgecolor=bwdColor, ms=5, linestyle='--')
 
-    theax.xaxis.set_ticks([0, 0.2, 0.4, 0.6, 0.8, 1])
+#     theax.xaxis.set_ticks([0, 0.2, 0.4, 0.6, 0.8, 1])
     
-    finalMean = fs[-1]
-    theax.axhline(y= finalMean, linestyle='-.', color='gray')
-    theax.plot(0, finalMean, linewidth=1, color=lgndF, label='Forward Time Sampling')
-    theax.plot(0, finalMean, linewidth=1, color=lgndB, linestyle='--', label='Backward Time Sampling')
+#     finalMean = fs[-1]
+#     theax.axhline(y= finalMean, linestyle='-.', color='gray')
+#     theax.plot(0, finalMean, linewidth=1, color=lgndF, label='Forward Time Sampling')
+#     theax.plot(0, finalMean, linewidth=1, color=lgndB, linestyle='--', label='Backward Time Sampling')
     
-    return theax
+#     return theax
 
 def do_conv_plot(ax, X, fs, ferr, fwdColor, label=None):
     '''

From 119876e139d9da45fd3173c3fc241eeb7ff37308 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Thu, 13 Jun 2024 09:04:58 -0400
Subject: [PATCH 11/14] add labels to per_window plot

---
 safep/plotting.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/safep/plotting.py b/safep/plotting.py
index e377f19..5ceb74a 100644
--- a/safep/plotting.py
+++ b/safep/plotting.py
@@ -187,9 +187,9 @@ def plot_general(cumulative,
 
     # Per-window change in kcal/mol
     if errorbars:
-        each_ax.errorbar(per_window.index, per_window.BAR.df*RT, yerr=per_window.BAR.ddf, marker=None, linewidth=1, color=color)
-        each_ax.errorbar(per_window.index, -per_window.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color)
-    each_ax.plot(per_window.index, per_window.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color)
+        each_ax.errorbar(per_window.index, per_window.BAR.df*RT, yerr=per_window.BAR.ddf, marker=None, linewidth=1, color=color, label="forward EXP")
+        each_ax.errorbar(per_window.index, -per_window.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color, label="backward EXP")
+    each_ax.plot(per_window.index, per_window.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color, label="BAR")
 
     each_ax.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=per_window_ylim)
 

From 8738f587fb327851863777ebc6f7968e87d0f841 Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Thu, 13 Jun 2024 09:08:33 -0400
Subject: [PATCH 12/14] add legend to per_window general plot (second row) to
 clarify solid and dashed lines

---
 Sample_Notebooks/BAR_Estimator_Basic.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Sample_Notebooks/BAR_Estimator_Basic.ipynb b/Sample_Notebooks/BAR_Estimator_Basic.ipynb
index f4bf77f..a9f700a 100644
--- a/Sample_Notebooks/BAR_Estimator_Basic.ipynb
+++ b/Sample_Notebooks/BAR_Estimator_Basic.ipynb
@@ -244,6 +244,7 @@
     "for key, feprun in fepruns.items():\n",
     "    if fig is None:\n",
     "        fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, hysttype='lines', label=key, color=feprun.color)\n",
+    "        axes[1].legend()\n",
     "    else:\n",
     "        fig, axes = safep.plot_general(feprun.cumulative, None, feprun.perWindow, None, RT, fig=fig, axes=axes, hysttype='lines', label=key, color=feprun.color)\n",
     "    #fig.suptitle(changeAndError)\n",
@@ -251,7 +252,6 @@
     "# hack to get aggregate data:\n",
     "axes[3] = do_agg_data(axes[2], axes[3])\n",
     "\n",
-    "\n",
     "axes[0].set_title(str(mean)+r'$\\pm$'+str(sterr)+' kcal/mol')\n",
     "axes[0].legend()\n",
     "plt.savefig(dataroot.joinpath('FEP_general_figures.pdf'))"
@@ -347,7 +347,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
+   "version": "3.12.0"
   }
  },
  "nbformat": 4,

From e8dc613614b4a446727e10aba3df019787a9e2ad Mon Sep 17 00:00:00 2001
From: EzryStIago <ezrysm@gmail.com>
Date: Thu, 13 Jun 2024 09:10:59 -0400
Subject: [PATCH 13/14] make EXP curves in general plot alpha=0.5

---
 safep/plotting.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/safep/plotting.py b/safep/plotting.py
index 5ceb74a..a3471cf 100644
--- a/safep/plotting.py
+++ b/safep/plotting.py
@@ -187,9 +187,9 @@ def plot_general(cumulative,
 
     # Per-window change in kcal/mol
     if errorbars:
-        each_ax.errorbar(per_window.index, per_window.BAR.df*RT, yerr=per_window.BAR.ddf, marker=None, linewidth=1, color=color, label="forward EXP")
+        each_ax.errorbar(per_window.index, per_window.BAR.df*RT, yerr=per_window.BAR.ddf, marker=None, linewidth=1, alpha=0.5, color=color, label="forward EXP")
         each_ax.errorbar(per_window.index, -per_window.EXP.dG_b*RT, marker=None, linewidth=1, alpha=0.5, linestyle='--', color=color, label="backward EXP")
-    each_ax.plot(per_window.index, per_window.EXP.dG_f*RT, marker=None, linewidth=1, alpha=0.5, color=color, label="BAR")
+    each_ax.plot(per_window.index, per_window.EXP.dG_f*RT, marker=None, linewidth=1, color=color, label="BAR")
 
     each_ax.set(ylabel=r'$\mathrm{\Delta} G_\lambda$'+'\n'+r'$\left(kcal/mol\right)$', ylim=per_window_ylim)
 

From 6f75ccc82a9f43ee0d297c94e6dec022376e07d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20H=C3=A9nin?= <heninj@gmail.com>
Date: Wed, 19 Jun 2024 13:37:45 +0200
Subject: [PATCH 14/14] Fix parameter names

---
 Sample_Notebooks/BAR_Estimator_Basic.ipynb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Sample_Notebooks/BAR_Estimator_Basic.ipynb b/Sample_Notebooks/BAR_Estimator_Basic.ipynb
index a9f700a..7896a58 100644
--- a/Sample_Notebooks/BAR_Estimator_Basic.ipynb
+++ b/Sample_Notebooks/BAR_Estimator_Basic.ipynb
@@ -280,8 +280,8 @@
     "                                    feprun.forward_error*RT, \n",
     "                                    feprun.backward*RT,\n",
     "                                    feprun.backward_error*RT,\n",
-    "                                    fwdColor=feprun.color,\n",
-    "                                    bwdColor=feprun.color,\n",
+    "                                    fwd_color=feprun.color,\n",
+    "                                    bwd_color=feprun.color,\n",
     "                                    errorbars=False\n",
     "                                    )\n",
     "    convAx.get_legend().remove()\n",
@@ -347,7 +347,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.0"
+   "version": "3.12.3"
   }
  },
  "nbformat": 4,