[Do not merge] Extend post-processing and plotting to act on intermediate layers of model #64

Open · wants to merge 4 commits into base: master
Changes from all commits
1 change: 1 addition & 0 deletions anvil/benchmark_config/free_scalar_sample.yml
@@ -1,5 +1,6 @@
training_output: /tmp/del_me_anvil_benchmark
cp_id: -1
layer_id: -1

sample_size: 100000
thermalization: 10
19 changes: 15 additions & 4 deletions anvil/config.py
@@ -13,6 +13,7 @@
from anvil.checkpoint import TrainingOutput
from anvil.models import MODEL_OPTIONS
from anvil.distributions import BASE_OPTIONS, TARGET_OPTIONS
import anvil.sample as sample

from random import randint
from sys import maxsize
@@ -121,6 +122,17 @@ def produce_checkpoint(self, cp_id=None, training_output=None):
        # get index from training_output class
        return training_output.checkpoints[training_output.cp_ids.index(cp_id)]

    @element_of("layer_ids")
    def parse_layer_id(self, layer_id: int = -1):
        return layer_id

    @explicit_node
    def produce_configs(self, layer_id):
        if layer_id == -1:
            return sample.configs_from_metropolis
        else:
            return sample.configs_from_model

    def produce_training_context(self, training_output):
        """Given a training output produce the context of that training"""
        # NOTE: This seems a bit hacky, exposing the entire training configuration
@@ -183,12 +195,11 @@ def parse_bootstrap_sample_size(self, n_boot: int):
        log.warning(f"Using user specified bootstrap sample size: {n_boot}")
        return n_boot

-    def produce_bootstrap_seed(
-        self, manual_bootstrap_seed: (int, type(None)) = None):
+    def produce_bootstrap_seed(self, manual_bootstrap_seed: (int, type(None)) = None):
        if manual_bootstrap_seed is None:
            return randint(0, maxsize)
        # numpy is actually this strict but let's keep it sensible.
-        if (manual_bootstrap_seed < 0) or (manual_bootstrap_seed > 2**32):
+        if (manual_bootstrap_seed < 0) or (manual_bootstrap_seed > 2 ** 32):
            raise ConfigError("Seed is outside of appropriate range: [0, 2 ** 32]")
        return manual_bootstrap_seed

@@ -213,4 +224,4 @@ def produce_use_multiprocessing(self):
        """Don't use mp on MacOS"""
        if platform.system() == "Darwin":
            return False
-        return True
+        return True
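
Note on the new `layer_id` key: read together with `yield_configs_layerwise` in anvil/sample.py below, the convention appears to be that `-1` keeps the existing behaviour (a Metropolis-Hastings sample from the full model), `0` returns raw draws from the base distribution, and a positive value selects the output of that coupling block. A minimal sketch of this reading; `describe_layer_id` is a hypothetical helper, not part of the PR:

```python
# Hypothetical helper mirroring the dispatch in produce_configs above and the
# walk in yield_configs_layerwise (anvil/sample.py); inferred, not PR code.
def describe_layer_id(layer_id: int) -> str:
    if layer_id == -1:
        return "Metropolis-Hastings sample from the full model"
    if layer_id == 0:
        return "raw sample from the base distribution"
    return f"output of coupling block {layer_id}"

print(describe_layer_id(-1))  # the default set in free_scalar_sample.yml
```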
30 changes: 22 additions & 8 deletions anvil/models.py
@@ -4,14 +4,15 @@
Module containing reportengine actions which return callable objects that execute
normalising flows constructed from multiple layers via function composition.
"""
import torch
from functools import partial
from reportengine import collect

from anvil.core import Sequential

import anvil.layers as layers


-def coupling_pair(coupling_layer, size_half, **layer_spec):
+def coupling_block(coupling_layer, size_half, **layer_spec):
    """Helper function which returns a callable object that performs a coupling
    transformation on both even and odd lattice sites."""
    coupling_transformation = partial(coupling_layer, size_half, **layer_spec)
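
For orientation, the blocks built here follow the standard affine coupling pattern (RealNVP): one partition of the sites is transformed conditioned on the other. A generic sketch of that transform only; the project's actual implementation lives in `layers.AffineLayer`, which this diff does not show:

```python
import torch

def affine_coupling(passive, active, s, t):
    # Standard RealNVP-style update: scale and shift the active partition
    # using networks s, t evaluated on the passive partition (generic sketch,
    # not anvil's layers.AffineLayer).
    return active * torch.exp(s(passive)) + t(passive)
```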
@@ -31,7 +32,7 @@ def real_nvp(
"""Action that returns a callable object that performs a sequence of `n_affine`
affine coupling transformations on both partitions of the input vector."""
blocks = [
coupling_pair(
coupling_block(
layers.AffineLayer,
size_half,
hidden_shape=hidden_shape,
@@ -53,7 +54,7 @@ def nice(
"""Action that returns a callable object that performs a sequence of `n_affine`
affine coupling transformations on both partitions of the input vector."""
blocks = [
coupling_pair(
coupling_block(
layers.AdditiveLayer,
size_half,
hidden_shape=hidden_shape,
@@ -74,10 +75,10 @@ def rational_quadratic_spline(
    activation="tanh",
    z2_equivar_spline=False,
):
-    """Action that returns a callable object that performs a pair of circular spline
+    """Action that returns a callable object that performs a block of circular spline
    transformations, one on each half of the input vector."""
    blocks = [
-        coupling_pair(
+        coupling_block(
            layers.RationalQuadraticSplineLayer,
            size_half,
            interval=interval,
@@ -96,12 +97,25 @@


def spline_affine(real_nvp, rational_quadratic_spline):
-    return Sequential(rational_quadratic_spline, real_nvp)
+    return Sequential(*rational_quadratic_spline, *real_nvp)


def affine_spline(real_nvp, rational_quadratic_spline):
-    return Sequential(real_nvp, rational_quadratic_spline)
+    return Sequential(*real_nvp, *rational_quadratic_spline)


# TODO replace this
_loaded_model = collect("loaded_model", ("training_context",))


def model_weights(_loaded_model):
    model = _loaded_model[0]

    for block in model:
        params = {
            key: tensor.flatten().numpy() for key, tensor in block.state_dict().items()
        }
        if len(params) > 1:  # only want coupling layers
            yield params

MODEL_OPTIONS = {
    "nice": nice,
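
`model_weights` yields one dict per coupling block, mapping each `state_dict` key to a flattened numpy array, and skips blocks whose `state_dict` has at most one entry. A self-contained sketch of that filter on a hypothetical toy model (not an anvil flow):

```python
import torch.nn as nn

# Linear carries weight + bias (two state_dict entries) and passes the
# len(params) > 1 check; Tanh has no parameters and is skipped.
toy_model = nn.Sequential(nn.Linear(4, 4), nn.Tanh(), nn.Linear(4, 4))

for block in toy_model:
    params = {k: t.flatten().numpy() for k, t in block.state_dict().items()}
    if len(params) > 1:  # same filter as model_weights above
        print({k: v.shape for k, v in params.items()})
```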
8 changes: 6 additions & 2 deletions anvil/observables.py
@@ -12,6 +12,7 @@

log = logging.getLogger(__name__)


def cosh_shift(x, xi, A, c):
    return A * np.cosh(-x / xi) + c

@@ -42,8 +43,11 @@ def fit_zero_momentum_correlator(zero_momentum_correlator, training_geometry):


def correlation_length_from_fit(fit_zero_momentum_correlator):
-    popt, pcov, _ = fit_zero_momentum_correlator
-    return popt[0], np.sqrt(pcov[0, 0])
+    if fit_zero_momentum_correlator is not None:
+        popt, pcov, _ = fit_zero_momentum_correlator
+        return popt[0], np.sqrt(pcov[0, 0])
+    else:
+        return None, None


def autocorrelation(chain):
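
For context, `fit_zero_momentum_correlator` (its diff is truncated above) fits `cosh_shift` to the zero-momentum correlator and evidently returns `None` when no fit is available, which the new branch in `correlation_length_from_fit` handles. A hedged sketch of such a fit with `scipy.optimize.curve_fit`; the fit window `t0`, the initial guess, and the failure handling are assumptions, not the project's code:

```python
import numpy as np
from scipy.optimize import curve_fit

def fit_correlator_sketch(correlator, t0=1):
    """Fit G(t) ~ A cosh(-(t - T/2) / xi) + c on t in [t0, T - t0]."""
    T = len(correlator)
    t = np.arange(t0, T - t0)
    try:
        popt, pcov = curve_fit(
            lambda t, xi, A, c: A * np.cosh(-(t - T // 2) / xi) + c,
            t,
            correlator[t],
            p0=(1.0, 1.0, 0.0),
        )
    except RuntimeError:
        return None  # correlation_length_from_fit above handles this case
    return popt, pcov, t0  # popt = (xi, A, c), so popt[0] is xi
```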
128 changes: 42 additions & 86 deletions anvil/plot.py
@@ -7,6 +7,7 @@
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import MaxNLocator

from reportengine.figure import figure, figuregen
@@ -15,93 +16,44 @@
from anvil.observables import cosh_shift


-def field_component(i, x_base, phi_model, base_neg, model_neg):
-    fig, ax = plt.subplots()
-
-    ax.hist(x_base, bins=50, density=True, histtype="step", label="base")
-    ax.hist(phi_model, bins=50, density=True, histtype="step", label="model, full")
-    ax.hist(
-        base_neg, bins=50, density=True, histtype="step", label="model, $M_{base} < 0$"
-    )
-    ax.hist(
-        model_neg, bins=50, density=True, histtype="step", label="model, $M_{mod} < 0$"
-    )
-    ax.set_title(f"Coordinate {i}")
-    ax.legend()
-    fig.tight_layout()
-    return fig


-def field_components(loaded_model, base_dist, lattice_size):
-    """Plot the distributions of base coordinates 'x' and output coordinates 'phi' and,
-    if known, plot the pdf of the target distribution."""
-    sample_size = 10000
-
-    # Generate a large sample from the base distribution and pass it through the trained model
-    with torch.no_grad():
-        x_base, _ = base_dist(sample_size)
-        sign = x_base.sum(dim=1).sign()
-        neg = (sign < 0).nonzero().squeeze()
-        phi_model, model_log_density = loaded_model(x_base, 0, neg)
-
-    base_neg = phi_model[neg]
-
-    sign = phi_model.sum(dim=1).sign()
-    neg = (sign < 0).nonzero().squeeze()
-    model_neg = phi_model[neg]
-
-    # Convert to shape (n_coords, sample_size * lattice_size)
-    # NOTE: this is all pointless for the 1-component scalar
-    x_base = x_base.reshape(sample_size * lattice_size, -1).transpose(0, 1)
-    phi_model = phi_model.reshape(sample_size * lattice_size, -1).transpose(0, 1)
-
-    base_neg = base_neg.reshape(1, -1)
-    model_neg = model_neg.reshape(1, -1)
-
-    for i in range(x_base.shape[0]):
-        yield field_component(i, x_base[i], phi_model[i], base_neg[i], model_neg[i])

+# TODO: subplots for different neural networks
+def plot_layer_weights(model_weights):
+    for weights in model_weights:
+        fig, ax = plt.subplots()
+        labels = list(weights.keys())
+        data = weights.values()
+        ax.hist(data, bins=50, stacked=True, label=labels)
+        fig.legend()
+        yield fig

-_plot_field_components = collect("field_components", ("training_context",))

+@figuregen
+def plot_layerwise_weights(plot_layer_weights):
+    yield from plot_layer_weights

-def example_configs(loaded_model, base_dist, training_geometry):
-    sample_size = 10
-
-    # Generate a large sample from the base distribution and pass it through the trained model
-    with torch.no_grad():
-        x_base, _ = base_dist(sample_size)
-        sign = x_base.sum(dim=1).sign()
-        neg = (sign < 0).nonzero().squeeze()
-        phi_model, model_log_density = loaded_model(x_base, 0, neg)
-
-    L = int(np.sqrt(phi_model.shape[1]))
-
-    phi_true = np.zeros((4, L, L))
-    phi_true[:, training_geometry.checkerboard] = phi_model[:4, : L ** 2 // 2]
-    phi_true[:, ~training_geometry.checkerboard] = phi_model[:4, L ** 2 // 2 :]
-
-    fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
-    for i, ax in enumerate(axes.flatten()):
-        conf = ax.imshow(phi_true[i])
-        fig.colorbar(conf, ax=ax)
-
-    fig.suptitle("Example configurations")

+@figure
+def plot_layer_histogram(configs):
+    v = configs.numpy()
+    v_pos = v[v.sum(axis=1) > 0].flatten()
+    v_neg = v[v.sum(axis=1) < 0].flatten()
+    fig, ax = plt.subplots()
+    ax.hist([v_pos, v_neg], bins=50, density=True, histtype="step")
+    return fig


-_plot_example_configs = collect("example_configs", ("training_context",))


@figure
-def plot_example_configs(_plot_example_configs):
-    return _plot_example_configs[0]


-@figuregen
-def plot_field_components(_plot_field_components):
-    yield from _plot_field_components[0]
+def plot_correlation_length(table_correlation_length):
+    fig, ax = plt.subplots()
+    ax.errorbar(
+        x=table_correlation_length.index,
+        y=table_correlation_length.value,
+        yerr=table_correlation_length.error,
+        linestyle="",
+        marker="o",
+    )
+    ax.set_xticklabels(table_correlation_length.index, rotation=45)
+    return fig


@figure
@@ -120,14 +72,14 @@ def plot_zero_momentum_correlator(

    if fit_zero_momentum_correlator is not None:
        popt, pcov, t0 = fit_zero_momentum_correlator
-        shift = popt[2]
+        xi, A, shift = popt

        t = np.linspace(t0, T - t0, 100)
        ax.plot(
            t,
-            cosh_shift(t - T // 2, *popt) - popt[2],
+            cosh_shift(t - T // 2, *popt) - shift,
            "r--",
-            label=r"fit $A \cosh(-(t - T/2) / \xi) + c$",
+            label=r"fit $A \cosh(-(t - T/2) / \xi) + c$" + "\n" + fr"$\xi = ${xi:.2f}",
        )
        ax.errorbar(
            x=np.arange(T),
@@ -172,19 +124,23 @@ def plot_two_point_correlator(two_point_correlator):

"""
corr = two_point_correlator.mean(axis=-1)
std = two_point_correlator.std(axis=-1)
error = two_point_correlator.std(axis=-1)
fractional_error = np.abs(error / corr)
L = corr.shape[0]

fractional_std = std / abs(corr)
corr = np.roll(corr, (-L // 2 - 1, -L // 2 - 1), (0, 1))
fractional_error = np.roll(fractional_error, (-L // 2 - 1, -L // 2 - 1), (0, 1))

fig, (ax_mean, ax_std) = plt.subplots(1, 2, figsize=(13, 6), sharey=True)
ax_std.set_title(r"$\sigma_G / G$")
ax_mean.set_title("$G(x, t)$")
ax_mean.set_xlabel("$x$")
ax_std.set_xlabel("$x$")
ax_mean.set_ylabel("$t$")
norm = mpl.colors.LogNorm()

im1 = ax_mean.imshow(corr)
im2 = ax_std.imshow(fractional_std)
im1 = ax_mean.imshow(corr, norm=norm)
im2 = ax_std.imshow(fractional_error, norm=norm)

ax_mean.yaxis.set_major_locator(MaxNLocator(integer=True))
ax_mean.xaxis.set_major_locator(MaxNLocator(integer=True))
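
`plot_correlation_length` reads `.index`, `.value`, and `.error` from its argument, so it presumably receives a pandas table of the correlation length per layer. A sketch of a compatible input; the column names follow from the attribute access above, while the index labels are made up:

```python
import pandas as pd

table_correlation_length = pd.DataFrame(
    {"value": [1.8, 2.1, 2.4], "error": [0.1, 0.1, 0.2]},
    index=["block 1", "block 2", "block 3"],  # hypothetical labels
)
# plot_correlation_length(table_correlation_length) draws one errorbar per row.
```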
33 changes: 31 additions & 2 deletions anvil/sample.py
@@ -120,7 +120,9 @@ def metropolis_hastings(
            history.append(0)

    tau = calc_tau_chain(history)
-    log.info(f"Integrated autocorrelation time from preliminary sampling phase: {tau:.2g}")
+    log.info(
+        f"Integrated autocorrelation time from preliminary sampling phase: {tau:.2g}"
+    )
    sample_interval = ceil(2 * tau)  # update sample interval

    log.info(f"Using sampling interval: {sample_interval}")
@@ -170,7 +172,7 @@ def metropolis_hastings(
_metropolis_hastings = collect("metropolis_hastings", ("training_context",))


-def configs(_metropolis_hastings):
+def configs_from_metropolis(_metropolis_hastings):
    return _metropolis_hastings[0][0]


@@ -180,3 +182,30 @@ def tau_chain(_metropolis_hastings):

def acceptance(_metropolis_hastings):
return _metropolis_hastings[0][2]


# TODO: figure out how to name each coupling block
@torch.no_grad()
def yield_configs_layerwise(loaded_model, base_dist, sample_size, layer_id):
    v, _ = base_dist(sample_size)
    if layer_id == 0:
        return v

    negative_mag = (v.sum(dim=1).sign() < 0).nonzero().squeeze()

    i = 1
    for block in loaded_model:
        v, _ = block(v, 0, negative_mag)
        # only want coupling layers
        if len([tensor for tensor in block.state_dict().values()]) > 1:
            if i == layer_id:
                return v
            else:
                i += 1
Comment on lines +197 to +204 (Collaborator, author):
I will need to adjust this after #68 is merged, since the model is no longer necessarily a flat Sequential of coupling blocks.
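
One possible direction for that adjustment (a sketch, not part of this PR): recursively flatten nested containers so the layerwise walk still visits individual coupling blocks:

```python
import torch.nn as nn

def iter_blocks(module):
    """Yield leaf blocks of a possibly nested Sequential (illustrative only)."""
    for child in module.children():
        if isinstance(child, nn.Sequential):
            yield from iter_blocks(child)
        else:
            yield child
```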



_configs_from_model = collect("yield_configs_layerwise", ("training_context",))


def configs_from_model(_configs_from_model):
    return _configs_from_model[0]