From 592658feb3b53f31511a59729c35e11520dcae7e Mon Sep 17 00:00:00 2001
From: marshrossney <17361029+marshrossney@users.noreply.github.com>
Date: Mon, 26 Apr 2021 16:31:14 +0100
Subject: [PATCH] layer-wise histograms of weights and field variables

---
 anvil/benchmark_config/free_scalar_sample.yml |   3 +
 anvil/models.py                               |  30 +++--
 anvil/plot.py                                 | 115 +++++-------------
 anvil/sample.py                               |  29 ++++-
 examples/runcards/report.md                   |   3 +
 5 files changed, 87 insertions(+), 93 deletions(-)

diff --git a/anvil/benchmark_config/free_scalar_sample.yml b/anvil/benchmark_config/free_scalar_sample.yml
index 96cc6a2..884ea0a 100644
--- a/anvil/benchmark_config/free_scalar_sample.yml
+++ b/anvil/benchmark_config/free_scalar_sample.yml
@@ -16,6 +16,9 @@ template_text: |
  # Eigenvalues of kinetic operator
  {@plot_kinetic_eigenvalues@}
  {@table_kinetic_eigenvalues@}
+ # Layer-wise breakdown
+ {@plot_layerwise_histograms@}
+ {@plot_layerwise_weights@}
 
 actions_:
   - report(main=True)
diff --git a/anvil/models.py b/anvil/models.py
index 465e29c..c8b7a2f 100644
--- a/anvil/models.py
+++ b/anvil/models.py
@@ -4,14 +4,15 @@
 Module containing reportengine actions which return callable objects that execute
 normalising flows constructed from multiple layers via function composition.
 """
+import torch
 from functools import partial
+from reportengine import collect
 
 from anvil.core import Sequential
-
 import anvil.layers as layers
 
 
-def coupling_pair(coupling_layer, size_half, **layer_spec):
+def coupling_block(coupling_layer, size_half, **layer_spec):
     """Helper function which returns a callable object that performs a coupling
     transformation on both even and odd lattice sites."""
     coupling_transformation = partial(coupling_layer, size_half, **layer_spec)
@@ -31,7 +32,7 @@ def real_nvp(
     """Action that returns a callable object that performs a sequence of `n_affine`
     affine coupling transformations on both partitions of the input vector."""
     blocks = [
-        coupling_pair(
+        coupling_block(
             layers.AffineLayer,
             size_half,
             hidden_shape=hidden_shape,
@@ -53,7 +54,7 @@ def nice(
     """Action that returns a callable object that performs a sequence of `n_affine`
     affine coupling transformations on both partitions of the input vector."""
     blocks = [
-        coupling_pair(
+        coupling_block(
             layers.AdditiveLayer,
             size_half,
             hidden_shape=hidden_shape,
@@ -74,10 +75,10 @@ def rational_quadratic_spline(
     activation="tanh",
     z2_equivar_spline=False,
 ):
-    """Action that returns a callable object that performs a pair of circular spline
+    """Action that returns a callable object that performs a block of circular spline
     transformations, one on each half of the input vector."""
     blocks = [
-        coupling_pair(
+        coupling_block(
             layers.RationalQuadraticSplineLayer,
             size_half,
             interval=interval,
@@ -96,12 +97,25 @@ def rational_quadratic_spline(
 
 
 def spline_affine(real_nvp, rational_quadratic_spline):
-    return Sequential(rational_quadratic_spline, real_nvp)
+    return Sequential(*rational_quadratic_spline, *real_nvp)
 
 
 def affine_spline(real_nvp, rational_quadratic_spline):
-    return Sequential(real_nvp, rational_quadratic_spline)
+    return Sequential(*real_nvp, *rational_quadratic_spline)
+
+# TODO replace this
+_loaded_model = collect("loaded_model", ("training_context",))
+
+
+def model_weights(_loaded_model):
+    model = _loaded_model[0]
 
+    for block in model:
+        params = {
+            key: tensor.flatten().numpy() for key, tensor in block.state_dict().items()
+        }
+        if len(params) > 1:  # only want coupling layers
+            yield params
 
 MODEL_OPTIONS = {
     "nice": nice,
diff --git a/anvil/plot.py b/anvil/plot.py
index 9e9efc4..a74cabc 100644
--- a/anvil/plot.py
+++ b/anvil/plot.py
@@ -7,6 +7,7 @@
 import torch
 import numpy as np
 import matplotlib.pyplot as plt
+import matplotlib as mpl
 from matplotlib.ticker import MaxNLocator
 
 from reportengine.figure import figure, figuregen
@@ -15,93 +16,35 @@
 from anvil.observables import cosh_shift
 
 
-def field_component(i, x_base, phi_model, base_neg, model_neg):
-    fig, ax = plt.subplots()
-
-    ax.hist(x_base, bins=50, density=True, histtype="step", label="base")
-    ax.hist(phi_model, bins=50, density=True, histtype="step", label="model, full")
-    ax.hist(
-        base_neg, bins=50, density=True, histtype="step", label="model, $M_{base} < 0$"
-    )
-    ax.hist(
-        model_neg, bins=50, density=True, histtype="step", label="model, $M_{mod} < 0$"
-    )
-    ax.set_title(f"Coordinate {i}")
-    ax.legend()
-    fig.tight_layout()
-    return fig
-
-
-def field_components(loaded_model, base_dist, lattice_size):
-    """Plot the distributions of base coordinates 'x' and output coordinates 'phi' and,
-    if known, plot the pdf of the target distribution."""
-    sample_size = 10000
-
-    # Generate a large sample from the base distribution and pass it through the trained model
-    with torch.no_grad():
-        x_base, _ = base_dist(sample_size)
-        sign = x_base.sum(dim=1).sign()
-        neg = (sign < 0).nonzero().squeeze()
-        phi_model, model_log_density = loaded_model(x_base, 0, neg)
-
-    base_neg = phi_model[neg]
-
-    sign = phi_model.sum(dim=1).sign()
-    neg = (sign < 0).nonzero().squeeze()
-    model_neg = phi_model[neg]
-
-    # Convert to shape (n_coords, sample_size * lattice_size)
-    # NOTE: this is all pointless for the 1-component scalar
-    x_base = x_base.reshape(sample_size * lattice_size, -1).transpose(0, 1)
-    phi_model = phi_model.reshape(sample_size * lattice_size, -1).transpose(0, 1)
-
-    base_neg = base_neg.reshape(1, -1)
-    model_neg = model_neg.reshape(1, -1)
+# TODO: subplots for different neural networks
+def plot_layer_weights(model_weights):
+    for weights in model_weights:
+        fig, ax = plt.subplots()
+        labels = list(weights.keys())
+        data = weights.values()
+        ax.hist(data, bins=50, stacked=True, label=labels)
+        fig.legend()
+        yield fig
 
-    for i in range(x_base.shape[0]):
-        yield field_component(i, x_base[i], phi_model[i], base_neg[i], model_neg[i])
 
-
-_plot_field_components = collect("field_components", ("training_context",))
-
-
-def example_configs(loaded_model, base_dist, training_geometry):
-    sample_size = 10
-
-    # Generate a large sample from the base distribution and pass it through the trained model
-    with torch.no_grad():
-        x_base, _ = base_dist(sample_size)
-        sign = x_base.sum(dim=1).sign()
-        neg = (sign < 0).nonzero().squeeze()
-        phi_model, model_log_density = loaded_model(x_base, 0, neg)
-
-    L = int(np.sqrt(phi_model.shape[1]))
-
-    phi_true = np.zeros((4, L, L))
-    phi_true[:, training_geometry.checkerboard] = phi_model[:4, : L ** 2 // 2]
-    phi_true[:, ~training_geometry.checkerboard] = phi_model[:4, L ** 2 // 2 :]
-
-    fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
-    for i, ax in enumerate(axes.flatten()):
-        conf = ax.imshow(phi_true[i])
-        fig.colorbar(conf, ax=ax)
-
-    fig.suptitle("Example configurations")
-
-    return fig
-
-
-_plot_example_configs = collect("example_configs", ("training_context",))
+@figuregen
+def plot_layerwise_weights(plot_layer_weights):
+    yield from plot_layer_weights
 
 
-@figure
-def plot_example_configs(_plot_example_configs):
-    return _plot_example_configs[0]
+def plot_layer_histogram(layerwise_configs):
+    for v in layerwise_configs:
+        v = v.numpy()
+        v_pos = v[v.sum(axis=1) > 0].flatten()
+        v_neg = v[v.sum(axis=1) < 0].flatten()
+        fig, ax = plt.subplots()
+        ax.hist([v_pos, v_neg], bins=50, density=True, histtype="step")
+        yield fig
 
 
 @figuregen
-def plot_field_components(_plot_field_components):
-    yield from _plot_field_components[0]
+def plot_layerwise_histograms(plot_layer_histogram):
+    yield from plot_layer_histogram
 
 
 @figure
@@ -172,9 +115,12 @@ def plot_two_point_correlator(two_point_correlator):
 
     """
     corr = two_point_correlator.mean(axis=-1)
-    std = two_point_correlator.std(axis=-1)
+    error = two_point_correlator.std(axis=-1)
+    fractional_error = np.abs(error / corr)
+    L = corr.shape[0]
 
-    fractional_std = std / abs(corr)
+    corr = np.roll(corr, (-L // 2 - 1, -L // 2 - 1), (0, 1))
+    fractional_error = np.roll(fractional_error, (-L // 2 - 1, -L // 2 - 1), (0, 1))
 
     fig, (ax_mean, ax_std) = plt.subplots(1, 2, figsize=(13, 6), sharey=True)
     ax_std.set_title(r"$\sigma_G / G$")
@@ -182,9 +128,10 @@ def plot_two_point_correlator(two_point_correlator):
     ax_mean.set_xlabel("$x$")
     ax_std.set_xlabel("$x$")
     ax_mean.set_ylabel("$t$")
+    norm = mpl.colors.LogNorm()
 
-    im1 = ax_mean.imshow(corr)
-    im2 = ax_std.imshow(fractional_std)
+    im1 = ax_mean.imshow(corr, norm=norm)
+    im2 = ax_std.imshow(fractional_error, norm=norm)
 
     ax_mean.yaxis.set_major_locator(MaxNLocator(integer=True))
     ax_mean.xaxis.set_major_locator(MaxNLocator(integer=True))
diff --git a/anvil/sample.py b/anvil/sample.py
index 466972b..3402877 100644
--- a/anvil/sample.py
+++ b/anvil/sample.py
@@ -120,7 +120,9 @@ def metropolis_hastings(
                 history.append(0)
 
         tau = calc_tau_chain(history)
-        log.info(f"Integrated autocorrelation time from preliminary sampling phase: {tau:.2g}")
+        log.info(
+            f"Integrated autocorrelation time from preliminary sampling phase: {tau:.2g}"
+        )
         sample_interval = ceil(2 * tau)  # update sample interval
 
     log.info(f"Using sampling interval: {sample_interval}")
@@ -180,3 +182,28 @@ def tau_chain(_metropolis_hastings):
 
 def acceptance(_metropolis_hastings):
     return _metropolis_hastings[0][2]
+
+
+# TODO: figure out how to name each coupling block
+@torch.no_grad()
+def yield_configs_layerwise(loaded_model, base_dist, metropolis_hastings):
+    v, _ = base_dist(BATCH_SIZE)
+    yield v
+
+    negative_mag = (v.sum(dim=1).sign() < 0).nonzero().squeeze()
+
+    for block in loaded_model:
+        v, _ = block(v, 0, negative_mag)
+        # only want coupling layers
+        if len([tensor for tensor in block.state_dict().values()]) > 1:
+            yield v
+
+    v = metropolis_hastings[0]
+    yield v
+
+
+_layerwise_configs = collect("yield_configs_layerwise", ("training_context",))
+
+
+def layerwise_configs(_layerwise_configs):
+    return _layerwise_configs[0]
diff --git a/examples/runcards/report.md b/examples/runcards/report.md
index ab9f770..edab757 100644
--- a/examples/runcards/report.md
+++ b/examples/runcards/report.md
@@ -16,3 +16,6 @@
 {@plot_magnetization_series@}
 {@plot_magnetization_autocorr@}
 {@plot_magnetization_integrated_autocorr@}
+## Layer-wise breakdown
+{@plot_layerwise_histograms@}
+{@plot_layerwise_weights@}