From 592658feb3b53f31511a59729c35e11520dcae7e Mon Sep 17 00:00:00 2001 From: marshrossney <17361029+marshrossney@users.noreply.github.com> Date: Mon, 26 Apr 2021 16:31:14 +0100 Subject: [PATCH] layer-wise histograms of weights and field variables --- anvil/benchmark_config/free_scalar_sample.yml | 3 + anvil/models.py | 30 +++-- anvil/plot.py | 115 +++++------------- anvil/sample.py | 29 ++++- examples/runcards/report.md | 3 + 5 files changed, 87 insertions(+), 93 deletions(-) diff --git a/anvil/benchmark_config/free_scalar_sample.yml b/anvil/benchmark_config/free_scalar_sample.yml index 96cc6a2..884ea0a 100644 --- a/anvil/benchmark_config/free_scalar_sample.yml +++ b/anvil/benchmark_config/free_scalar_sample.yml @@ -16,6 +16,9 @@ template_text: | # Eigenvalues of kinetic operator {@plot_kinetic_eigenvalues@} {@table_kinetic_eigenvalues@} + # Layer-wise breakdown + {@plot_layerwise_histograms@} + {@plot_layerwise_weights@} actions_: - report(main=True) diff --git a/anvil/models.py b/anvil/models.py index 465e29c..c8b7a2f 100644 --- a/anvil/models.py +++ b/anvil/models.py @@ -4,14 +4,15 @@ Module containing reportengine actions which return callable objects that execute normalising flows constructed from multiple layers via function composition. """ +import torch from functools import partial +from reportengine import collect from anvil.core import Sequential - import anvil.layers as layers -def coupling_pair(coupling_layer, size_half, **layer_spec): +def coupling_block(coupling_layer, size_half, **layer_spec): """Helper function which returns a callable object that performs a coupling transformation on both even and odd lattice sites.""" coupling_transformation = partial(coupling_layer, size_half, **layer_spec) @@ -31,7 +32,7 @@ def real_nvp( """Action that returns a callable object that performs a sequence of `n_affine` affine coupling transformations on both partitions of the input vector.""" blocks = [ - coupling_pair( + coupling_block( layers.AffineLayer, size_half, hidden_shape=hidden_shape, @@ -53,7 +54,7 @@ def nice( """Action that returns a callable object that performs a sequence of `n_affine` affine coupling transformations on both partitions of the input vector.""" blocks = [ - coupling_pair( + coupling_block( layers.AdditiveLayer, size_half, hidden_shape=hidden_shape, @@ -74,10 +75,10 @@ def rational_quadratic_spline( activation="tanh", z2_equivar_spline=False, ): - """Action that returns a callable object that performs a pair of circular spline + """Action that returns a callable object that performs a block of circular spline transformations, one on each half of the input vector.""" blocks = [ - coupling_pair( + coupling_block( layers.RationalQuadraticSplineLayer, size_half, interval=interval, @@ -96,12 +97,25 @@ def rational_quadratic_spline( def spline_affine(real_nvp, rational_quadratic_spline): - return Sequential(rational_quadratic_spline, real_nvp) + return Sequential(*rational_quadratic_spline, *real_nvp) def affine_spline(real_nvp, rational_quadratic_spline): - return Sequential(real_nvp, rational_quadratic_spline) + return Sequential(*real_nvp, *rational_quadratic_spline) + +# TODO replace this +_loaded_model = collect("loaded_model", ("training_context",)) + + +def model_weights(_loaded_model): + model = _loaded_model[0] + for block in model: + params = { + key: tensor.flatten().numpy() for key, tensor in block.state_dict().items() + } + if len(params) > 1: # only want coupling layers + yield params MODEL_OPTIONS = { "nice": nice, diff --git a/anvil/plot.py b/anvil/plot.py index 9e9efc4..a74cabc 100644 --- a/anvil/plot.py +++ b/anvil/plot.py @@ -7,6 +7,7 @@ import torch import numpy as np import matplotlib.pyplot as plt +import matplotlib as mpl from matplotlib.ticker import MaxNLocator from reportengine.figure import figure, figuregen @@ -15,93 +16,35 @@ from anvil.observables import cosh_shift -def field_component(i, x_base, phi_model, base_neg, model_neg): - fig, ax = plt.subplots() - - ax.hist(x_base, bins=50, density=True, histtype="step", label="base") - ax.hist(phi_model, bins=50, density=True, histtype="step", label="model, full") - ax.hist( - base_neg, bins=50, density=True, histtype="step", label="model, $M_{base} < 0$" - ) - ax.hist( - model_neg, bins=50, density=True, histtype="step", label="model, $M_{mod} < 0$" - ) - ax.set_title(f"Coordinate {i}") - ax.legend() - fig.tight_layout() - return fig - - -def field_components(loaded_model, base_dist, lattice_size): - """Plot the distributions of base coordinates 'x' and output coordinates 'phi' and, - if known, plot the pdf of the target distribution.""" - sample_size = 10000 - - # Generate a large sample from the base distribution and pass it through the trained model - with torch.no_grad(): - x_base, _ = base_dist(sample_size) - sign = x_base.sum(dim=1).sign() - neg = (sign < 0).nonzero().squeeze() - phi_model, model_log_density = loaded_model(x_base, 0, neg) - - base_neg = phi_model[neg] - - sign = phi_model.sum(dim=1).sign() - neg = (sign < 0).nonzero().squeeze() - model_neg = phi_model[neg] - - # Convert to shape (n_coords, sample_size * lattice_size) - # NOTE: this is all pointless for the 1-component scalar - x_base = x_base.reshape(sample_size * lattice_size, -1).transpose(0, 1) - phi_model = phi_model.reshape(sample_size * lattice_size, -1).transpose(0, 1) - - base_neg = base_neg.reshape(1, -1) - model_neg = model_neg.reshape(1, -1) +# TODO: subplots for different neural networks +def plot_layer_weights(model_weights): + for weights in model_weights: + fig, ax = plt.subplots() + labels = list(weights.keys()) + data = weights.values() + ax.hist(data, bins=50, stacked=True, label=labels) + fig.legend() + yield fig - for i in range(x_base.shape[0]): - yield field_component(i, x_base[i], phi_model[i], base_neg[i], model_neg[i]) - -_plot_field_components = collect("field_components", ("training_context",)) - - -def example_configs(loaded_model, base_dist, training_geometry): - sample_size = 10 - - # Generate a large sample from the base distribution and pass it through the trained model - with torch.no_grad(): - x_base, _ = base_dist(sample_size) - sign = x_base.sum(dim=1).sign() - neg = (sign < 0).nonzero().squeeze() - phi_model, model_log_density = loaded_model(x_base, 0, neg) - - L = int(np.sqrt(phi_model.shape[1])) - - phi_true = np.zeros((4, L, L)) - phi_true[:, training_geometry.checkerboard] = phi_model[:4, : L ** 2 // 2] - phi_true[:, ~training_geometry.checkerboard] = phi_model[:4, L ** 2 // 2 :] - - fig, axes = plt.subplots(2, 2, sharex=True, sharey=True) - for i, ax in enumerate(axes.flatten()): - conf = ax.imshow(phi_true[i]) - fig.colorbar(conf, ax=ax) - - fig.suptitle("Example configurations") - - return fig - - -_plot_example_configs = collect("example_configs", ("training_context",)) +@figuregen +def plot_layerwise_weights(plot_layer_weights): + yield from plot_layer_weights -@figure -def plot_example_configs(_plot_example_configs): - return _plot_example_configs[0] +def plot_layer_histogram(layerwise_configs): + for v in layerwise_configs: + v = v.numpy() + v_pos = v[v.sum(axis=1) > 0].flatten() + v_neg = v[v.sum(axis=1) < 0].flatten() + fig, ax = plt.subplots() + ax.hist([v_pos, v_neg], bins=50, density=True, histtype="step") + yield fig @figuregen -def plot_field_components(_plot_field_components): - yield from _plot_field_components[0] +def plot_layerwise_histograms(plot_layer_histogram): + yield from plot_layer_histogram @figure @@ -172,9 +115,12 @@ def plot_two_point_correlator(two_point_correlator): """ corr = two_point_correlator.mean(axis=-1) - std = two_point_correlator.std(axis=-1) + error = two_point_correlator.std(axis=-1) + fractional_error = np.abs(error / corr) + L = corr.shape[0] - fractional_std = std / abs(corr) + corr = np.roll(corr, (-L // 2 - 1, -L // 2 - 1), (0, 1)) + fractional_error = np.roll(fractional_error, (-L // 2 - 1, -L // 2 - 1), (0, 1)) fig, (ax_mean, ax_std) = plt.subplots(1, 2, figsize=(13, 6), sharey=True) ax_std.set_title(r"$\sigma_G / G$") @@ -182,9 +128,10 @@ def plot_two_point_correlator(two_point_correlator): ax_mean.set_xlabel("$x$") ax_std.set_xlabel("$x$") ax_mean.set_ylabel("$t$") + norm = mpl.colors.LogNorm() - im1 = ax_mean.imshow(corr) - im2 = ax_std.imshow(fractional_std) + im1 = ax_mean.imshow(corr, norm=norm) + im2 = ax_std.imshow(fractional_error, norm=norm) ax_mean.yaxis.set_major_locator(MaxNLocator(integer=True)) ax_mean.xaxis.set_major_locator(MaxNLocator(integer=True)) diff --git a/anvil/sample.py b/anvil/sample.py index 466972b..3402877 100644 --- a/anvil/sample.py +++ b/anvil/sample.py @@ -120,7 +120,9 @@ def metropolis_hastings( history.append(0) tau = calc_tau_chain(history) - log.info(f"Integrated autocorrelation time from preliminary sampling phase: {tau:.2g}") + log.info( + f"Integrated autocorrelation time from preliminary sampling phase: {tau:.2g}" + ) sample_interval = ceil(2 * tau) # update sample interval log.info(f"Using sampling interval: {sample_interval}") @@ -180,3 +182,28 @@ def tau_chain(_metropolis_hastings): def acceptance(_metropolis_hastings): return _metropolis_hastings[0][2] + + +# TODO: figure out how to name each coupling block +@torch.no_grad() +def yield_configs_layerwise(loaded_model, base_dist, metropolis_hastings): + v, _ = base_dist(BATCH_SIZE) + yield v + + negative_mag = (v.sum(dim=1).sign() < 0).nonzero().squeeze() + + for block in loaded_model: + v, _ = block(v, 0, negative_mag) + # only want coupling layers + if len([tensor for tensor in block.state_dict().values()]) > 1: + yield v + + v = metropolis_hastings[0] + yield v + + +_layerwise_configs = collect("yield_configs_layerwise", ("training_context",)) + + +def layerwise_configs(_layerwise_configs): + return _layerwise_configs[0] diff --git a/examples/runcards/report.md b/examples/runcards/report.md index ab9f770..edab757 100644 --- a/examples/runcards/report.md +++ b/examples/runcards/report.md @@ -16,3 +16,6 @@ {@plot_magnetization_series@} {@plot_magnetization_autocorr@} {@plot_magnetization_integrated_autocorr@} +## Layer-wise breakdown +{@plot_layerwise_histograms@} +{@plot_layerwise_weights@}