[Do not merge] Extend post-processing and plotting to act on intermediate layers of model #64

Open · wants to merge 4 commits into base: master
Changes from all commits
1 change: 1 addition & 0 deletions anvil/benchmark_config/free_scalar_sample.yml
@@ -1,5 +1,6 @@
training_output: /tmp/del_me_anvil_benchmark
cp_id: -1
layer_id: -1

sample_size: 100000
thermalization: 10
19 changes: 15 additions & 4 deletions anvil/config.py
@@ -13,6 +13,7 @@
from anvil.checkpoint import TrainingOutput
from anvil.models import MODEL_OPTIONS
from anvil.distributions import BASE_OPTIONS, TARGET_OPTIONS
import anvil.sample as sample

from random import randint
from sys import maxsize
@@ -121,6 +122,17 @@ def produce_checkpoint(self, cp_id=None, training_output=None):
        # get index from training_output class
        return training_output.checkpoints[training_output.cp_ids.index(cp_id)]

    @element_of("layer_ids")
    def parse_layer_id(self, layer_id: int = -1):
        return layer_id

    @explicit_node
    def produce_configs(self, layer_id):
        if layer_id == -1:
            return sample.configs_from_metropolis
        else:
            return sample.configs_from_model

    def produce_training_context(self, training_output):
        """Given a training output produce the context of that training"""
        # NOTE: This seems a bit hacky, exposing the entire training configuration
@@ -183,12 +195,11 @@ def parse_bootstrap_sample_size(self, n_boot: int):
        log.warning(f"Using user specified bootstrap sample size: {n_boot}")
        return n_boot

-    def produce_bootstrap_seed(
-        self, manual_bootstrap_seed: (int, type(None)) = None):
+    def produce_bootstrap_seed(self, manual_bootstrap_seed: (int, type(None)) = None):
        if manual_bootstrap_seed is None:
            return randint(0, maxsize)
        # numpy is actually this strict but let's keep it sensible.
-        if (manual_bootstrap_seed < 0) or (manual_bootstrap_seed > 2**32):
+        if (manual_bootstrap_seed < 0) or (manual_bootstrap_seed > 2 ** 32):
            raise ConfigError("Seed is outside of appropriate range: [0, 2 ** 32]")
        return manual_bootstrap_seed

@@ -213,4 +224,4 @@ def produce_use_multiprocessing(self):
        """Don't use mp on MacOS"""
        if platform.system() == "Darwin":
            return False
-        return True
+        return True
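
Note on the new `layer_id` key: read together with `yield_configs_layerwise` in anvil/sample.py below, the convention appears to be that `-1` keeps the existing behaviour (a Metropolis-Hastings sample from the full model), `0` returns raw draws from the base distribution, and a positive value selects the output of that coupling block. A minimal sketch of this reading; `describe_layer_id` is a hypothetical helper, not part of the PR:

```python
# Hypothetical helper mirroring the dispatch in produce_configs above and the
# walk in yield_configs_layerwise (anvil/sample.py); inferred, not PR code.
def describe_layer_id(layer_id: int) -> str:
    if layer_id == -1:
        return "Metropolis-Hastings sample from the full model"
    if layer_id == 0:
        return "raw sample from the base distribution"
    return f"output of coupling block {layer_id}"

print(describe_layer_id(-1))  # the default set in free_scalar_sample.yml
```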
30 changes: 22 additions & 8 deletions anvil/models.py
@@ -4,14 +4,15 @@
Module containing reportengine actions which return callable objects that execute
normalising flows constructed from multiple layers via function composition.
"""
import torch
from functools import partial
from reportengine import collect

from anvil.core import Sequential

import anvil.layers as layers


-def coupling_pair(coupling_layer, size_half, **layer_spec):
+def coupling_block(coupling_layer, size_half, **layer_spec):
    """Helper function which returns a callable object that performs a coupling
    transformation on both even and odd lattice sites."""
    coupling_transformation = partial(coupling_layer, size_half, **layer_spec)
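
For orientation, the blocks built here follow the standard affine coupling pattern (RealNVP): one partition of the sites is transformed conditioned on the other. A generic sketch of that transform only; the project's actual implementation lives in `layers.AffineLayer`, which this diff does not show:

```python
import torch

def affine_coupling(passive, active, s, t):
    # Standard RealNVP-style update: scale and shift the active partition
    # using networks s, t evaluated on the passive partition (generic sketch,
    # not anvil's layers.AffineLayer).
    return active * torch.exp(s(passive)) + t(passive)
```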
@@ -31,7 +32,7 @@ def real_nvp(
"""Action that returns a callable object that performs a sequence of `n_affine`
affine coupling transformations on both partitions of the input vector."""
blocks = [
coupling_pair(
coupling_block(
layers.AffineLayer,
size_half,
hidden_shape=hidden_shape,
@@ -53,7 +54,7 @@ def nice(
"""Action that returns a callable object that performs a sequence of `n_affine`
affine coupling transformations on both partitions of the input vector."""
blocks = [
coupling_pair(
coupling_block(
layers.AdditiveLayer,
size_half,
hidden_shape=hidden_shape,
@@ -74,10 +75,10 @@ def rational_quadratic_spline(
    activation="tanh",
    z2_equivar_spline=False,
):
-    """Action that returns a callable object that performs a pair of circular spline
+    """Action that returns a callable object that performs a block of circular spline
    transformations, one on each half of the input vector."""
    blocks = [
-        coupling_pair(
+        coupling_block(
            layers.RationalQuadraticSplineLayer,
            size_half,
            interval=interval,
@@ -96,12 +97,25 @@


def spline_affine(real_nvp, rational_quadratic_spline):
-    return Sequential(rational_quadratic_spline, real_nvp)
+    return Sequential(*rational_quadratic_spline, *real_nvp)


def affine_spline(real_nvp, rational_quadratic_spline):
-    return Sequential(real_nvp, rational_quadratic_spline)
+    return Sequential(*real_nvp, *rational_quadratic_spline)


# TODO replace this
_loaded_model = collect("loaded_model", ("training_context",))


def model_weights(_loaded_model):
    model = _loaded_model[0]

    for block in model:
        params = {
            key: tensor.flatten().numpy() for key, tensor in block.state_dict().items()
        }
        if len(params) > 1:  # only want coupling layers
            yield params

MODEL_OPTIONS = {
    "nice": nice,
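
`model_weights` yields one dict per coupling block, mapping each `state_dict` key to a flattened numpy array, and skips blocks whose `state_dict` has at most one entry. A self-contained sketch of that filter on a hypothetical toy model (not an anvil flow):

```python
import torch.nn as nn

# Linear carries weight + bias (two state_dict entries) and passes the
# len(params) > 1 check; Tanh has no parameters and is skipped.
toy_model = nn.Sequential(nn.Linear(4, 4), nn.Tanh(), nn.Linear(4, 4))

for block in toy_model:
    params = {k: t.flatten().numpy() for k, t in block.state_dict().items()}
    if len(params) > 1:  # same filter as model_weights above
        print({k: v.shape for k, v in params.items()})
```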
8 changes: 6 additions & 2 deletions anvil/observables.py
@@ -12,6 +12,7 @@

log = logging.getLogger(__name__)


def cosh_shift(x, xi, A, c):
    return A * np.cosh(-x / xi) + c

@@ -42,8 +43,11 @@ def fit_zero_momentum_correlator(zero_momentum_correlator, training_geometry):


def correlation_length_from_fit(fit_zero_momentum_correlator):
-    popt, pcov, _ = fit_zero_momentum_correlator
-    return popt[0], np.sqrt(pcov[0, 0])
+    if fit_zero_momentum_correlator is not None:
+        popt, pcov, _ = fit_zero_momentum_correlator
+        return popt[0], np.sqrt(pcov[0, 0])
+    else:
+        return None, None


def autocorrelation(chain):
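
For context, `fit_zero_momentum_correlator` (its diff is truncated above) fits `cosh_shift` to the zero-momentum correlator and evidently returns `None` when no fit is available, which the new branch in `correlation_length_from_fit` handles. A hedged sketch of such a fit with `scipy.optimize.curve_fit`; the fit window `t0`, the initial guess, and the failure handling are assumptions, not the project's code:

```python
import numpy as np
from scipy.optimize import curve_fit

def fit_correlator_sketch(correlator, t0=1):
    """Fit G(t) ~ A cosh(-(t - T/2) / xi) + c on t in [t0, T - t0]."""
    T = len(correlator)
    t = np.arange(t0, T - t0)
    try:
        popt, pcov = curve_fit(
            lambda t, xi, A, c: A * np.cosh(-(t - T // 2) / xi) + c,
            t,
            correlator[t],
            p0=(1.0, 1.0, 0.0),
        )
    except RuntimeError:
        return None  # correlation_length_from_fit above handles this case
    return popt, pcov, t0  # popt = (xi, A, c), so popt[0] is xi
```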
128 changes: 42 additions & 86 deletions anvil/plot.py
@@ -7,6 +7,7 @@
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import MaxNLocator

from reportengine.figure import figure, figuregen
@@ -15,93 +16,44 @@
from anvil.observables import cosh_shift


-def field_component(i, x_base, phi_model, base_neg, model_neg):
-    fig, ax = plt.subplots()
-
-    ax.hist(x_base, bins=50, density=True, histtype="step", label="base")
-    ax.hist(phi_model, bins=50, density=True, histtype="step", label="model, full")
-    ax.hist(
-        base_neg, bins=50, density=True, histtype="step", label="model, $M_{base} < 0$"
-    )
-    ax.hist(
-        model_neg, bins=50, density=True, histtype="step", label="model, $M_{mod} < 0$"
-    )
-    ax.set_title(f"Coordinate {i}")
-    ax.legend()
-    fig.tight_layout()
-    return fig


-def field_components(loaded_model, base_dist, lattice_size):
-    """Plot the distributions of base coordinates 'x' and output coordinates 'phi' and,
-    if known, plot the pdf of the target distribution."""
-    sample_size = 10000
-
-    # Generate a large sample from the base distribution and pass it through the trained model
-    with torch.no_grad():
-        x_base, _ = base_dist(sample_size)
-        sign = x_base.sum(dim=1).sign()
-        neg = (sign < 0).nonzero().squeeze()
-        phi_model, model_log_density = loaded_model(x_base, 0, neg)
-
-    base_neg = phi_model[neg]
-
-    sign = phi_model.sum(dim=1).sign()
-    neg = (sign < 0).nonzero().squeeze()
-    model_neg = phi_model[neg]
-
-    # Convert to shape (n_coords, sample_size * lattice_size)
-    # NOTE: this is all pointless for the 1-component scalar
-    x_base = x_base.reshape(sample_size * lattice_size, -1).transpose(0, 1)
-    phi_model = phi_model.reshape(sample_size * lattice_size, -1).transpose(0, 1)
-
-    base_neg = base_neg.reshape(1, -1)
-    model_neg = model_neg.reshape(1, -1)
-
-    for i in range(x_base.shape[0]):
-        yield field_component(i, x_base[i], phi_model[i], base_neg[i], model_neg[i])

+# TODO: subplots for different neural networks
+def plot_layer_weights(model_weights):
+    for weights in model_weights:
+        fig, ax = plt.subplots()
+        labels = list(weights.keys())
+        data = weights.values()
+        ax.hist(data, bins=50, stacked=True, label=labels)
+        fig.legend()
+        yield fig

-_plot_field_components = collect("field_components", ("training_context",))

+@figuregen
+def plot_layerwise_weights(plot_layer_weights):
+    yield from plot_layer_weights

-def example_configs(loaded_model, base_dist, training_geometry):
-    sample_size = 10
-
-    # Generate a large sample from the base distribution and pass it through the trained model
-    with torch.no_grad():
-        x_base, _ = base_dist(sample_size)
-        sign = x_base.sum(dim=1).sign()
-        neg = (sign < 0).nonzero().squeeze()
-        phi_model, model_log_density = loaded_model(x_base, 0, neg)
-
-    L = int(np.sqrt(phi_model.shape[1]))
-
-    phi_true = np.zeros((4, L, L))
-    phi_true[:, training_geometry.checkerboard] = phi_model[:4, : L ** 2 // 2]
-    phi_true[:, ~training_geometry.checkerboard] = phi_model[:4, L ** 2 // 2 :]
-
-    fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
-    for i, ax in enumerate(axes.flatten()):
-        conf = ax.imshow(phi_true[i])
-        fig.colorbar(conf, ax=ax)
-
-    fig.suptitle("Example configurations")

+@figure
+def plot_layer_histogram(configs):
+    v = configs.numpy()
+    v_pos = v[v.sum(axis=1) > 0].flatten()
+    v_neg = v[v.sum(axis=1) < 0].flatten()
+    fig, ax = plt.subplots()
+    ax.hist([v_pos, v_neg], bins=50, density=True, histtype="step")
+    return fig


-_plot_example_configs = collect("example_configs", ("training_context",))


@figure
-def plot_example_configs(_plot_example_configs):
-    return _plot_example_configs[0]


-@figuregen
-def plot_field_components(_plot_field_components):
-    yield from _plot_field_components[0]
+def plot_correlation_length(table_correlation_length):
+    fig, ax = plt.subplots()
+    ax.errorbar(
+        x=table_correlation_length.index,
+        y=table_correlation_length.value,
+        yerr=table_correlation_length.error,
+        linestyle="",
+        marker="o",
+    )
+    ax.set_xticklabels(table_correlation_length.index, rotation=45)
+    return fig


@figure
@@ -120,14 +72,14 @@ def plot_zero_momentum_correlator(

    if fit_zero_momentum_correlator is not None:
        popt, pcov, t0 = fit_zero_momentum_correlator
-        shift = popt[2]
+        xi, A, shift = popt

        t = np.linspace(t0, T - t0, 100)
        ax.plot(
            t,
-            cosh_shift(t - T // 2, *popt) - popt[2],
+            cosh_shift(t - T // 2, *popt) - shift,
            "r--",
-            label=r"fit $A \cosh(-(t - T/2) / \xi) + c$",
+            label=r"fit $A \cosh(-(t - T/2) / \xi) + c$" + "\n" + fr"$\xi = ${xi:.2f}",
        )
        ax.errorbar(
            x=np.arange(T),
@@ -172,19 +124,23 @@ def plot_two_point_correlator(two_point_correlator):

"""
corr = two_point_correlator.mean(axis=-1)
std = two_point_correlator.std(axis=-1)
error = two_point_correlator.std(axis=-1)
fractional_error = np.abs(error / corr)
L = corr.shape[0]

fractional_std = std / abs(corr)
corr = np.roll(corr, (-L // 2 - 1, -L // 2 - 1), (0, 1))
fractional_error = np.roll(fractional_error, (-L // 2 - 1, -L // 2 - 1), (0, 1))

fig, (ax_mean, ax_std) = plt.subplots(1, 2, figsize=(13, 6), sharey=True)
ax_std.set_title(r"$\sigma_G / G$")
ax_mean.set_title("$G(x, t)$")
ax_mean.set_xlabel("$x$")
ax_std.set_xlabel("$x$")
ax_mean.set_ylabel("$t$")
norm = mpl.colors.LogNorm()

im1 = ax_mean.imshow(corr)
im2 = ax_std.imshow(fractional_std)
im1 = ax_mean.imshow(corr, norm=norm)
im2 = ax_std.imshow(fractional_error, norm=norm)

ax_mean.yaxis.set_major_locator(MaxNLocator(integer=True))
ax_mean.xaxis.set_major_locator(MaxNLocator(integer=True))
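
`plot_correlation_length` reads `.index`, `.value`, and `.error` from its argument, so it presumably receives a pandas table of the correlation length per layer. A sketch of a compatible input; the column names follow from the attribute access above, while the index labels are made up:

```python
import pandas as pd

table_correlation_length = pd.DataFrame(
    {"value": [1.8, 2.1, 2.4], "error": [0.1, 0.1, 0.2]},
    index=["block 1", "block 2", "block 3"],  # hypothetical labels
)
# plot_correlation_length(table_correlation_length) draws one errorbar per row.
```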
33 changes: 31 additions & 2 deletions anvil/sample.py
@@ -120,7 +120,9 @@ def metropolis_hastings(
            history.append(0)

    tau = calc_tau_chain(history)
-    log.info(f"Integrated autocorrelation time from preliminary sampling phase: {tau:.2g}")
+    log.info(
+        f"Integrated autocorrelation time from preliminary sampling phase: {tau:.2g}"
+    )
    sample_interval = ceil(2 * tau)  # update sample interval

    log.info(f"Using sampling interval: {sample_interval}")
@@ -170,7 +172,7 @@ def metropolis_hastings(
_metropolis_hastings = collect("metropolis_hastings", ("training_context",))


-def configs(_metropolis_hastings):
+def configs_from_metropolis(_metropolis_hastings):
    return _metropolis_hastings[0][0]


@@ -180,3 +182,30 @@ def tau_chain(_metropolis_hastings):

def acceptance(_metropolis_hastings):
return _metropolis_hastings[0][2]


# TODO: figure out how to name each coupling block
@torch.no_grad()
def yield_configs_layerwise(loaded_model, base_dist, sample_size, layer_id):
    v, _ = base_dist(sample_size)
    if layer_id == 0:
        return v

    negative_mag = (v.sum(dim=1).sign() < 0).nonzero().squeeze()

    i = 1
    for block in loaded_model:
        v, _ = block(v, 0, negative_mag)
        # only want coupling layers
        if len([tensor for tensor in block.state_dict().values()]) > 1:
            if i == layer_id:
                return v
            else:
                i += 1
Comment on lines +197 to +204 (Collaborator, author):
I will need to adjust this after #68 is merged, since the model is no longer necessarily a flat Sequential of coupling blocks.
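
One possible direction for that adjustment (a sketch, not part of this PR): recursively flatten nested containers so the layerwise walk still visits individual coupling blocks:

```python
import torch.nn as nn

def iter_blocks(module):
    """Yield leaf blocks of a possibly nested Sequential (illustrative only)."""
    for child in module.children():
        if isinstance(child, nn.Sequential):
            yield from iter_blocks(child)
        else:
            yield child
```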



_configs_from_model = collect("yield_configs_layerwise", ("training_context",))


def configs_from_model(_configs_from_model):
    return _configs_from_model[0]