Skip to content

Commit

Permalink
only allow explicit specification of models
Browse files Browse the repository at this point in the history
  • Loading branch information
wilsonmr committed Apr 30, 2021
1 parent 7b6b3c5 commit 4122eb7
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 147 deletions.
7 changes: 2 additions & 5 deletions anvil/benchmark_config/free_scalar_train.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,9 @@ couplings:
# Model
base: gaussian

#model: rational_quadratic_spline
#model: real_nvp
model: nice
model_params:
n_affine: 2
n_additive: 2
layer: nice
n_blocks: 2
hidden_shape: [36]
activation: tanh
z2_equivar: True
Expand Down
26 changes: 4 additions & 22 deletions anvil/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

from anvil.geometry import Geometry2D
from anvil.checkpoint import TrainingOutput
from anvil.models import MODEL_OPTIONS, LOADED_MODEL_OPTIONS
from anvil.models import LAYER_OPTIONS
from anvil.distributions import BASE_OPTIONS, TARGET_OPTIONS

from random import randint
Expand Down Expand Up @@ -84,30 +84,12 @@ def parse_parameterisation(self, param: str):
return param

@explicit_node
def produce_model_action(self, model: str):
def produce_layer_action(self, layer: str):
"""Given a string, return the flow model action indexed by that string."""
try:
return MODEL_OPTIONS[model]
return LAYER_OPTIONS[layer]
except KeyError:
raise ConfigError(f"Invalid model {model}", model, MODEL_OPTIONS.keys())

@explicit_node
def produce_model_to_load(self, model: str, model_params):
"""Decides whether to load sequential model or a preset combination"""
if isinstance(model_params, list):
inner_models = {inner.get("model") for inner in model_params}
if ("sequential_model" in inner_models) or (None in inner_models):
raise ConfigError(
"Inner models cannot be undefined or `sequential_model`",
inner_models,
MODEL_OPTIONS.keys()
)
if model != "sequential_model":
raise ConfigError(
"model_params can only be a list when the model is `sequential_model`"
)
return LOADED_MODEL_OPTIONS["sequential_model"]
return LOADED_MODEL_OPTIONS["preset_model"]
raise ConfigError(f"Invalid model {layer}", layer, LAYER_OPTIONS.keys())

def parse_n_batch(self, nb: int):
"""Batch size for training."""
Expand Down
6 changes: 3 additions & 3 deletions anvil/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def forward(self, v_in, log_density, *unused) -> torch.Tensor:
s_out = self.s_network(v_for_net)
t_out = self.t_network(v_for_net)

# If enforcing s(-v) = -s(v), we want to use |s(v)| in affine transf.
# If enforcing s(-v) = s(v), we want to use |s(v)| in affine transf.
if self.z2_equivar:
s_out = torch.abs(s_out)

Expand Down Expand Up @@ -259,9 +259,9 @@ def forward(self, v_in, log_density, negative_mag):
v_in_passive - v_in_passive.mean()
) / v_in_passive.std() # reduce numerical instability

# Naively enforce C(-v) = -C(v)
# Naively enforce C(-v) = C(v)
if self.z2_equivar:
v_in_passive_stand[negative_mag] = -v_in_passive_stand[negative_mag]
v_for_net[negative_mag] = -v_for_net[negative_mag]

v_out_b = torch.zeros_like(v_in_active)
gradient = torch.ones_like(v_in_active).unsqueeze(dim=-1)
Expand Down
211 changes: 151 additions & 60 deletions anvil/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
"""
models.py
Module containing reportengine actions which return callable objects that execute
normalising flows constructed from multiple layers via function composition.
Module containing reportengine actions which return normalising flow models.
Generally this involves piecing together components from :py:mod:`anvil.layers`
and :py:mod:`anvil.core` to produce sequences of transformations.
"""
from functools import partial

Expand All @@ -14,122 +16,211 @@
import anvil.layers as layers


def coupling_pair(coupling_layer, size_half, **layer_spec):
"""Helper function which returns a callable object that performs a coupling
transformation on both even and odd lattice sites."""
coupling_transformation = partial(coupling_layer, size_half, **layer_spec)
def _coupling_pair(coupling_layer, **kwargs):
"""Helper function which wraps a pair of coupling layers from
:py:mod:`anvil.layers` in the module container
:py:class:`anvil.core.Sequential`. The first transformation layer acts on
the even sites and the second transformation acts on the odd sites, so one
of these blocks ensures all sites are transformed as part of an
active partition.
"""
coupling_transformation = partial(coupling_layer, **kwargs)
return Sequential(
coupling_transformation(even_sites=True),
coupling_transformation(even_sites=False),
)


def real_nvp(
def _real_nvp(
size_half,
n_affine,
n_blocks,
hidden_shape,
activation="tanh",
z2_equivar=False,
z2_equivar=True,
):
"""Action that returns a callable object that performs a sequence of `n_affine`
affine coupling transformations on both partitions of the input vector."""
r"""Action which returns a sequence of ``n_blocks`` pairs of
:py:class:`anvil.layers.AffineLayer` s, followed by a single
:py:class:`anvil.layers.GlobalRescaling` all wrapped in the module container
:py:class:`anvil.core.Sequential`.
The first ``n_blocks`` elements of the outer ``Sequential``
are ``Sequential`` s containing a pair of ``AffineLayer`` s which
act on the even and odd sites respectively.
Parameters
----------
size_half: int
Inferred from ``lattice_size``, the size of the active/passive
partitions (which are equal size, `lattice_size / 2`).
n_blocks: int
The number of pairs of :py:class:`anvil.layers.AffineLayer`
transformations.
hidden_shape: list[int]
the shape of the neural networks used in the AffineLayer. The visible
layers are defined by the ``lattice_size``. Typically we have found
a single hidden layer neural network is effective, which can be
specified by passing a list of length 1, i.e. ``[72]`` would
be a single hidden layered network with 72 nodes in the hidden layer.
activation: str, default="tanh"
The activation function to use for each hidden layer. The output layer
of the network is linear (has no activation function).
z2_equivar: bool, default=True
Whether or not to impose z2 equivariance. This changes the transformation
such that the neural networks have no bias term and s(-v) = s(v) which
imposes a :math:`\mathbb{Z}_2` symmetry.
Returns
-------
real_nvp: anvil.core.Sequential
A sequence of affine transformations, which we refer to as a real NVP
(Non-volume preserving) flow.
See Also
--------
:py:mod:`anvil.core` contains the fully connected neural network class
as well as valid choices for activation functions.
"""
blocks = [
coupling_pair(
_coupling_pair(
layers.AffineLayer,
size_half,
size_half=size_half,
hidden_shape=hidden_shape,
activation=activation,
z2_equivar=z2_equivar,
)
for i in range(n_affine)
for i in range(n_blocks)
]
return Sequential(*blocks, layers.GlobalRescaling())


def nice(
def _nice(
size_half,
n_additive,
n_blocks,
hidden_shape,
activation="tanh",
z2_equivar=False,
z2_equivar=True,
):
"""Action that returns a callable object that performs a sequence of `n_affine`
affine coupling transformations on both partitions of the input vector."""
"""Similar to :py:func:`real_nvp`, excepts instead wraps pairs of
:py:class:`layers.AdditiveLayer` s followed by a single
:py:class:`layers.GlobalRescaling`. The pairs of ``AdditiveLayer`` s
act on the even and odd sites respectively.
Parameters
----------
size_half: int
Inferred from ``lattice_size``, the size of the active/passive
partitions (which are equal size, `lattice_size / 2`).
n_blocks: int
The number of pairs of :py:class:`anvil.layers.AdditiveLayer`
transformations.
hidden_shape: list[int]
the shape of the neural networks used in each layer. The visible
layers are defined by the ``lattice_size``.
activation: str, default="tanh"
The activation function to use for each hidden layer. The output layer
of the network is linear (has no activation function).
z2_equivar: bool, default=True
Whether or not to impose z2 equivariance. This changes the transformation
such that the neural networks have no bias term and s(-v) = s(v) which
imposes a :math:`\mathbb{Z}_2` symmetry.
Returns
-------
nice: anvil.core.Sequential
A sequence of additive transformations, which we refer to as a
nice flow.
"""
blocks = [
coupling_pair(
_coupling_pair(
layers.AdditiveLayer,
size_half,
size_half=size_half,
hidden_shape=hidden_shape,
activation=activation,
z2_equivar=z2_equivar,
)
for i in range(n_additive)
for i in range(n_blocks)
]
return Sequential(*blocks, layers.GlobalRescaling())


def rational_quadratic_spline(
def _rational_quadratic_spline(
size_half,
hidden_shape,
interval=5,
n_spline=1,
n_blocks=1,
n_segments=4,
activation="tanh",
z2_equivar_spline=False,
z2_equivar=False,
):
"""Action that returns a callable object that performs a pair of circular spline
transformations, one on each half of the input vector."""
"""Similar to :py:func:`real_nvp`, excepts instead wraps pairs of
:py:class:`layers.RationalQuadraticSplineLayer` s followed by a single
:py:class:`layers.GlobalRescaling`. The pairs of RQS's
act on the even and odd sites respectively.
Parameters
----------
size_half: int
inferred from ``lattice_size``, the size of the active/passive
partitions (which are equal size, `lattice_size / 2`).
hidden_shape: list[int]
the shape of the neural networks used in each layer. The visible
layers are defined by the ``lattice_size``.
interval: int, default=5
the interval within which the RQS applies the transformation, at present
if a field variable is outside of this region it is mapped to itself
(i.e. the gradient of the transformation is 1 outside of the interval).
n_blocks: int, default=1
The number of pairs of :py:class:`anvil.layers.RationalQuadraticSplineLayer`
transformations. By default a single pair is used.
n_segments: int, default=4
The number of segments to use in the RQS transformation.
activation: str, default="tanh"
The activation function to use for each hidden layer. The output layer
of the network is linear (has no activation function).
z2_equivar: bool, default=False
Whether or not to impose z2 equivariance. This is only done crudely
by splitting the sites according to the sign of the sum across lattice
sites.
"""
blocks = [
coupling_pair(
_coupling_pair(
layers.RationalQuadraticSplineLayer,
size_half,
size_half=size_half,
interval=interval,
n_segments=n_segments,
hidden_shape=hidden_shape,
activation=activation,
z2_equivar=z2_equivar_spline,
z2_equivar=z2_equivar,
)
for _ in range(n_spline)
for _ in range(n_blocks)
]
return Sequential(
#layers.BatchNormLayer(),
*blocks,
layers.GlobalRescaling(),
)

_normalising_flow = collect("layer_action", ("model_params",))

def spline_affine(real_nvp, rational_quadratic_spline):
return Sequential(rational_quadratic_spline, real_nvp)


def affine_spline(real_nvp, rational_quadratic_spline):
return Sequential(real_nvp, rational_quadratic_spline)


_normalising_flow = collect("model_action", ("model_params",))

def preset_model(_normalising_flow):
return _normalising_flow[0]


def sequential_model(_normalising_flow):
"""action which wraps a list of affine models in
def model_to_load(_normalising_flow):
"""action which wraps a list of layers in
:py:class:`anvil.core.Sequential`. This allows the user to specify an
arbitrary combination of layers as the model
arbitrary combination of layers as the model.
For more information
on valid choices for layers, see :py:data:`LAYER_OPTIONS` or the various
functions in :py:mod:`anvil.models` which produce sequences of the layers
found in :py:mod:`anvil.layers`.
"""
return Sequential(*_normalising_flow)

MODEL_OPTIONS = {
"nice": nice,
"real_nvp": real_nvp,
"rational_quadratic_spline": rational_quadratic_spline,
"spline_affine": spline_affine,
"affine_spline": affine_spline,
}


LOADED_MODEL_OPTIONS = {
"preset_model": preset_model,
"sequential_model": sequential_model
LAYER_OPTIONS = {
"nice": _nice,
"real_nvp": _real_nvp,
"rational_quadratic_spline": _rational_quadratic_spline,
}
24 changes: 13 additions & 11 deletions examples/runcards/train.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Example of how to specify a custom sequential model explicitly.

# Lattice
lattice_length: 6
lattice_dimension: 2
Expand All @@ -12,18 +14,18 @@ couplings:
# Model
base: gaussian

model: affine_spline
model_params:
hidden_shape: [72]
activation: tanh

n_affine: 2
z2_equivar: true

n_spline: 1
n_segments: 8
z2_equivar_spline: false

- layer: real_nvp
n_blocks: 2
z2_equivar: true
activation: tanh
hidden_shape: [72]
- layer: rational_quadratic_spline
n_blocks: 1
n_segments: 8
z2_equivar: false
activation: tanh
hidden_shape: [72]

# Training
n_batch: 1000
Expand Down
Loading

0 comments on commit 4122eb7

Please sign in to comment.