only allow explicit specification of models

wilsonmr · Apr 29, 2021 · 21c0af3 · 21c0af3
1 parent 02c103d
commit 21c0af3
Show file tree

Hide file tree

Showing 6 changed files with 173 additions and 147 deletions.
diff --git a/anvil/benchmark_config/free_scalar_train.yml b/anvil/benchmark_config/free_scalar_train.yml
@@ -12,12 +12,9 @@ couplings:
 # Model
 base: gaussian
 
-#model: rational_quadratic_spline
-#model: real_nvp
-model: nice
 model_params:
-    n_affine: 2
-    n_additive: 2
+    layer: nice
+    n_blocks: 2
     hidden_shape: [36]
     activation: tanh
     z2_equivar: True

diff --git a/anvil/config.py b/anvil/config.py
@@ -13,7 +13,7 @@
 
 from anvil.geometry import Geometry2D
 from anvil.checkpoint import TrainingOutput
-from anvil.models import MODEL_OPTIONS, LOADED_MODEL_OPTIONS
+from anvil.models import LAYER_OPTIONS
 from anvil.distributions import BASE_OPTIONS, TARGET_OPTIONS
 
 from random import randint
@@ -84,30 +84,12 @@ def parse_parameterisation(self, param: str):
         return param
 
     @explicit_node
-    def produce_model_action(self, model: str):
+    def produce_layer_action(self, layer: str):
         """Given a string, return the flow model action indexed by that string."""
         try:
-            return MODEL_OPTIONS[model]
+            return LAYER_OPTIONS[layer]
         except KeyError:
-            raise ConfigError(f"Invalid model {model}", model, MODEL_OPTIONS.keys())
-
-    @explicit_node
-    def produce_model_to_load(self, model: str, model_params):
-        """Decides whether to load sequential model or a preset combination"""
-        if isinstance(model_params, list):
-            inner_models = {inner.get("model") for inner in model_params}
-            if ("sequential_model" in inner_models) or (None in inner_models):
-                raise ConfigError(
-                    "Inner models cannot be undefined or `sequential_model`",
-                    inner_models,
-                    MODEL_OPTIONS.keys()
-                )
-            if model != "sequential_model":
-                raise ConfigError(
-                    "model_params can only be a list when the model is `sequential_model`"
-                )
-            return LOADED_MODEL_OPTIONS["sequential_model"]
-        return LOADED_MODEL_OPTIONS["preset_model"]
+            raise ConfigError(f"Invalid model {layer}", layer, LAYER_OPTIONS.keys())
 
     def parse_n_batch(self, nb: int):
         """Batch size for training."""

diff --git a/anvil/layers.py b/anvil/layers.py
@@ -186,7 +186,7 @@ def forward(self, v_in, log_density, *unused) -> torch.Tensor:
         s_out = self.s_network(v_for_net)
         t_out = self.t_network(v_for_net)
 
-        # If enforcing s(-v) = -s(v), we want to use |s(v)| in affine transf.
+        # If enforcing s(-v) = s(v), we want to use |s(v)| in affine transf.
         if self.z2_equivar:
             s_out = torch.abs(s_out)
 
@@ -259,9 +259,9 @@ def forward(self, v_in, log_density, negative_mag):
             v_in_passive - v_in_passive.mean()
         ) / v_in_passive.std()  # reduce numerical instability
 
-        # Naively enforce C(-v) = -C(v)
+        # Naively enforce C(-v) = C(v)
         if self.z2_equivar:
-            v_in_passive_stand[negative_mag] = -v_in_passive_stand[negative_mag]
+            v_for_net[negative_mag] = -v_for_net[negative_mag]
 
         v_out_b = torch.zeros_like(v_in_active)
         gradient = torch.ones_like(v_in_active).unsqueeze(dim=-1)

diff --git a/anvil/models.py b/anvil/models.py
@@ -3,8 +3,10 @@
 """
 models.py
 
-Module containing reportengine actions which return callable objects that execute
-normalising flows constructed from multiple layers via function composition.
+Module containing reportengine actions which return normalising flow models.
+Generally this involves piecing together components from :py:mod:`anvil.layers`
+and :py:mod:`anvil.core` to produce sequences of transformations.
+
 """
 from functools import partial
 
@@ -14,122 +16,211 @@
 import anvil.layers as layers
 
 
-def coupling_pair(coupling_layer, size_half, **layer_spec):
-    """Helper function which returns a callable object that performs a coupling
-    transformation on both even and odd lattice sites."""
-    coupling_transformation = partial(coupling_layer, size_half, **layer_spec)
+def _coupling_pair(coupling_layer, **kwargs):
+    """Helper function which wraps a pair of coupling layers from
+    :py:mod:`anvil.layers` in the module container
+    :py:class:`anvil.core.Sequential`. The first transformation layer acts on
+    the even sites and the second transformation acts on the odd sites, so one
+    of these blocks ensures all sites are transformed as part of an
+    active partition.
+
+    """
+    coupling_transformation = partial(coupling_layer, **kwargs)
     return Sequential(
         coupling_transformation(even_sites=True),
         coupling_transformation(even_sites=False),
     )
 
 
-def real_nvp(
+def _real_nvp(
     size_half,
-    n_affine,
+    n_blocks,
     hidden_shape,
     activation="tanh",
-    z2_equivar=False,
+    z2_equivar=True,
 ):
-    """Action that returns a callable object that performs a sequence of `n_affine`
-    affine coupling transformations on both partitions of the input vector."""
+    r"""Action which returns a sequence of ``n_blocks`` pairs of
+    :py:class:`anvil.layers.AffineLayer` s, followed by a single
+    :py:class:`anvil.layers.GlobalRescaling` all wrapped in the module container
+    :py:class:`anvil.core.Sequential`.
+
+    The first ``n_blocks`` elements of the outer ``Sequential``
+    are ``Sequential`` s containing a pair of ``AffineLayer`` s which
+    act on the even and odd sites respectively.
+
+    Parameters
+    ----------
+    size_half: int
+        Inferred from ``lattice_size``, the size of the active/passive
+        partitions (which are equal size, `lattice_size / 2`).
+    n_blocks: int
+        The number of pairs of :py:class:`anvil.layers.AffineLayer`
+        transformations.
+    hidden_shape: list[int]
+        the shape of the neural networks used in the AffineLayer. The visible
+        layers are defined by the ``lattice_size``. Typically we have found
+        a single hidden layer neural network is effective, which can be
+        specified by passing a list of length 1, i.e. ``[72]`` would
+        be a single hidden layered network with 72 nodes in the hidden layer.
+    activation: str, default="tanh"
+        The activation function to use for each hidden layer. The output layer
+        of the network is linear (has no activation function).
+    z2_equivar: bool, default=True
+        Whether or not to impose z2 equivariance. This changes the transformation
+        such that the neural networks have no bias term and s(-v) = s(v) which
+        imposes a :math:`\mathbb{Z}_2` symmetry.
+
+    Returns
+    -------
+    real_nvp: anvil.core.Sequential
+        A sequence of affine transformations, which we refer to as a real NVP
+        (Non-volume preserving) flow.
+
+    See Also
+    --------
+    :py:mod:`anvil.core` contains the fully connected neural network class
+    as well as valid choices for activation functions.
+
+    """
     blocks = [
-        coupling_pair(
+        _coupling_pair(
             layers.AffineLayer,
-            size_half,
+            size_half=size_half,
             hidden_shape=hidden_shape,
             activation=activation,
             z2_equivar=z2_equivar,
         )
-        for i in range(n_affine)
+        for i in range(n_blocks)
     ]
     return Sequential(*blocks, layers.GlobalRescaling())
 
 
-def nice(
+def _nice(
     size_half,
-    n_additive,
+    n_blocks,
     hidden_shape,
     activation="tanh",
-    z2_equivar=False,
+    z2_equivar=True,
 ):
-    """Action that returns a callable object that performs a sequence of `n_affine`
-    affine coupling transformations on both partitions of the input vector."""
+    r"""Similar to :py:func:`_real_nvp`, except that it instead wraps pairs of
+    :py:class:`anvil.layers.AdditiveLayer` s followed by a single
+    :py:class:`anvil.layers.GlobalRescaling`. The pairs of ``AdditiveLayer`` s
+    act on the even and odd sites respectively.
+
+    Parameters
+    ----------
+    size_half: int
+        Inferred from ``lattice_size``, the size of the active/passive
+        partitions (which are equal size, `lattice_size / 2`).
+    n_blocks: int
+        The number of pairs of :py:class:`anvil.layers.AdditiveLayer`
+        transformations.
+    hidden_shape: list[int]
+        the shape of the neural networks used in each layer. The visible
+        layers are defined by the ``lattice_size``.
+    activation: str, default="tanh"
+        The activation function to use for each hidden layer. The output layer
+        of the network is linear (has no activation function).
+    z2_equivar: bool, default=True
+        Whether or not to impose z2 equivariance. This changes the transformation
+        such that the neural networks have no bias term and s(-v) = s(v) which
+        imposes a :math:`\mathbb{Z}_2` symmetry.
+
+    Returns
+    -------
+    nice: anvil.core.Sequential
+        A sequence of additive transformations, which we refer to as a
+        nice flow.
+
+    """
     blocks = [
-        coupling_pair(
+        _coupling_pair(
             layers.AdditiveLayer,
-            size_half,
+            size_half=size_half,
             hidden_shape=hidden_shape,
             activation=activation,
             z2_equivar=z2_equivar,
         )
-        for i in range(n_additive)
+        for i in range(n_blocks)
     ]
     return Sequential(*blocks, layers.GlobalRescaling())
 
 
-def rational_quadratic_spline(
+def _rational_quadratic_spline(
     size_half,
     hidden_shape,
     interval=5,
-    n_spline=1,
+    n_blocks=1,
     n_segments=4,
     activation="tanh",
-    z2_equivar_spline=False,
+    z2_equivar=False,
 ):
-    """Action that returns a callable object that performs a pair of circular spline
-    transformations, one on each half of the input vector."""
+    """Similar to :py:func:`_real_nvp`, except that it instead wraps pairs of
+    :py:class:`anvil.layers.RationalQuadraticSplineLayer` s followed by a single
+    :py:class:`anvil.layers.GlobalRescaling`. The pairs of RQS layers
+    act on the even and odd sites respectively.
+
+    Parameters
+    ----------
+    size_half: int
+        inferred from ``lattice_size``, the size of the active/passive
+        partitions (which are equal size, `lattice_size / 2`).
+    hidden_shape: list[int]
+        the shape of the neural networks used in each layer. The visible
+        layers are defined by the ``lattice_size``.
+    interval: int, default=5
+        the interval within which the RQS applies the transformation. At present,
+        if a field variable is outside of this region it is mapped to itself
+        (i.e. the gradient of the transformation is 1 outside of the interval).
+    n_blocks: int, default=1
+        The number of pairs of :py:class:`anvil.layers.RationalQuadraticSplineLayer`
+        transformations. For RQS this defaults to 1.
+    n_segments: int, default=4
+        The number of segments to use in the RQS transformation.
+    activation: str, default="tanh"
+        The activation function to use for each hidden layer. The output layer
+        of the network is linear (has no activation function).
+    z2_equivar: bool, default=False
+        Whether or not to impose z2 equivariance. This is only done crudely
+        by splitting the sites according to the sign of the sum across lattice
+        sites.
+
+    """
     blocks = [
-        coupling_pair(
+        _coupling_pair(
             layers.RationalQuadraticSplineLayer,
-            size_half,
+            size_half=size_half,
             interval=interval,
             n_segments=n_segments,
             hidden_shape=hidden_shape,
             activation=activation,
-            z2_equivar=z2_equivar_spline,
+            z2_equivar=z2_equivar,
         )
-        for _ in range(n_spline)
+        for _ in range(n_blocks)
     ]
     return Sequential(
         #layers.BatchNormLayer(),
         *blocks,
         layers.GlobalRescaling(),
     )
 
+_normalising_flow = collect("layer_action", ("model_params",))
 
-def spline_affine(real_nvp, rational_quadratic_spline):
-    return Sequential(rational_quadratic_spline, real_nvp)
-
-
-def affine_spline(real_nvp, rational_quadratic_spline):
-    return Sequential(real_nvp, rational_quadratic_spline)
-
-
-_normalising_flow = collect("model_action", ("model_params",))
-
-def preset_model(_normalising_flow):
-    return _normalising_flow[0]
-
-
-def sequential_model(_normalising_flow):
-    """action which wraps a list of affine models in
+def model_to_load(_normalising_flow):
+    """action which wraps a list of layers in
     :py:class:`anvil.core.Sequential`. This allows the user to specify an
-    arbitrary combination of layers as the model
+    arbitrary combination of layers as the model.
+
+    For more information
+    on valid choices for layers, see :py:data:`LAYER_OPTIONS` or the various
+    functions in :py:mod:`anvil.models` which produce sequences of the layers
+    found in :py:mod:`anvil.layers`.
 
     """
     return Sequential(*_normalising_flow)
 
-MODEL_OPTIONS = {
-    "nice": nice,
-    "real_nvp": real_nvp,
-    "rational_quadratic_spline": rational_quadratic_spline,
-    "spline_affine": spline_affine,
-    "affine_spline": affine_spline,
-}
-
-
-LOADED_MODEL_OPTIONS = {
-    "preset_model": preset_model,
-    "sequential_model": sequential_model
+LAYER_OPTIONS = {
+    "nice": _nice,
+    "real_nvp": _real_nvp,
+    "rational_quadratic_spline": _rational_quadratic_spline,
 }
diff --git a/examples/runcards/train.yml b/examples/runcards/train.yml
@@ -1,3 +1,5 @@
+# Example of how to specify a custom sequential model explicitly.
+
 # Lattice
 lattice_length: 6
 lattice_dimension: 2
@@ -12,18 +14,18 @@ couplings:
 # Model
 base: gaussian
 
-model: affine_spline
 model_params:
-    hidden_shape: [72]
-    activation: tanh
-
-    n_affine: 2
-    z2_equivar: true
-
-    n_spline: 1
-    n_segments: 8
-    z2_equivar_spline: false
-
+ - layer: real_nvp
+   n_blocks: 2
+   z2_equivar: true
+   activation: tanh
+   hidden_shape: [72]
+ - layer: rational_quadratic_spline
+   n_blocks: 1
+   n_segments: 8
+   z2_equivar: false
+   activation: tanh
+   hidden_shape: [72]
 
 # Training
 n_batch: 1000