From 21c0af31c365f4bbefa630dd58ede66548edbfe6 Mon Sep 17 00:00:00 2001
From: wilsonm <michael.wilson@ed.ac.uk>
Date: Thu, 29 Apr 2021 16:17:39 +0100
Subject: [PATCH] only allow explicit specification of models

---
 anvil/benchmark_config/free_scalar_train.yml |   7 +-
 anvil/config.py                              |  26 +--
 anvil/layers.py                              |   6 +-
 anvil/models.py                              | 211 +++++++++++++------
 examples/runcards/train.yml                  |  24 ++-
 examples/runcards/train_sequential_model.yml |  46 ----
 6 files changed, 173 insertions(+), 147 deletions(-)
 delete mode 100644 examples/runcards/train_sequential_model.yml

diff --git a/anvil/benchmark_config/free_scalar_train.yml b/anvil/benchmark_config/free_scalar_train.yml
index 5edd89e..9b85f20 100644
--- a/anvil/benchmark_config/free_scalar_train.yml
+++ b/anvil/benchmark_config/free_scalar_train.yml
@@ -12,12 +12,9 @@ couplings:
 # Model
 base: gaussian
 
-#model: rational_quadratic_spline
-#model: real_nvp
-model: nice
 model_params:
-    n_affine: 2
-    n_additive: 2
+    layer: nice
+    n_blocks: 2
     hidden_shape: [36]
     activation: tanh
     z2_equivar: True
diff --git a/anvil/config.py b/anvil/config.py
index 96953d0..7eeb68e 100644
--- a/anvil/config.py
+++ b/anvil/config.py
@@ -13,7 +13,7 @@
 
 from anvil.geometry import Geometry2D
 from anvil.checkpoint import TrainingOutput
-from anvil.models import MODEL_OPTIONS, LOADED_MODEL_OPTIONS
+from anvil.models import LAYER_OPTIONS
 from anvil.distributions import BASE_OPTIONS, TARGET_OPTIONS
 
 from random import randint
@@ -84,30 +84,12 @@ def parse_parameterisation(self, param: str):
         return param
 
     @explicit_node
-    def produce_model_action(self, model: str):
+    def produce_layer_action(self, layer: str):
         """Given a string, return the flow model action indexed by that string."""
         try:
-            return MODEL_OPTIONS[model]
+            return LAYER_OPTIONS[layer]
         except KeyError:
-            raise ConfigError(f"Invalid model {model}", model, MODEL_OPTIONS.keys())
-
-    @explicit_node
-    def produce_model_to_load(self, model: str, model_params):
-        """Decides whether to load sequential model or a preset combination"""
-        if isinstance(model_params, list):
-            inner_models = {inner.get("model") for inner in model_params}
-            if ("sequential_model" in inner_models) or (None in inner_models):
-                raise ConfigError(
-                    "Inner models cannot be undefined or `sequential_model`",
-                    inner_models,
-                    MODEL_OPTIONS.keys()
-                )
-            if model != "sequential_model":
-                raise ConfigError(
-                    "model_params can only be a list when the model is `sequential_model`"
-                )
-            return LOADED_MODEL_OPTIONS["sequential_model"]
-        return LOADED_MODEL_OPTIONS["preset_model"]
+            raise ConfigError(f"Invalid model {layer}", layer, LAYER_OPTIONS.keys())
 
     def parse_n_batch(self, nb: int):
         """Batch size for training."""
diff --git a/anvil/layers.py b/anvil/layers.py
index af8997f..70296ba 100644
--- a/anvil/layers.py
+++ b/anvil/layers.py
@@ -186,7 +186,7 @@ def forward(self, v_in, log_density, *unused) -> torch.Tensor:
         s_out = self.s_network(v_for_net)
         t_out = self.t_network(v_for_net)
 
-        # If enforcing s(-v) = -s(v), we want to use |s(v)| in affine transf.
+        # If enforcing s(-v) = s(v), we want to use |s(v)| in affine transf.
         if self.z2_equivar:
             s_out = torch.abs(s_out)
 
@@ -259,9 +259,9 @@ def forward(self, v_in, log_density, negative_mag):
             v_in_passive - v_in_passive.mean()
         ) / v_in_passive.std()  # reduce numerical instability
 
-        # Naively enforce C(-v) = -C(v)
+        # Naively enforce C(-v) = C(v)
         if self.z2_equivar:
-            v_in_passive_stand[negative_mag] = -v_in_passive_stand[negative_mag]
+            v_for_net[negative_mag] = -v_for_net[negative_mag]
 
         v_out_b = torch.zeros_like(v_in_active)
         gradient = torch.ones_like(v_in_active).unsqueeze(dim=-1)
diff --git a/anvil/models.py b/anvil/models.py
index d079c09..15faec6 100644
--- a/anvil/models.py
+++ b/anvil/models.py
@@ -3,8 +3,10 @@
 """
 models.py
 
-Module containing reportengine actions which return callable objects that execute
-normalising flows constructed from multiple layers via function composition.
+Module containing reportengine actions which return normalising flow models.
+Generally this involves piecing together components from :py:mod:`anvil.layers`
+and :py:mod:`anvil.core` to produce sequences of transformations.
+
 """
 from functools import partial
 
@@ -14,82 +16,187 @@
 import anvil.layers as layers
 
 
-def coupling_pair(coupling_layer, size_half, **layer_spec):
-    """Helper function which returns a callable object that performs a coupling
-    transformation on both even and odd lattice sites."""
-    coupling_transformation = partial(coupling_layer, size_half, **layer_spec)
+def _coupling_pair(coupling_layer, **kwargs):
+    """Helper function which wraps a pair of coupling layers from
+    :py:mod:`anvil.layers` in the module container
+    :py:class:`anvil.core.Sequential`. The first transformation layer acts on
+    the even sites and the second transformation acts on the odd sites, so one
+    of these blocks ensures all sites are transformed as part of an
+    active partition.
+
+    """
+    coupling_transformation = partial(coupling_layer, **kwargs)
     return Sequential(
         coupling_transformation(even_sites=True),
         coupling_transformation(even_sites=False),
     )
 
 
-def real_nvp(
+def _real_nvp(
     size_half,
-    n_affine,
+    n_blocks,
     hidden_shape,
     activation="tanh",
-    z2_equivar=False,
+    z2_equivar=True,
 ):
-    """Action that returns a callable object that performs a sequence of `n_affine`
-    affine coupling transformations on both partitions of the input vector."""
+    r"""Action which returns a sequence of ``n_blocks`` pairs of
+    :py:class:`anvil.layers.AffineLayer` s, followed by a single
+    :py:class:`anvil.layers.GlobalRescaling` all wrapped in the module container
+    :py:class:`anvil.core.Sequential`.
+
+    The first ``n_blocks`` elements of the outer ``Sequential``
+    are ``Sequential`` s containing a pair of ``AffineLayer`` s which
+    act on the even and odd sites respectively.
+
+    Parameters
+    ----------
+    size_half: int
+        Inferred from ``lattice_size``, the size of the active/passive
+        partitions (which are equal size, `lattice_size / 2`).
+    n_blocks: int
+        The number of pairs of :py:class:`anvil.layers.AffineLayer`
+        transformations.
+    hidden_shape: list[int]
+        the shape of the neural networks used in the AffineLayer. The visible
+        layers are defined by the ``lattice_size``. Typically we have found
+        a single hidden layer neural network is effective, which can be
+        specified by passing a list of length 1, i.e. ``[72]`` would
+        be a single hidden layered network with 72 nodes in the hidden layer.
+    activation: str, default="tanh"
+        The activation function to use for each hidden layer. The output layer
+        of the network is linear (has no activation function).
+    z2_equivar: bool, default=True
+        Whether or not to impose z2 equivariance. This changes the transformation
+        such that the neural networks have no bias term and s(-v) = s(v) which
+        imposes a :math:`\mathbb{Z}_2` symmetry.
+
+    Returns
+    -------
+    real_nvp: anvil.core.Sequential
+        A sequence of affine transformations, which we refer to as a real NVP
+        (Non-volume preserving) flow.
+
+    See Also
+    --------
+    :py:mod:`anvil.core` contains the fully connected neural network class
+    as well as valid choices for activation functions.
+
+    """
     blocks = [
-        coupling_pair(
+        _coupling_pair(
             layers.AffineLayer,
-            size_half,
+            size_half=size_half,
             hidden_shape=hidden_shape,
             activation=activation,
             z2_equivar=z2_equivar,
         )
-        for i in range(n_affine)
+        for i in range(n_blocks)
     ]
     return Sequential(*blocks, layers.GlobalRescaling())
 
 
-def nice(
+def _nice(
     size_half,
-    n_additive,
+    n_blocks,
     hidden_shape,
     activation="tanh",
-    z2_equivar=False,
+    z2_equivar=True,
 ):
-    """Action that returns a callable object that performs a sequence of `n_affine`
-    affine coupling transformations on both partitions of the input vector."""
+    """Similar to :py:func:`_real_nvp`, except it instead wraps pairs of
+    :py:class:`layers.AdditiveLayer` s followed by a single
+    :py:class:`layers.GlobalRescaling`. The pairs of ``AdditiveLayer`` s
+    act on the even and odd sites respectively.
+
+    Parameters
+    ----------
+    size_half: int
+        Inferred from ``lattice_size``, the size of the active/passive
+        partitions (which are equal size, `lattice_size / 2`).
+    n_blocks: int
+        The number of pairs of :py:class:`anvil.layers.AdditiveLayer`
+        transformations.
+    hidden_shape: list[int]
+        the shape of the neural networks used in each layer. The visible
+        layers are defined by the ``lattice_size``.
+    activation: str, default="tanh"
+        The activation function to use for each hidden layer. The output layer
+        of the network is linear (has no activation function).
+    z2_equivar: bool, default=True
+        Whether or not to impose z2 equivariance. This changes the transformation
+        such that the neural networks have no bias term and s(-v) = s(v) which
+        imposes a :math:`\mathbb{Z}_2` symmetry.
+
+    Returns
+    -------
+    nice: anvil.core.Sequential
+        A sequence of additive transformations, which we refer to as a
+        nice flow.
+
+    """
     blocks = [
-        coupling_pair(
+        _coupling_pair(
             layers.AdditiveLayer,
-            size_half,
+            size_half=size_half,
             hidden_shape=hidden_shape,
             activation=activation,
             z2_equivar=z2_equivar,
         )
-        for i in range(n_additive)
+        for i in range(n_blocks)
     ]
     return Sequential(*blocks, layers.GlobalRescaling())
 
 
-def rational_quadratic_spline(
+def _rational_quadratic_spline(
     size_half,
     hidden_shape,
     interval=5,
-    n_spline=1,
+    n_blocks=1,
     n_segments=4,
     activation="tanh",
-    z2_equivar_spline=False,
+    z2_equivar=False,
 ):
-    """Action that returns a callable object that performs a pair of circular spline
-    transformations, one on each half of the input vector."""
+    """Similar to :py:func:`_real_nvp`, except it instead wraps pairs of
+    :py:class:`layers.RationalQuadraticSplineLayer` s followed by a single
+    :py:class:`layers.GlobalRescaling`. The pairs of RQS's
+    act on the even and odd sites respectively.
+
+    Parameters
+    ----------
+    size_half: int
+        inferred from ``lattice_size``, the size of the active/passive
+        partitions (which are equal size, `lattice_size / 2`).
+    hidden_shape: list[int]
+        the shape of the neural networks used in each layer. The visible
+        layers are defined by the ``lattice_size``.
+    interval: int, default=5
+        the interval within which the RQS applies the transformation, at present
+        if a field variable is outside of this region it is mapped to itself
+        (i.e. the gradient of the transformation is 1 outside of the interval).
+    n_blocks: int, default=1
+        The number of pairs of :py:class:`anvil.layers.RationalQuadraticSplineLayer`
+        transformations. For RQS this is set to 1.
+    n_segments: int, default=4
+        The number of segments to use in the RQS transformation.
+    activation: str, default="tanh"
+        The activation function to use for each hidden layer. The output layer
+        of the network is linear (has no activation function).
+    z2_equivar: bool, default=False
+        Whether or not to impose z2 equivariance. This is only done crudely
+        by splitting the sites according to the sign of the sum across lattice
+        sites.
+
+    """
     blocks = [
-        coupling_pair(
+        _coupling_pair(
             layers.RationalQuadraticSplineLayer,
-            size_half,
+            size_half=size_half,
             interval=interval,
             n_segments=n_segments,
             hidden_shape=hidden_shape,
             activation=activation,
-            z2_equivar=z2_equivar_spline,
+            z2_equivar=z2_equivar,
         )
-        for _ in range(n_spline)
+        for _ in range(n_blocks)
     ]
     return Sequential(
         #layers.BatchNormLayer(),
@@ -97,39 +204,23 @@ def rational_quadratic_spline(
         layers.GlobalRescaling(),
     )
 
+_normalising_flow = collect("layer_action", ("model_params",))
 
-def spline_affine(real_nvp, rational_quadratic_spline):
-    return Sequential(rational_quadratic_spline, real_nvp)
-
-
-def affine_spline(real_nvp, rational_quadratic_spline):
-    return Sequential(real_nvp, rational_quadratic_spline)
-
-
-_normalising_flow = collect("model_action", ("model_params",))
-
-def preset_model(_normalising_flow):
-    return _normalising_flow[0]
-
-
-def sequential_model(_normalising_flow):
-    """action which wraps a list of affine models in
+def model_to_load(_normalising_flow):
+    """action which wraps a list of layers in
     :py:class:`anvil.core.Sequential`. This allows the user to specify an
-    arbitrary combination of layers as the model
+    arbitrary combination of layers as the model.
+
+    For more information
+    on valid choices for layers, see :py:data:`LAYER_OPTIONS` or the various
+    functions in :py:mod:`anvil.models` which produce sequences of the layers
+    found in :py:mod:`anvil.layers`.
 
     """
     return Sequential(*_normalising_flow)
 
-MODEL_OPTIONS = {
-    "nice": nice,
-    "real_nvp": real_nvp,
-    "rational_quadratic_spline": rational_quadratic_spline,
-    "spline_affine": spline_affine,
-    "affine_spline": affine_spline,
-}
-
-
-LOADED_MODEL_OPTIONS = {
-    "preset_model": preset_model,
-    "sequential_model": sequential_model
+LAYER_OPTIONS = {
+    "nice": _nice,
+    "real_nvp": _real_nvp,
+    "rational_quadratic_spline": _rational_quadratic_spline,
 }
diff --git a/examples/runcards/train.yml b/examples/runcards/train.yml
index f9c319a..945fa86 100644
--- a/examples/runcards/train.yml
+++ b/examples/runcards/train.yml
@@ -1,3 +1,5 @@
+# Example of how to specify a custom sequential model explicitly.
+
 # Lattice
 lattice_length: 6
 lattice_dimension: 2
@@ -12,18 +14,18 @@ couplings:
 # Model
 base: gaussian
 
-model: affine_spline
 model_params:
-    hidden_shape: [72]
-    activation: tanh
-    
-    n_affine: 2
-    z2_equivar: true
-
-    n_spline: 1
-    n_segments: 8
-    z2_equivar_spline: false
-
+ - layer: real_nvp
+   n_blocks: 2
+   z2_equivar: true
+   activation: tanh
+   hidden_shape: [72]
+ - layer: rational_quadratic_spline
+   n_blocks: 1
+   n_segments: 8
+   z2_equivar: false
+   activation: tanh
+   hidden_shape: [72]
 
 # Training
 n_batch: 1000
diff --git a/examples/runcards/train_sequential_model.yml b/examples/runcards/train_sequential_model.yml
deleted file mode 100644
index 81c735c..0000000
--- a/examples/runcards/train_sequential_model.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Example of how to specify a custom sequential model explicitly.
-
-# Lattice
-lattice_length: 6
-lattice_dimension: 2
-
-# Target
-target: phi_four
-parameterisation: albergo2019
-couplings:
-    m_sq: -4
-    lam: 6.975
-
-# Model
-base: gaussian
-
-model: sequential_model
-
-model_params:
- - model: real_nvp
-   n_affine: 2
-   z2_equivar: true
-   activation: tanh
-   hidden_shape: [72]
- - model: rational_quadratic_spline
-   n_spline: 1
-   n_segments: 8
-   z2_equivar_spline: false
-   activation: tanh
-   hidden_shape: [72]
-
-# Training
-n_batch: 1000
-epochs: 2000
-save_interval: 1000
-
-# Optimizer
-optimizer: Adam
-optimizer_params:
-    lr: 0.005
-
-# Scheduler
-scheduler: CosineAnnealingLR
-scheduler_params:
-    T_max: 2000
-