From 4dbe1a45a1d5487fe4bb3ab68c15dcff2c047ba1 Mon Sep 17 00:00:00 2001
From: wilsonm <michael.wilson@ed.ac.uk>
Date: Fri, 30 Apr 2021 10:52:21 +0100
Subject: [PATCH] move sequential to layers

---
 anvil/core.py   | 11 ---------
 anvil/layers.py | 66 ++++++++++++++++++++++++++++++++++---------------
 anvil/models.py | 38 ++++++++++++++--------------
 3 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/anvil/core.py b/anvil/core.py
index df85ffc..81ea02b 100644
--- a/anvil/core.py
+++ b/anvil/core.py
@@ -17,17 +17,6 @@
 }
 
 
-class Sequential(nn.Sequential):
-    """Modify the nn.Sequential class so that it takes an input vector *and* a
-    value for the current logarithm of the model density, returning an output
-    vector and the updated log density."""
-
-    def forward(self, v, log_density, *args):
-        for module in self:
-            v, log_density = module(v, log_density, *args)
-        return v, log_density
-
-
 class FullyConnectedNeuralNetwork(nn.Module):
     """Generic class for neural networks used in coupling layers.
 
diff --git a/anvil/layers.py b/anvil/layers.py
index 70296ba..f91253f 100644
--- a/anvil/layers.py
+++ b/anvil/layers.py
@@ -3,32 +3,46 @@
 r"""
 layers.py
 
-Contains nn.Modules which implement transformations of input configurations whilst computing
-the Jacobian determinant of the transformation.
-
-Each transformation layers may contain several neural networks or learnable parameters.
-
-A normalising flow, f, can be constructed from multiple layers using function composition:
-
-        f(z) = g_n( ... ( g_2( g_1( z ) ) ) ... )
-
-which is implemented using the architecture provided by torch.nn
+Contains the transformations or "layers" which are the building blocks of
+normalising flows. The layers are implemented using the PyTorch library, which
+in practice means they subclass :py:class:`torch.nn.Module`. For more
+information, check out the PyTorch
+`Module docs <https://pytorch.org/docs/stable/generated/torch.nn.Module.html#torch.nn.Module>`_.
+
+The basic idea of a flow is to generate a latent variable; in our framework
+this would be using a class in :py:mod:`anvil.distributions`. The latent
+variables are then transformed by sequentially applying the transformation
+layers. The key feature of the transformations is the ability to easily calculate
+the Jacobian determinant. If the base density function is known, then we can
+evaluate the model density exactly.
+
+The bottom line is that we enforce a convention on the ``forward`` method
+of each layer (a special method of :py:class:`torch.nn.Module` subclasses).
+All layers in this module should contain a ``forward`` method which takes two
+:py:class:`torch.Tensor` objects as inputs:
+
+    - a batch of input configurations, dimensions ``(batch size, lattice size)``.
+    - a batch of scalars, dimensions ``(batch size, 1)``, that are the logarithm of the
+      'current' probability density, at this stage in the normalising flow.
 
-All layers in this module contain a `forward` method which takes two torch.tensor objects
-as inputs:
+Each transformation layer may contain several neural networks or learnable
+parameters.
 
-    - a batch of input configurations, dimensions (batch size, lattice size).
+A full normalising flow, f, can be constructed from multiple layers using
+function composition:
 
-    - a batch of scalars, dimensions (batch size, 1), that are the logarithm of the
-      'current' probability density, at this stage in the normalising flow.
+.. math::
 
-and returns two torch.tensor objects:
+        f(z) = g_{N_{\mathrm{layers}}}( \ldots ( g_2( g_1( z ) ) ) \ldots )
 
-    - a batch of configurations \phi which have been transformed according to the 
-      transformation, with the same dimensions as the input configurations.
+As a matter of convenience we provide a subclass of
+:py:class:`torch.nn.Sequential`, which is initialised by passing multiple layers
+as arguments (in the order in which the layers are applied). The main feature
+of our version, :py:class:`Sequential`, is that it conforms to our ``forward``
+convention. From the perspective of the user :py:class:`Sequential` appears
+as a single subclass of :py:class:`torch.nn.Module` which performs the
+full normalising flow transformation :math:`f(z)`.
 
-    - the updated logarithm of the probability density, including the contribution from
-      the Jacobian determinant of this transformation.
 """
 import torch
 import torch.nn as nn
@@ -428,3 +442,15 @@ def forward(self, v_in, log_density, *unused):
         v_out = self.scale * v_in
         log_density -= v_out.shape[-1] * torch.log(self.scale)
         return v_out, log_density
+
+
+class Sequential(nn.Sequential):
+    """Similar to :py:class:`torch.nn.Sequential` except conforms to our
+    ``forward`` convention: each contained module is called in order with
+    ``(v, log_density, *args)`` and returns the updated ``(v, log_density)``.
+    """
+
+    def forward(self, v, log_density, *args):
+        for module in self:
+            v, log_density = module(v, log_density, *args)
+        return v, log_density
diff --git a/anvil/models.py b/anvil/models.py
index 15faec6..6553e1e 100644
--- a/anvil/models.py
+++ b/anvil/models.py
@@ -5,34 +5,33 @@
 
 Module containing reportengine actions which return normalising flow models.
 Generally this involves piecing together components from :py:mod:`anvil.layers`
-and :py:mod:`anvil.core` to produce sequences of transformations.
+to produce sequences of transformations.
 
 """
 from functools import partial
 
 from reportengine import collect
 
-from anvil.core import Sequential
 import anvil.layers as layers
 
 
 def _coupling_pair(coupling_layer, **kwargs):
     """Helper function which wraps a pair of coupling layers from
     :py:mod:`anvil.layers` in the module container
-    :py:class`anvil.core.Sequential`. The first transformation layer acts on
+    :py:class:`layers.Sequential`. The first transformation layer acts on
     the even sites and the second transformation acts on the odd sites, so one
     of these blocks ensures all sites are transformed as part of an
     active partition.
 
     """
     coupling_transformation = partial(coupling_layer, **kwargs)
-    return Sequential(
+    return layers.Sequential(
         coupling_transformation(even_sites=True),
         coupling_transformation(even_sites=False),
     )
 
 
-def _real_nvp(
+def real_nvp(
     size_half,
     n_blocks,
     hidden_shape,
@@ -42,7 +41,7 @@ def _real_nvp(
     r"""Action which returns a sequence of ``n_blocks`` pairs of
     :py:class:`anvil.layers.AffineLayer` s, followed by a single
     :py:class:`anvil.layers.GlobalRescaling` all wrapped in the module container
-    :py:class`anvil.core.Sequential`.
+    :py:class:`layers.Sequential`.
 
     The first ``n_blocks`` elements of the outer ``Sequential``
     are ``Sequential`` s containing a pair of ``AffineLayer`` s which
@@ -72,7 +71,7 @@ def _real_nvp(
 
     Returns
     -------
-    real_nvp: anvil.core.Sequential
+    real_nvp: layers.Sequential
         A sequence of affine transformations, which we refer to as a real NVP
         (Non-volume preserving) flow.
 
@@ -92,17 +91,17 @@ def _real_nvp(
         )
         for i in range(n_blocks)
     ]
-    return Sequential(*blocks, layers.GlobalRescaling())
+    return layers.Sequential(*blocks, layers.GlobalRescaling())
 
 
-def _nice(
+def nice(
     size_half,
     n_blocks,
     hidden_shape,
     activation="tanh",
     z2_equivar=True,
 ):
-    """Similar to :py:func:`real_nvp`, excepts instead wraps pairs of
+    r"""Similar to :py:func:`real_nvp`, except it instead wraps pairs of
     :py:class:`layers.AdditiveLayer` s followed by a single
     :py:class:`layers.GlobalRescaling`. The pairs of ``AdditiveLayer`` s
     act on the even and odd sites respectively.
@@ -128,7 +127,7 @@ def _nice(
 
     Returns
     -------
-    nice: anvil.core.Sequential
+    nice: layers.Sequential
         A sequence of additive transformations, which we refer to as a
         nice flow.
 
@@ -143,10 +142,10 @@ def _nice(
         )
         for i in range(n_blocks)
     ]
-    return Sequential(*blocks, layers.GlobalRescaling())
+    return layers.Sequential(*blocks, layers.GlobalRescaling())
 
 
-def _rational_quadratic_spline(
+def rational_quadratic_spline(
     size_half,
     hidden_shape,
     interval=5,
@@ -198,8 +197,7 @@ def _rational_quadratic_spline(
         )
         for _ in range(n_blocks)
     ]
-    return Sequential(
-        #layers.BatchNormLayer(),
+    return layers.Sequential(
         *blocks,
         layers.GlobalRescaling(),
     )
@@ -208,7 +206,7 @@ def _rational_quadratic_spline(
 
 def model_to_load(_normalising_flow):
     """action which wraps a list of layers in
-    :py:class:`anvil.core.Sequential`. This allows the user to specify an
+    :py:class:`layers.Sequential`. This allows the user to specify an
     arbitrary combination of layers as the model.
 
     For more information
@@ -217,10 +215,10 @@ def model_to_load(_normalising_flow):
     found in :py:mod:`anvil.layers`.
 
     """
-    return Sequential(*_normalising_flow)
+    return layers.Sequential(*_normalising_flow)
 
 LAYER_OPTIONS = {
-    "nice": _nice,
-    "real_nvp": _real_nvp,
-    "rational_quadratic_spline": _rational_quadratic_spline,
+    "nice": nice,
+    "real_nvp": real_nvp,
+    "rational_quadratic_spline": rational_quadratic_spline,
 }