diff --git a/anvil/benchmark_config/free_scalar_train.yml b/anvil/benchmark_config/free_scalar_train.yml index 5edd89e..9b85f20 100644 --- a/anvil/benchmark_config/free_scalar_train.yml +++ b/anvil/benchmark_config/free_scalar_train.yml @@ -12,12 +12,9 @@ couplings: # Model base: gaussian -#model: rational_quadratic_spline -#model: real_nvp -model: nice model_params: - n_affine: 2 - n_additive: 2 + layer: nice + n_blocks: 2 hidden_shape: [36] activation: tanh z2_equivar: True diff --git a/anvil/config.py b/anvil/config.py index 96953d0..7eeb68e 100644 --- a/anvil/config.py +++ b/anvil/config.py @@ -13,7 +13,7 @@ from anvil.geometry import Geometry2D from anvil.checkpoint import TrainingOutput -from anvil.models import MODEL_OPTIONS, LOADED_MODEL_OPTIONS +from anvil.models import LAYER_OPTIONS from anvil.distributions import BASE_OPTIONS, TARGET_OPTIONS from random import randint @@ -84,30 +84,12 @@ def parse_parameterisation(self, param: str): return param @explicit_node - def produce_model_action(self, model: str): + def produce_layer_action(self, layer: str): """Given a string, return the flow model action indexed by that string.""" try: - return MODEL_OPTIONS[model] + return LAYER_OPTIONS[layer] except KeyError: - raise ConfigError(f"Invalid model {model}", model, MODEL_OPTIONS.keys()) - - @explicit_node - def produce_model_to_load(self, model: str, model_params): - """Decides whether to load sequential model or a preset combination""" - if isinstance(model_params, list): - inner_models = {inner.get("model") for inner in model_params} - if ("sequential_model" in inner_models) or (None in inner_models): - raise ConfigError( - "Inner models cannot be undefined or `sequential_model`", - inner_models, - MODEL_OPTIONS.keys() - ) - if model != "sequential_model": - raise ConfigError( - "model_params can only be a list when the model is `sequential_model`" - ) - return LOADED_MODEL_OPTIONS["sequential_model"] - return LOADED_MODEL_OPTIONS["preset_model"] + raise 
ConfigError(f"Invalid model {layer}", layer, LAYER_OPTIONS.keys()) def parse_n_batch(self, nb: int): """Batch size for training.""" diff --git a/anvil/layers.py b/anvil/layers.py index af8997f..70296ba 100644 --- a/anvil/layers.py +++ b/anvil/layers.py @@ -186,7 +186,7 @@ def forward(self, v_in, log_density, *unused) -> torch.Tensor: s_out = self.s_network(v_for_net) t_out = self.t_network(v_for_net) - # If enforcing s(-v) = -s(v), we want to use |s(v)| in affine transf. + # If enforcing s(-v) = s(v), we want to use |s(v)| in affine transf. if self.z2_equivar: s_out = torch.abs(s_out) @@ -259,9 +259,9 @@ def forward(self, v_in, log_density, negative_mag): v_in_passive - v_in_passive.mean() ) / v_in_passive.std() # reduce numerical instability - # Naively enforce C(-v) = -C(v) + # Naively enforce C(-v) = C(v) if self.z2_equivar: - v_in_passive_stand[negative_mag] = -v_in_passive_stand[negative_mag] + v_for_net[negative_mag] = -v_for_net[negative_mag] v_out_b = torch.zeros_like(v_in_active) gradient = torch.ones_like(v_in_active).unsqueeze(dim=-1) diff --git a/anvil/models.py b/anvil/models.py index d079c09..15faec6 100644 --- a/anvil/models.py +++ b/anvil/models.py @@ -3,8 +3,10 @@ """ models.py -Module containing reportengine actions which return callable objects that execute -normalising flows constructed from multiple layers via function composition. +Module containing reportengine actions which return normalising flow models. +Generally this involves piecing together components from :py:mod:`anvil.layers` +and :py:mod:`anvil.core` to produce sequences of transformations. 
+ """ from functools import partial @@ -14,82 +16,187 @@ import anvil.layers as layers -def coupling_pair(coupling_layer, size_half, **layer_spec): - """Helper function which returns a callable object that performs a coupling - transformation on both even and odd lattice sites.""" - coupling_transformation = partial(coupling_layer, size_half, **layer_spec) +def _coupling_pair(coupling_layer, **kwargs): + """Helper function which wraps a pair of coupling layers from + :py:mod:`anvil.layers` in the module container + :py:class:`anvil.core.Sequential`. The first transformation layer acts on + the even sites and the second transformation acts on the odd sites, so one + of these blocks ensures all sites are transformed as part of an + active partition. + + """ + coupling_transformation = partial(coupling_layer, **kwargs) return Sequential( coupling_transformation(even_sites=True), coupling_transformation(even_sites=False), ) -def real_nvp( +def _real_nvp( size_half, - n_affine, + n_blocks, hidden_shape, activation="tanh", - z2_equivar=False, + z2_equivar=True, ): - """Action that returns a callable object that performs a sequence of `n_affine` - affine coupling transformations on both partitions of the input vector.""" + r"""Action which returns a sequence of ``n_blocks`` pairs of + :py:class:`anvil.layers.AffineLayer` s, followed by a single + :py:class:`anvil.layers.GlobalRescaling` all wrapped in the module container + :py:class:`anvil.core.Sequential`. + + The first ``n_blocks`` elements of the outer ``Sequential`` + are ``Sequential`` s containing a pair of ``AffineLayer`` s which + act on the even and odd sites respectively. + + Parameters + ---------- + size_half: int + Inferred from ``lattice_size``, the size of the active/passive + partitions (which are equal size, `lattice_size / 2`). + n_blocks: int + The number of pairs of :py:class:`anvil.layers.AffineLayer` + transformations. 
+ hidden_shape: list[int] + The shape of the neural networks used in the AffineLayer. The visible + layers are defined by the ``lattice_size``. Typically we have found + a single hidden layer neural network is effective, which can be + specified by passing a list of length 1, i.e. ``[72]`` would + be a single hidden layered network with 72 nodes in the hidden layer. + activation: str, default="tanh" + The activation function to use for each hidden layer. The output layer + of the network is linear (has no activation function). + z2_equivar: bool, default=True + Whether or not to impose z2 equivariance. This changes the transformation + such that the neural networks have no bias term and s(-v) = s(v) which + imposes a :math:`\mathbb{Z}_2` symmetry. + + Returns + ------- + real_nvp: anvil.core.Sequential + A sequence of affine transformations, which we refer to as a real NVP + (Non-volume preserving) flow. + + See Also + -------- + :py:mod:`anvil.core` contains the fully connected neural network class + as well as valid choices for activation functions. + + """ blocks = [ - coupling_pair( + _coupling_pair( layers.AffineLayer, - size_half, + size_half=size_half, hidden_shape=hidden_shape, activation=activation, z2_equivar=z2_equivar, ) - for i in range(n_affine) + for i in range(n_blocks) ] return Sequential(*blocks, layers.GlobalRescaling()) -def nice( +def _nice( size_half, - n_additive, + n_blocks, hidden_shape, activation="tanh", - z2_equivar=False, + z2_equivar=True, ): - """Action that returns a callable object that performs a sequence of `n_affine` - affine coupling transformations on both partitions of the input vector.""" + """Similar to :py:func:`_real_nvp`, except that it instead wraps pairs of + :py:class:`layers.AdditiveLayer` s followed by a single + :py:class:`layers.GlobalRescaling`. The pairs of ``AdditiveLayer`` s + act on the even and odd sites respectively. 
+ + Parameters + ---------- + size_half: int + Inferred from ``lattice_size``, the size of the active/passive + partitions (which are equal size, `lattice_size / 2`). + n_blocks: int + The number of pairs of :py:class:`anvil.layers.AdditiveLayer` + transformations. + hidden_shape: list[int] + The shape of the neural networks used in each layer. The visible + layers are defined by the ``lattice_size``. + activation: str, default="tanh" + The activation function to use for each hidden layer. The output layer + of the network is linear (has no activation function). + z2_equivar: bool, default=True + Whether or not to impose z2 equivariance. This changes the transformation + such that the neural networks have no bias term and s(-v) = s(v) which + imposes a :math:`\mathbb{Z}_2` symmetry. + + Returns + ------- + nice: anvil.core.Sequential + A sequence of additive transformations, which we refer to as a + nice flow. + + """ blocks = [ - coupling_pair( + _coupling_pair( layers.AdditiveLayer, - size_half, + size_half=size_half, hidden_shape=hidden_shape, activation=activation, z2_equivar=z2_equivar, ) - for i in range(n_additive) + for i in range(n_blocks) ] return Sequential(*blocks, layers.GlobalRescaling()) -def rational_quadratic_spline( +def _rational_quadratic_spline( size_half, hidden_shape, interval=5, - n_spline=1, + n_blocks=1, n_segments=4, activation="tanh", - z2_equivar_spline=False, + z2_equivar=False, ): - """Action that returns a callable object that performs a pair of circular spline - transformations, one on each half of the input vector.""" + """Similar to :py:func:`_real_nvp`, except that it instead wraps pairs of + :py:class:`layers.RationalQuadraticSplineLayer` s followed by a single + :py:class:`layers.GlobalRescaling`. The pairs of RQS's + act on the even and odd sites respectively. + + Parameters + ---------- + size_half: int + Inferred from ``lattice_size``, the size of the active/passive + partitions (which are equal size, `lattice_size / 2`). 
+ hidden_shape: list[int] + The shape of the neural networks used in each layer. The visible + layers are defined by the ``lattice_size``. + interval: int, default=5 + The interval within which the RQS applies the transformation. At present, + if a field variable is outside of this region it is mapped to itself + (i.e. the gradient of the transformation is 1 outside of the interval). + n_blocks: int, default=1 + The number of pairs of :py:class:`anvil.layers.RationalQuadraticSplineLayer` + transformations. For RQS this defaults to 1. + n_segments: int, default=4 + The number of segments to use in the RQS transformation. + activation: str, default="tanh" + The activation function to use for each hidden layer. The output layer + of the network is linear (has no activation function). + z2_equivar: bool, default=False + Whether or not to impose z2 equivariance. This is only done crudely + by splitting the sites according to the sign of the sum across lattice + sites. + + """ blocks = [ - coupling_pair( + _coupling_pair( layers.RationalQuadraticSplineLayer, - size_half, + size_half=size_half, interval=interval, n_segments=n_segments, hidden_shape=hidden_shape, activation=activation, - z2_equivar=z2_equivar_spline, + z2_equivar=z2_equivar, ) - for _ in range(n_spline) + for _ in range(n_blocks) ] return Sequential( #layers.BatchNormLayer(), @@ -97,39 +204,23 @@ def rational_quadratic_spline( layers.GlobalRescaling(), ) +_normalising_flow = collect("layer_action", ("model_params",)) -def spline_affine(real_nvp, rational_quadratic_spline): - return Sequential(rational_quadratic_spline, real_nvp) - - -def affine_spline(real_nvp, rational_quadratic_spline): - return Sequential(real_nvp, rational_quadratic_spline) - - -_normalising_flow = collect("model_action", ("model_params",)) - -def preset_model(_normalising_flow): - return _normalising_flow[0] - - -def sequential_model(_normalising_flow): - """action which wraps a list of affine models in +def model_to_load(_normalising_flow): + 
"""Action which wraps a list of layers in :py:class:`anvil.core.Sequential`. This allows the user to specify an - arbitrary combination of layers as the model + arbitrary combination of layers as the model. + + For more information + on valid choices for layers, see :py:data:`LAYER_OPTIONS` or the various + functions in :py:mod:`anvil.models` which produce sequences of the layers + found in :py:mod:`anvil.layers`. """ return Sequential(*_normalising_flow) -MODEL_OPTIONS = { - "nice": nice, - "real_nvp": real_nvp, - "rational_quadratic_spline": rational_quadratic_spline, - "spline_affine": spline_affine, - "affine_spline": affine_spline, -} - - -LOADED_MODEL_OPTIONS = { - "preset_model": preset_model, - "sequential_model": sequential_model +LAYER_OPTIONS = { + "nice": _nice, + "real_nvp": _real_nvp, + "rational_quadratic_spline": _rational_quadratic_spline, } diff --git a/examples/runcards/train.yml b/examples/runcards/train.yml index f9c319a..945fa86 100644 --- a/examples/runcards/train.yml +++ b/examples/runcards/train.yml @@ -1,3 +1,5 @@ +# Example of how to specify a custom sequential model explicitly. + # Lattice lattice_length: 6 lattice_dimension: 2 @@ -12,18 +14,18 @@ couplings: # Model base: gaussian -model: affine_spline model_params: - hidden_shape: [72] - activation: tanh - - n_affine: 2 - z2_equivar: true - - n_spline: 1 - n_segments: 8 - z2_equivar_spline: false - + - layer: real_nvp + n_blocks: 2 + z2_equivar: true + activation: tanh + hidden_shape: [72] + - layer: rational_quadratic_spline + n_blocks: 1 + n_segments: 8 + z2_equivar: false + activation: tanh + hidden_shape: [72] # Training n_batch: 1000 diff --git a/examples/runcards/train_sequential_model.yml b/examples/runcards/train_sequential_model.yml deleted file mode 100644 index 81c735c..0000000 --- a/examples/runcards/train_sequential_model.yml +++ /dev/null @@ -1,46 +0,0 @@ -# Example of how to specify a custom sequential model explicitly. 
- -# Lattice -lattice_length: 6 -lattice_dimension: 2 - -# Target -target: phi_four -parameterisation: albergo2019 -couplings: - m_sq: -4 - lam: 6.975 - -# Model -base: gaussian - -model: sequential_model - -model_params: - - model: real_nvp - n_affine: 2 - z2_equivar: true - activation: tanh - hidden_shape: [72] - - model: rational_quadratic_spline - n_spline: 1 - n_segments: 8 - z2_equivar_spline: false - activation: tanh - hidden_shape: [72] - -# Training -n_batch: 1000 -epochs: 2000 -save_interval: 1000 - -# Optimizer -optimizer: Adam -optimizer_params: - lr: 0.005 - -# Scheduler -scheduler: CosineAnnealingLR -scheduler_params: - T_max: 2000 -